scaler optimizations:
on sh, use 8.24 fixed-point C math for final division in scaler
on coldfire, use 8.32 fixed-point via emac
on other architectures, use 8.32 fixed-point C math
use shift-and-add to divide when adjusting scale factors in pictureflow

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19802 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
73f2d001fd
commit
1b13299769
3 changed files with 66 additions and 22 deletions
|
@ -397,8 +397,8 @@ static inline PFreal fcos(int iangle)
|
|||
return fsin(iangle + (IANGLE_MAX >> 2));
|
||||
}
|
||||
|
||||
#define RB_DIV ((31ULL << 32) / 255 + 1)
|
||||
#define G_DIV ((63ULL << 32) / 255 + 1)
|
||||
#define DIV255(val) ((((((val)>>8)+(val))>>8)+(val))>>8)
|
||||
#define SCALE_VAL(val,out) DIV255((val) * (out) + 127)
|
||||
|
||||
static void output_row_transposed(uint32_t row, void * row_in,
|
||||
struct scaler_context *ctx)
|
||||
|
@ -408,19 +408,19 @@ static void output_row_transposed(uint32_t row, void * row_in,
|
|||
#ifdef USEGSLIB
|
||||
uint32_t *qp = (uint32_t*)row_in;
|
||||
for (; dest < end; dest += ctx->bm->height)
|
||||
*dest = ((*qp++) + ctx->round) * (uint64_t)ctx->divisor >> 32;
|
||||
*dest = SC_MUL((*qp++) + ctx->round), ctx->divisor);
|
||||
#else
|
||||
struct uint32_rgb *qp = (struct uint32_rgb*)row_in;
|
||||
uint32_t rb_mul = ctx->divisor * (uint64_t)RB_DIV >> 32,
|
||||
rb_rnd = ctx->round * (uint64_t)RB_DIV >> 32,
|
||||
g_mul = ctx->divisor * (uint64_t)G_DIV >> 32,
|
||||
g_rnd = ctx->round * (uint64_t)G_DIV >> 32;
|
||||
int r, g, b;
|
||||
uint32_t rb_mul = SCALE_VAL(ctx->divisor, 31),
|
||||
rb_rnd = SCALE_VAL(ctx->round, 31),
|
||||
g_mul = SCALE_VAL(ctx->divisor, 63),
|
||||
g_rnd = SCALE_VAL(ctx->round, 63);
|
||||
int r, g, b;
|
||||
for (; dest < end; dest += ctx->bm->height)
|
||||
{
|
||||
r = (qp->r + rb_rnd) * (uint64_t)rb_mul >> 32;
|
||||
g = (qp->g + g_rnd) * (uint64_t)g_mul >> 32;
|
||||
b = (qp->b + rb_rnd) * (uint64_t)rb_mul >> 32;
|
||||
r = SC_MUL(qp->r + rb_rnd, rb_mul);
|
||||
g = SC_MUL(qp->g + g_rnd, g_mul);
|
||||
b = SC_MUL(qp->b + rb_rnd, rb_mul);
|
||||
qp++;
|
||||
*dest = LCD_RGBPACK_LCD(r,g,b);
|
||||
}
|
||||
|
|
|
@ -244,7 +244,7 @@ static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx)
|
|||
/* Set up rounding and scale factors */
|
||||
ctx->divisor *= ctx->src->height;
|
||||
ctx->round = ctx->divisor >> 1;
|
||||
ctx->divisor = ((ctx->divisor - 1 + 0x80000000U) / ctx->divisor) << 1;
|
||||
ctx->divisor = (((ctx->divisor >> 1) + SC_NUM) / ctx->divisor) << SC_FIX;
|
||||
mul = 0;
|
||||
oy = rset->rowstart;
|
||||
oye = 0;
|
||||
|
@ -442,7 +442,7 @@ static inline bool scale_v_linear(struct rowset *rset,
|
|||
/* Set up scale and rounding factors, the divisor is bm->height - 1 */
|
||||
ctx->divisor *= (ctx->bm->height - 1);
|
||||
ctx->round = ctx->divisor >> 1;
|
||||
ctx->divisor = ((ctx->divisor - 1 + 0x80000000U) / ctx->divisor) << 1;
|
||||
ctx->divisor = (((ctx->divisor >> 1) + SC_NUM) / ctx->divisor) << SC_FIX;
|
||||
/* Set up our two temp buffers. The names are generic because they'll be
|
||||
swapped each time a new input row is read
|
||||
*/
|
||||
|
@ -531,8 +531,7 @@ void output_row_native(uint32_t row, void * row_in, struct scaler_context *ctx)
|
|||
for (col = 0; col < ctx->bm->width; col++) {
|
||||
if (ctx->dither)
|
||||
delta = DITHERXDY(col,dy);
|
||||
bright = ((*qp++) + ctx->round) *
|
||||
(uint64_t)ctx->divisor >> 32;
|
||||
bright = SC_MUL((*qp++) + ctx->round,ctx->divisor);
|
||||
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
||||
data |= (~bright & 3) << shift;
|
||||
shift -= 2;
|
||||
|
@ -555,8 +554,7 @@ void output_row_native(uint32_t row, void * row_in, struct scaler_context *ctx)
|
|||
for (col = 0; col < ctx->bm->width; col++) {
|
||||
if (ctx->dither)
|
||||
delta = DITHERXDY(col,dy);
|
||||
bright = ((*qp++) + ctx->round) *
|
||||
(uint64_t)ctx->divisor >> 32;
|
||||
bright = SC_MUL((*qp++) + ctx->round, ctx->divisor);
|
||||
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
||||
*dest++ |= (~bright & 3) << shift;
|
||||
}
|
||||
|
@ -571,8 +569,7 @@ void output_row_native(uint32_t row, void * row_in, struct scaler_context *ctx)
|
|||
for (col = 0; col < ctx->bm->width; col++) {
|
||||
if (ctx->dither)
|
||||
delta = DITHERXDY(col,dy);
|
||||
bright = ((*qp++) + ctx->round) *
|
||||
(uint64_t)ctx->divisor >> 32;
|
||||
bright = SC_MUL((*qp++) + ctx->round, ctx->divisor);
|
||||
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
||||
*dest++ |= vi_pattern[bright] << shift;
|
||||
}
|
||||
|
@ -588,9 +585,9 @@ void output_row_native(uint32_t row, void * row_in, struct scaler_context *ctx)
|
|||
if (ctx->dither)
|
||||
delta = DITHERXDY(col,dy);
|
||||
q0 = *qp++;
|
||||
r = (q0.r + ctx->round) * (uint64_t)ctx->divisor >> 32;
|
||||
g = (q0.g + ctx->round) * (uint64_t)ctx->divisor >> 32;
|
||||
b = (q0.b + ctx->round) * (uint64_t)ctx->divisor >> 32;
|
||||
r = SC_MUL(q0.r + ctx->round, ctx->divisor);
|
||||
g = SC_MUL(q0.g + ctx->round, ctx->divisor);
|
||||
b = SC_MUL(q0.b + ctx->round, ctx->divisor);
|
||||
r = (31 * r + (r >> 3) + delta) >> 8;
|
||||
g = (63 * g + (g >> 2) + delta) >> 8;
|
||||
b = (31 * b + (b >> 3) + delta) >> 8;
|
||||
|
@ -680,6 +677,7 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src,
|
|||
scale_h_linear_setup(&ctx);
|
||||
}
|
||||
#endif
|
||||
SC_MUL_INIT;
|
||||
#ifdef HAVE_UPSCALER
|
||||
if (sh > dh)
|
||||
#endif
|
||||
|
@ -688,6 +686,7 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src,
|
|||
else
|
||||
ret = scale_v_linear(rset, &ctx);
|
||||
#endif
|
||||
SC_MUL_END;
|
||||
#ifdef HAVE_ADJUSTABLE_CPU_FREQ
|
||||
cpu_boost(false);
|
||||
#endif
|
||||
|
|
|
@ -43,6 +43,51 @@
|
|||
#define MAX_SC_STACK_ALLOC 0
|
||||
#define HAVE_UPSCALER 1
|
||||
|
||||
/* Per-architecture selection of the scaler's fixed-point multiply.
 * This block defines:
 *   SC_MUL(x, y)  - multiply x by the scale factor y and drop the
 *                   fractional bits
 *   SC_MUL_INIT   - setup expanded before a run of SC_MUL uses
 *   SC_MUL_END    - teardown expanded after the run
 *   SC_NUM/SC_FIX - constants paired with the chosen SC_SHIFT
 *
 * Coldfire supplies its own SC_MUL built on the mac.l instruction; SH only
 * selects a 24-bit shift; everything else falls through to the defaults.
 */
#if defined(CPU_COLDFIRE)
|
||||
#define SC_NUM 0x80000000U
|
||||
/* Stores the value returned by coldfire_get_macsr() in a local named
 * macsr_st, then calls coldfire_set_macsr(0). Because this declares a
 * variable, SC_MUL_END must be expanded in a scope where macsr_st is
 * still visible.
 */
#define SC_MUL_INIT \
|
||||
unsigned long macsr_st = coldfire_get_macsr(); \
|
||||
coldfire_set_macsr(0);
|
||||
/* Passes the macsr_st value saved by SC_MUL_INIT back to
 * coldfire_set_macsr().
 */
#define SC_MUL_END coldfire_set_macsr(macsr_st);
|
||||
/* GCC statement expression: issues mac.l with x and y targeting acc0,
 * copies accext01 into t, writes 0 to acc0, and evaluates to t.
 * NOTE(review): presumably accext01 then holds the product bits that
 * correspond to the ">> 32" of the portable fallback - confirm against the
 * EMAC documentation.
 */
#define SC_MUL(x, y) \
|
||||
({ \
|
||||
unsigned long t; \
|
||||
asm ("mac.l %[a], %[b], %%acc0\n\t" \
|
||||
"move.l %%accext01, %[t]\n\t" \
|
||||
"move.l #0, %%acc0\n\t" \
|
||||
: [t] "=r" (t) : [a] "r" (x), [b] "r" (y)); \
|
||||
t; \
|
||||
})
|
||||
/* SH only picks the shift width; SC_MUL itself comes from the generic
 * SC_SHIFT == 24 definitions further down.
 */
#elif defined(CPU_SH)
|
||||
#define SC_SHIFT 24
|
||||
#endif
|
||||

||||
/* Any architecture that did not choose a shift above gets 32. This includes
 * Coldfire, which therefore takes SC_FIX from the SC_SHIFT == 32 branch.
 */
#ifndef SC_SHIFT
|
||||
#define SC_SHIFT 32
|
||||
#endif
|
||||

||||
/* 24-bit variant: SC_NUM is 1 << 24 and SC_FIX is 0. */
#if SC_SHIFT == 24
|
||||
#define SC_NUM 0x1000000U
|
||||
#define SC_FIX 0
|
||||

||||
/* Plain C fallback, skipped when an architecture already defined SC_MUL.
 * The product is formed without a widening cast, so it is computed in the
 * operands' own type before the shift by 24.
 */
#ifndef SC_MUL
|
||||
#define SC_MUL(x, y) ((x) * (y) >> 24)
|
||||
#define SC_MUL_INIT
|
||||
#define SC_MUL_END
|
||||
#endif
|
||||

||||
#else /* SC_SHIFT == 32 */
|
||||
/* 32-bit variant: SC_NUM is 1 << 31 and SC_FIX is 1. On Coldfire this
 * repeats the SC_NUM definition above with the same value.
 */
#define SC_NUM 0x80000000U
|
||||
#define SC_FIX 1
|
||||

||||
/* Plain C fallback, skipped when an architecture (Coldfire) already defined
 * SC_MUL. y is widened to uint64_t so the product is formed in 64 bits
 * before the shift by 32.
 */
#ifndef SC_MUL
|
||||
#define SC_MUL(x, y) ((x) * (uint64_t)(y) >> 32)
|
||||
#define SC_MUL_INIT
|
||||
#define SC_MUL_END
|
||||
#endif
|
||||

||||
#endif
|
||||
|
||||
struct img_part {
|
||||
int len;
|
||||
#if !defined(HAVE_LCD_COLOR)
|
||||
|
|
Loading…
Reference in a new issue