SPC codec on Coldfire: Move movclrs into the light and out of the long dark shadow cast my emac latency as much as possible. Put in a faster interpolation routine (emac saves the day...again). Add comments about what's going on.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12558 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
55aabe34ca
commit
f2b6ecd5fe
1 changed files with 62 additions and 31 deletions
|
@ -974,23 +974,35 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
||||||
if ( (this->r.g.noise_enables & vbit) == 0 )
|
if ( (this->r.g.noise_enables & vbit) == 0 )
|
||||||
{
|
{
|
||||||
uint32_t f = voice->position;
|
uint32_t f = voice->position;
|
||||||
int32_t y1;
|
int32_t y0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Formula (fastest found so far of MANY):
|
||||||
|
* output = y0 + f*y1 - f*y0
|
||||||
|
*/
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"move.l %[f], %[y0] \r\n" /* separate fraction */
|
/* separate fractional and whole parts */
|
||||||
"and.l #0xfff, %[f] \r\n" /* and whole parts */
|
"move.l %[f], %[y1] \r\n"
|
||||||
"lsr.l %[sh], %[y0] \r\n"
|
"and.l #0xfff, %[f] \r\n"
|
||||||
"move.l 2(%[s], %[y0].l*2), %[y1] \r\n" /* load two samples */
|
"lsr.l %[sh], %[y1] \r\n"
|
||||||
"move.l %[y1], %[y0] \r\n" /* separate samples */
|
/* load samples y0 (upper) & y1 (lower) */
|
||||||
"ext.l %[y1] \r\n" /* y0=s[1], y1=s[2] */
|
"move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
|
||||||
"swap %[y0] \r\n"
|
/* %acc0 = f*y1 */
|
||||||
"ext.l %[y0] \r\n"
|
"mac.w %[f]l, %[y1]l, %%acc0 \r\n"
|
||||||
"sub.l %[y0], %[y1] \r\n" /* diff = y1 - y0 */
|
/* msac.w is 2% boostier so add negative */
|
||||||
"muls.l %[f], %[y1] \r\n" /* y0 += f*diff */
|
"neg.l %[f] \r\n"
|
||||||
|
/* %acc0 -= f*y0 */
|
||||||
|
"mac.w %[f]l, %[y1]u, %%acc0 \r\n"
|
||||||
|
/* separate out y0 and sign extend */
|
||||||
|
"swap %[y1] \r\n"
|
||||||
|
"movea.w %[y1], %[y0] \r\n"
|
||||||
|
/* fetch result, scale down and add y0 */
|
||||||
|
"movclr.l %%acc0, %[y1] \r\n"
|
||||||
|
/* output = y0 + (result >> 12) */
|
||||||
"asr.l %[sh], %[y1] \r\n"
|
"asr.l %[sh], %[y1] \r\n"
|
||||||
"add.l %[y1], %[y0] \r\n"
|
"add.l %[y0], %[y1] \r\n"
|
||||||
: [f]"+&d"(f), [y0]"=&d"(output), [y1]"=&d"(y1)
|
: [f]"+&d"(f), [y0]"=&a"(y0), [y1]"=&d"(output)
|
||||||
: [s]"a"(voice->samples), [sh]"r"(12)
|
: [s]"a"(voice->samples), [sh]"d"(12)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1093,9 +1105,13 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
||||||
*this->last_fir_ptr = fb;
|
*this->last_fir_ptr = fb;
|
||||||
this->last_fir_ptr = this->fir_ptr;
|
this->last_fir_ptr = this->fir_ptr;
|
||||||
|
|
||||||
/* Apply echo FIR filter to output - circular buffer is hardware
|
/* Apply echo FIR filter to output samples read from echo buffer -
|
||||||
incremented and masked; FIR coefficients and buffer history are
|
circular buffer is hardware incremented and masked; FIR
|
||||||
loaded in parallel with multiply accumulate operations. */
|
coefficients and buffer history are loaded in parallel with
|
||||||
|
multiply accumulate operations. Shift left by one here and once
|
||||||
|
again when calculating feedback to have sample values justified
|
||||||
|
to bit 31 in the output to ease endian swap, interleaving and
|
||||||
|
clamping before placing result in the program's echo buffer. */
|
||||||
int _0, _1, _2;
|
int _0, _1, _2;
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"move.l (%[fir_c]) , %[_2] \r\n"
|
"move.l (%[fir_c]) , %[_2] \r\n"
|
||||||
|
@ -1115,53 +1131,68 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
||||||
"mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
|
"mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
|
||||||
"mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
|
"mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
|
||||||
"mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
|
"mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
|
||||||
"movclr.l %%acc0, %[out_0] \r\n"
|
|
||||||
"movclr.l %%acc1, %[out_1] \r\n"
|
|
||||||
: [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
|
: [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
|
||||||
[fir_p]"+a"(this->fir_ptr),
|
[fir_p]"+a"(this->fir_ptr)
|
||||||
[out_0]"=r"(out_0), [out_1]"=r"(out_1)
|
|
||||||
: [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
|
: [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
|
||||||
);
|
);
|
||||||
|
|
||||||
/* Generate output */
|
/* Generate output */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
|
/* fetch filter results to eliminate stalls */
|
||||||
|
"movclr.l %%acc0, %[out_0] \r\n"
|
||||||
|
"movclr.l %%acc1, %[out_1] \r\n"
|
||||||
|
/* apply global volume */
|
||||||
"mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
|
"mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
|
||||||
"mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
|
"mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
|
||||||
|
/* apply echo volume and add to final output */
|
||||||
"mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
|
"mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
|
||||||
"mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
|
"mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
|
||||||
:
|
: [out_0]"=&r"(out_0), [out_1]"=&r"(out_1)
|
||||||
: [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
|
: [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
|
||||||
[ev_0]"r"((int)this->r.g.echo_volume_0),
|
[ev_0]"r"((int)this->r.g.echo_volume_0),
|
||||||
[chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
|
[chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
|
||||||
[ev_1]"r"((int)this->r.g.echo_volume_1),
|
[ev_1]"r"((int)this->r.g.echo_volume_1)
|
||||||
[out_0]"r"(out_0), [out_1]"r"(out_1)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
/* Feedback into echo buffer */
|
/* Feedback into echo buffer */
|
||||||
if ( !(this->r.g.flags & 0x20) )
|
if ( !(this->r.g.flags & 0x20) )
|
||||||
{
|
{
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"mac.l %[sh], %[e0] , %%acc0 \r\n"
|
/* scale echo voices; saturate if overflow */
|
||||||
"mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
|
|
||||||
"mac.l %[sh], %[e1] , %%acc1 \r\n"
|
"mac.l %[sh], %[e1] , %%acc1 \r\n"
|
||||||
|
"mac.l %[sh], %[e0] , %%acc0 \r\n"
|
||||||
|
/* add scaled output from FIR filter */
|
||||||
"mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
|
"mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
|
||||||
"movclr.l %%acc0, %[e0] \r\n"
|
"mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
|
||||||
|
/* swap and fetch feedback results - simply
|
||||||
|
swap_odd_even32 mixed in between macs and
|
||||||
|
movclrs to mitigate stall issues */
|
||||||
|
"move.l #0x00ff00ff, %[sh] \r\n"
|
||||||
"movclr.l %%acc1, %[e1] \r\n"
|
"movclr.l %%acc1, %[e1] \r\n"
|
||||||
"swap %[e1] \r\n"
|
"swap %[e1] \r\n"
|
||||||
|
"movclr.l %%acc0, %[e0] \r\n"
|
||||||
"move.w %[e1], %[e0] \r\n"
|
"move.w %[e1], %[e0] \r\n"
|
||||||
|
"and.l %[e0], %[sh] \r\n"
|
||||||
|
"eor.l %[sh], %[e0] \r\n"
|
||||||
|
"lsl.l #8, %[sh] \r\n"
|
||||||
|
"lsr.l #8, %[e0] \r\n"
|
||||||
|
"or.l %[sh], %[e0] \r\n"
|
||||||
|
/* save final feedback into echo buffer */
|
||||||
|
"move.l %[e0], (%[echo_ptr]) \r\n"
|
||||||
: [e0]"+&d"(echo_0), [e1]"+&d"(echo_1)
|
: [e0]"+&d"(echo_0), [e1]"+&d"(echo_1)
|
||||||
: [out_0]"r"(out_0), [out_1]"r"(out_1),
|
: [out_0]"r"(out_0), [out_1]"r"(out_1),
|
||||||
[ef]"r"((int)this->r.g.echo_feedback),
|
[ef]"r"((int)this->r.g.echo_feedback),
|
||||||
[sh]"r"(1 << 9)
|
[echo_ptr]"a"((int32_t *)echo_ptr),
|
||||||
|
[sh]"d"(1 << 9)
|
||||||
);
|
);
|
||||||
|
|
||||||
*(int32_t *)echo_ptr = swap_odd_even32(echo_0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Output final samples */
|
/* Output final samples */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
|
/* fetch output saved in %acc2 and %acc3 */
|
||||||
"movclr.l %%acc2, %[out_0] \r\n"
|
"movclr.l %%acc2, %[out_0] \r\n"
|
||||||
"movclr.l %%acc3, %[out_1] \r\n"
|
"movclr.l %%acc3, %[out_1] \r\n"
|
||||||
|
/* scale right by global_muting shift */
|
||||||
"asr.l %[gm], %[out_0] \r\n"
|
"asr.l %[gm], %[out_0] \r\n"
|
||||||
"asr.l %[gm], %[out_1] \r\n"
|
"asr.l %[gm], %[out_1] \r\n"
|
||||||
: [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
|
: [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
|
||||||
|
|
Loading…
Reference in a new issue