Assembler-optimised FLAC 24-bit handling routines for ColdFire-based players. Decoding speed should be improved drastically. I haven't got many 24-bit files myself, so let me know if something sounds off.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@11329 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
73c0cfa632
commit
4134e91950
3 changed files with 299 additions and 40 deletions
|
@ -17,11 +17,14 @@
|
|||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* The following is an assembler optimised version of the LPC filtering
|
||||
routines needed for FLAC decoding. It is optimised for use with the
|
||||
/* The following are assembler optimised versions of the LPC filtering
|
||||
routines needed for FLAC decoding. They are optimised for use with the
|
||||
MCF5249 processor, or any other similar ColdFire core with the EMAC unit.
|
||||
All LPC filtering up to order 10 is done in specially optimised unrolled
|
||||
loops, while every order above this is handled by a slower default routine.
|
||||
*/
|
||||
|
||||
/* This routine deals with sample widths 16 and lower. All LPC filtering up to
|
||||
order 10 is done in specially optimised unrolled loops, while every order
|
||||
above this is handled by a slower default routine.
|
||||
*/
|
||||
.section .icode,"ax",@progbits
|
||||
.global lpc_decode_emac
|
||||
|
@ -65,7 +68,7 @@ lpc_decode_emac:
|
|||
.order10:
|
||||
movem.l (%a1), %d3-%d7/%a1-%a5 | load lpc coefs
|
||||
move.l (%a0)+, %a6 | load first history sample
|
||||
.loop10:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
|
@ -81,13 +84,13 @@ lpc_decode_emac:
|
|||
add.l %d2, (%a0) | add residual and save
|
||||
lea.l (-8*4, %a0), %a0 | point history back at second element
|
||||
subq.l #1, %d0 | decrement sample count
|
||||
jne .loop10 | are we done?
|
||||
jne 1b | are we done?
|
||||
jra .exit
|
||||
|
||||
.order9:
|
||||
movem.l (%a1), %d4-%d7/%a1-%a5
|
||||
move.l (%a0)+, %a6
|
||||
.loop9:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
|
@ -102,13 +105,13 @@ lpc_decode_emac:
|
|||
add.l %d2, (%a0)
|
||||
lea.l (-7*4, %a0), %a0
|
||||
subq.l #1, %d0
|
||||
jne .loop9
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.order8:
|
||||
movem.l (%a1), %d5-%d7/%a1-%a5
|
||||
move.l (%a0)+, %a6
|
||||
.loop8:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
|
@ -122,13 +125,13 @@ lpc_decode_emac:
|
|||
add.l %d2, (%a0)
|
||||
lea.l (-6*4, %a0), %a0
|
||||
subq.l #1, %d0
|
||||
jne .loop8
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.order7:
|
||||
movem.l (%a1), %d6-%d7/%a1-%a5
|
||||
move.l (%a0)+, %a6
|
||||
.loop7:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
|
@ -141,13 +144,13 @@ lpc_decode_emac:
|
|||
add.l %d2, (%a0)
|
||||
lea.l (-5*4, %a0), %a0
|
||||
subq.l #1, %d0
|
||||
jne .loop7
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.order6:
|
||||
movem.l (%a1), %d7/%a1-%a5
|
||||
move.l (%a0)+, %a6
|
||||
.loop6:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
|
@ -159,13 +162,13 @@ lpc_decode_emac:
|
|||
add.l %d2, (%a0)
|
||||
lea.l (-4*4, %a0), %a0
|
||||
subq.l #1, %d0
|
||||
jne .loop6
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.order5:
|
||||
movem.l (%a1), %a1-%a5
|
||||
move.l (%a0)+, %a6
|
||||
.loop5:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
|
@ -176,13 +179,13 @@ lpc_decode_emac:
|
|||
add.l %d2, (%a0)
|
||||
lea.l (-3*4, %a0), %a0
|
||||
subq.l #1, %d0
|
||||
jne .loop5
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.order4:
|
||||
movem.l (%a1), %a2-%a5
|
||||
move.l (%a0)+, %a6
|
||||
.loop4:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
|
@ -192,13 +195,13 @@ lpc_decode_emac:
|
|||
add.l %d2, (%a0)
|
||||
subq.l #8, %a0
|
||||
subq.l #1, %d0
|
||||
jne .loop4
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.order3:
|
||||
movem.l (%a1), %a3-%a5
|
||||
move.l (%a0)+, %a6
|
||||
.loop3:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (-2*4, %a0), %a6, %acc0
|
||||
|
@ -207,32 +210,32 @@ lpc_decode_emac:
|
|||
add.l %d2, (%a0)
|
||||
subq.l #4, %a0
|
||||
subq.l #1, %d0
|
||||
jne .loop3
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.order2:
|
||||
movem.l (%a1), %a4-%a5
|
||||
move.l (%a0)+, %a6
|
||||
.loop2:
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, %acc0 | data for next iteration is already loaded
|
||||
movclr.l %acc0, %d2
|
||||
asr.l %d1, %d2
|
||||
add.l %d2, (%a0)
|
||||
subq.l #1, %d0
|
||||
jne .loop2
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.order1:
|
||||
| no point in using mac here
|
||||
move.l (%a1), %a5
|
||||
.loop1:
|
||||
1:
|
||||
move.l %a5, %d2
|
||||
muls.l (%a0)+, %d2
|
||||
asr.l %d1, %d2
|
||||
add.l %d2, (%a0)
|
||||
subq.l #1, %d0
|
||||
jne .loop1
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.default:
|
||||
|
@ -243,7 +246,7 @@ lpc_decode_emac:
|
|||
move.l %d2, %d3
|
||||
lsr.l #2, %d3 | coefs/4, num of iterations needed in next loop
|
||||
move.l (%a3)+, %a5 | preload data for loop
|
||||
.dloop1:
|
||||
1:
|
||||
lea.l (-4*4, %a2), %a2 | move lpc coef pointer four samples backwards
|
||||
movem.l (%a2), %d4-%d7 | load four coefs
|
||||
mac.l %a5, %d7, (%a3)+, %a5, %acc0
|
||||
|
@ -251,33 +254,277 @@ lpc_decode_emac:
|
|||
mac.l %a5, %d5, (%a3)+, %a5, %acc0
|
||||
mac.l %a5, %d4, (%a3)+, %a5, %acc0
|
||||
subq.l #1, %d3 | any more unrolled loop operations left?
|
||||
jne .dloop1
|
||||
jne 1b
|
||||
|
||||
moveq.l #3, %d3 | mask 0x00000003
|
||||
and.l %d2, %d3 | get the remaining samples to be filtered
|
||||
jmp.l (2, %pc, %d3*2) | then jump into mac.l chain
|
||||
| jumptable:
|
||||
bra.b .dsave
|
||||
bra.b .oneleft
|
||||
bra.b .twoleft
|
||||
| implicit .threeleft
|
||||
bra.b 3f | none left
|
||||
bra.b 2f | one left
|
||||
bra.b 1f | two left
|
||||
| three left
|
||||
move.l -(%a2), %d4
|
||||
mac.l %a5, %d4, (%a3)+, %a5, %acc0
|
||||
.twoleft:
|
||||
1:
|
||||
move.l -(%a2), %d4
|
||||
mac.l %a5, %d4, (%a3)+, %a5, %acc0
|
||||
.oneleft:
|
||||
2:
|
||||
move.l -(%a2), %d4
|
||||
mac.l %a5, %d4, (%a3)+, %a5, %acc0 | need this fetch to not break line below
|
||||
|
||||
.dsave:
|
||||
subq.l #4, %a3 | we're one past the save location
|
||||
mac.l %a5, %d4, (%a3)+, %a5, %acc0
|
||||
3:
|
||||
movclr.l %acc0, %d3 | get result
|
||||
asr.l %d1, %d3 | shift qlevel bits right
|
||||
add.l %d3, (%a3) | add residual and save
|
||||
add.l %a5, %d3 | add residual, which is in a5 by now
|
||||
move.l %d3, -(%a3) | save, a3 is also one past save location
|
||||
addq.l #4, %a0 | increment history pointer
|
||||
subq.l #1, %d0 | decrement sample count
|
||||
jne .default | are we done?
|
||||
jra .exit | if so, fall through to exit
|
||||
|
||||
|
||||
/* This routine deals with sample widths 24 and lower. All LPC filtering up to
|
||||
order 8 is done in specially optimised unrolled loops, while every order
|
||||
above this is handled by a slower default routine.
|
||||
*/
|
||||
.global lpc_decode_emac_wide
|
||||
.align 2
|
||||
lpc_decode_emac_wide:
|
||||
lea.l (-44, %sp), %sp
|
||||
movem.l %d2-%d7/%a2-%a6, (%sp)
|
||||
movem.l (44+4, %sp), %d0-%d1/%d3/%a0-%a1
|
||||
/* d0 = blocksize, d1 = qlevel, d3 = pred_order
|
||||
a0 = data, a1 = coeffs
|
||||
*/
|
||||
|
||||
/* the history pointer always trails the data pointer by 'pred_order'
|
||||
samples. since we have one loop for each order, we can hard code this
|
||||
and free a register by not saving data pointer.
|
||||
*/
|
||||
move.l %d3, %d2
|
||||
neg.l %d2
|
||||
lea.l (%a0, %d2.l*4), %a0 | history
|
||||
clr.l %d2
|
||||
move.l %d2, %macsr | we'll need integer mode for this
|
||||
tst.l %d0
|
||||
jeq .exit | zero samples to process, exit
|
||||
moveq.l #32, %d2
|
||||
sub.l %d1, %d2 | calculate shift amount for extension byte
|
||||
moveq.l #8, %d4
|
||||
cmp.l %d4, %d3
|
||||
jgt .wdefault | order is over 8, jump to default case
|
||||
jmp.l (2, %pc, %d3.l*4) | jump to loop corresponding to pred_order
|
||||
| jumptable:
|
||||
bra.w .exit | zero order filter isn't possible, exit function
|
||||
bra.w .worder1
|
||||
bra.w .worder2
|
||||
bra.w .worder3
|
||||
bra.w .worder4
|
||||
bra.w .worder5
|
||||
bra.w .worder6
|
||||
bra.w .worder7
|
||||
|
||||
| last jump table entry coincides with target, so leave it out
|
||||
.worder8:
|
||||
movem.l (%a1), %d5-%d7/%a1-%a5 | load lpc coefs
|
||||
move.l (%a0)+, %a6 | load first history sample
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a2, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a1, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %d7, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %d6, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %d5, (-7*4, %a0), %a6, %acc0 | load for the next iteration
|
||||
move.l %accext01, %d4 | get top 8 bits of sum
|
||||
movclr.l %acc0, %d3 | then bottom 32 bits
|
||||
lsr.l %d1, %d3 | shift bottom bits qlevel bits right
|
||||
asl.l %d2, %d4 | shift top bits 32 - qlevel bits left
|
||||
or.l %d4, %d3 | now combine results
|
||||
add.l %d3, (%a0) | add residual and save
|
||||
lea.l (-6*4, %a0), %a0 | point history back at second element
|
||||
subq.l #1, %d0 | decrement sample count
|
||||
jne 1b | are we done?
|
||||
jra .exit
|
||||
|
||||
.worder7:
|
||||
movem.l (%a1), %d6-%d7/%a1-%a5
|
||||
move.l (%a0)+, %a6
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a2, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a1, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %d7, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %d6, (-6*4, %a0), %a6, %acc0
|
||||
move.l %accext01, %d4
|
||||
movclr.l %acc0, %d3
|
||||
lsr.l %d1, %d3
|
||||
asl.l %d2, %d4
|
||||
or.l %d4, %d3
|
||||
add.l %d3, (%a0)
|
||||
lea.l (-5*4, %a0), %a0
|
||||
subq.l #1, %d0
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.worder6:
|
||||
movem.l (%a1), %d7/%a1-%a5
|
||||
move.l (%a0)+, %a6
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a2, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a1, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %d7, (-5*4, %a0), %a6, %acc0
|
||||
move.l %accext01, %d4
|
||||
movclr.l %acc0, %d3
|
||||
lsr.l %d1, %d3
|
||||
asl.l %d2, %d4
|
||||
or.l %d4, %d3
|
||||
add.l %d3, (%a0)
|
||||
lea.l (-4*4, %a0), %a0
|
||||
subq.l #1, %d0
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.worder5:
|
||||
movem.l (%a1), %a1-%a5
|
||||
move.l (%a0)+, %a6
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a2, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a1, (-4*4, %a0), %a6, %acc0
|
||||
move.l %accext01, %d4
|
||||
movclr.l %acc0, %d3
|
||||
lsr.l %d1, %d3
|
||||
asl.l %d2, %d4
|
||||
or.l %d4, %d3
|
||||
add.l %d3, (%a0)
|
||||
lea.l (-3*4, %a0), %a0
|
||||
subq.l #1, %d0
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.worder4:
|
||||
movem.l (%a1), %a2-%a5
|
||||
move.l (%a0)+, %a6
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a2, (-3*4, %a0), %a6, %acc0
|
||||
move.l %accext01, %d4
|
||||
movclr.l %acc0, %d3
|
||||
lsr.l %d1, %d3
|
||||
asl.l %d2, %d4
|
||||
or.l %d4, %d3
|
||||
add.l %d3, (%a0)
|
||||
subq.l #8, %a0
|
||||
subq.l #1, %d0
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.worder3:
|
||||
movem.l (%a1), %a3-%a5
|
||||
move.l (%a0)+, %a6
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a3, (-2*4, %a0), %a6, %acc0
|
||||
move.l %accext01, %d4
|
||||
movclr.l %acc0, %d3
|
||||
lsr.l %d1, %d3
|
||||
asl.l %d2, %d4
|
||||
or.l %d4, %d3
|
||||
add.l %d3, (%a0)
|
||||
subq.l #4, %a0
|
||||
subq.l #1, %d0
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.worder2:
|
||||
movem.l (%a1), %a4-%a5
|
||||
move.l (%a0)+, %a6
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0)+, %a6, %acc0
|
||||
mac.l %a6, %a4, %acc0 | data for next iteration is already loaded
|
||||
move.l %accext01, %d4
|
||||
movclr.l %acc0, %d3
|
||||
lsr.l %d1, %d3
|
||||
asl.l %d2, %d4
|
||||
or.l %d4, %d3
|
||||
add.l %d3, (%a0)
|
||||
subq.l #1, %d0
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.worder1:
|
||||
move.l (%a1), %a5
|
||||
move.l (%a0)+, %a6
|
||||
1:
|
||||
mac.l %a6, %a5, (%a0), %a6, %acc0
|
||||
move.l %accext01, %d4
|
||||
movclr.l %acc0, %d3
|
||||
lsr.l %d1, %d3
|
||||
asl.l %d2, %d4
|
||||
or.l %d4, %d3
|
||||
add.l %a6, %d3 | residual is already in a6
|
||||
move.l %d3, (%a0)+
|
||||
subq.l #1, %d0
|
||||
jne 1b
|
||||
jra .exit
|
||||
|
||||
.wdefault:
|
||||
/* we do the filtering in an unrolled by 4 loop as far as we can, and then
|
||||
do the rest by jump table. */
|
||||
lea.l (%a1, %d3.l*4), %a2 | need to start in the other end of coefs
|
||||
move.l %a0, %a3 | working copy of history pointer
|
||||
move.l %d3, %d4
|
||||
lsr.l #2, %d4 | coefs/4, num of iterations needed in next loop
|
||||
move.l (%a3)+, %a5 | preload data for loop
|
||||
1:
|
||||
lea.l (-4*4, %a2), %a2 | move lpc coef pointer four samples backwards
|
||||
movem.l (%a2), %d5-%d7/%a4 | load four coefs
|
||||
mac.l %a5, %a4, (%a3)+, %a5, %acc0
|
||||
mac.l %a5, %d7, (%a3)+, %a5, %acc0
|
||||
mac.l %a5, %d6, (%a3)+, %a5, %acc0
|
||||
mac.l %a5, %d5, (%a3)+, %a5, %acc0
|
||||
subq.l #1, %d4 | any more unrolled loop operations left?
|
||||
jne 1b
|
||||
|
||||
moveq.l #3, %d4 | mask 0x00000003
|
||||
and.l %d3, %d4 | get the remaining samples to be filtered
|
||||
jmp.l (2, %pc, %d4*2) | then jump into mac.l chain
|
||||
| jumptable:
|
||||
bra.b 3f | none left
|
||||
bra.b 2f | one left
|
||||
bra.b 1f | two left
|
||||
| three left
|
||||
move.l -(%a2), %d4
|
||||
mac.l %a5, %d4, (%a3)+, %a5, %acc0
|
||||
1:
|
||||
move.l -(%a2), %d4
|
||||
mac.l %a5, %d4, (%a3)+, %a5, %acc0
|
||||
2:
|
||||
move.l -(%a2), %d4
|
||||
mac.l %a5, %d4, (%a3)+, %a5, %acc0
|
||||
3:
|
||||
move.l %accext01, %d5 | get high 32 bits of result
|
||||
movclr.l %acc0, %d4 | get low 32 bits of result
|
||||
lsr.l %d1, %d4 | shift qlevel bits right
|
||||
asl.l %d2, %d5 | shift 32 - qlevel bits left
|
||||
or.l %d5, %d4 | combine top and low bits after shift
|
||||
add.l %a5, %d4 | add residual, which is in a5 by now
|
||||
move.l %d4, -(%a3) | save, a3 is also one past save location
|
||||
addq.l #4, %a0 | increment history pointer
|
||||
subq.l #1, %d0 | decrement sample count
|
||||
jne .wdefault | are we done?
|
||||
| if so, fall through to exit
|
||||
|
||||
.exit:
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
|
||||
#include "bitstream.h"
|
||||
|
||||
void lpc_decode_emac(int blocksize, int qlevel, int pred_order, int32_t* data, int* coeffs);
|
||||
void lpc_decode_emac(int blocksize, int qlevel, int pred_order, int32_t* data,
|
||||
int* coeffs);
|
||||
void lpc_decode_emac_wide(int blocksize, int qlevel, int pred_order,
|
||||
int32_t* data, int* coeffs);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -262,10 +262,12 @@ static int decode_subframe_lpc(FLACContext *s, int32_t* decoded, int pred_order)
|
|||
if ((s->bps + coeff_prec + av_log2(pred_order)) <= 32) {
|
||||
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
|
||||
(void)sum;
|
||||
lpc_decode_emac(s->blocksize - pred_order, qlevel, pred_order, decoded + pred_order, coeffs);
|
||||
lpc_decode_emac(s->blocksize - pred_order, qlevel, pred_order,
|
||||
decoded + pred_order, coeffs);
|
||||
#elif defined(CPU_ARM) && !defined(SIMULATOR)
|
||||
(void)sum;
|
||||
lpc_decode_arm(s->blocksize - pred_order, qlevel, pred_order, decoded + pred_order, coeffs);
|
||||
lpc_decode_arm(s->blocksize - pred_order, qlevel, pred_order,
|
||||
decoded + pred_order, coeffs);
|
||||
#else
|
||||
for (i = pred_order; i < s->blocksize; i++)
|
||||
{
|
||||
|
@ -276,6 +278,12 @@ static int decode_subframe_lpc(FLACContext *s, int32_t* decoded, int pred_order)
|
|||
}
|
||||
#endif
|
||||
} else {
|
||||
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
|
||||
(void)wsum;
|
||||
(void)j;
|
||||
lpc_decode_emac_wide(s->blocksize - pred_order, qlevel, pred_order,
|
||||
decoded + pred_order, coeffs);
|
||||
#else
|
||||
for (i = pred_order; i < s->blocksize; i++)
|
||||
{
|
||||
wsum = 0;
|
||||
|
@ -283,6 +291,7 @@ static int decode_subframe_lpc(FLACContext *s, int32_t* decoded, int pred_order)
|
|||
wsum += (int64_t)coeffs[j] * (int64_t)decoded[i-j-1];
|
||||
decoded[i] += wsum >> qlevel;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
Loading…
Reference in a new issue