libmusepack: tiny optimization of the ARMv4 mpc_decoder_windowing_D implementations: use register-indexed addressing to store the data, which saves one instruction inside the loop, and delete one instruction after the loop that isn't needed.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28488 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
dbdc0a8a8c
commit
9d98583a18
1 changed files with 9 additions and 13 deletions
|
@ -164,7 +164,7 @@ mpc_decoder_windowing_D:
|
||||||
* r10 = lo, r11 = hi of 31..17
|
* r10 = lo, r11 = hi of 31..17
|
||||||
* r12 = V[31..16]
|
* r12 = V[31..16]
|
||||||
*****************************************/
|
*****************************************/
|
||||||
mov lr, #15
|
mov lr, #15*8
|
||||||
add r12, r1, #30*4 /* r12 = V[31] */
|
add r12, r1, #30*4 /* r12 = V[31] */
|
||||||
.loop15:
|
.loop15:
|
||||||
ldmia r2!, { r3-r6 } /* load D[00..03] */
|
ldmia r2!, { r3-r6 } /* load D[00..03] */
|
||||||
|
@ -240,19 +240,18 @@ mpc_decoder_windowing_D:
|
||||||
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
||||||
str r8, [r0] /* store Data */
|
str r8, [r0] /* store Data */
|
||||||
/* store Data[31..17] */
|
/* store Data[31..17] */
|
||||||
add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */
|
|
||||||
mov r10, r10, lsr #16
|
mov r10, r10, lsr #16
|
||||||
orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */
|
orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */
|
||||||
rsb r10, r10, #0 /* r10 = -r10 */
|
rsb r10, r10, #0 /* r10 = -r10 */
|
||||||
str r10, [r0], #4 /* store Data */
|
str r10, [r0, lr] /* store Data */
|
||||||
sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */
|
add r0, r0, #4 /* r0++ */
|
||||||
/* correct adresses for next loop */
|
/* correct adresses for next loop */
|
||||||
sub r12, r12, #4 /* r12 = V-- */
|
sub r12, r12, #4 /* r12 = V-- */
|
||||||
add r1, r1, #4 /* r1 = V++ */
|
add r1, r1, #4 /* r1 = V++ */
|
||||||
/* next loop */
|
/* next loop */
|
||||||
subs lr, lr, #1
|
subs lr, lr, #8
|
||||||
bgt .loop15
|
bgt .loop15
|
||||||
|
|
||||||
/******************************************
|
/******************************************
|
||||||
* V[16] with internal symmetry
|
* V[16] with internal symmetry
|
||||||
*****************************************/
|
*****************************************/
|
||||||
|
@ -293,7 +292,6 @@ mpc_decoder_windowing_D:
|
||||||
mov r8, r8, lsr #16
|
mov r8, r8, lsr #16
|
||||||
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
||||||
str r8, [r0], #4 /* store Data */
|
str r8, [r0], #4 /* store Data */
|
||||||
add r1, r1, #4 /* V++ */
|
|
||||||
|
|
||||||
ldmpc regs=r4-r11
|
ldmpc regs=r4-r11
|
||||||
#elif ARM_ARCH < 6 /* arm9 and above */
|
#elif ARM_ARCH < 6 /* arm9 and above */
|
||||||
|
@ -365,7 +363,7 @@ mpc_decoder_windowing_D:
|
||||||
* r10 = lo, r11 = hi of 31..17
|
* r10 = lo, r11 = hi of 31..17
|
||||||
* r12 = V[31..16]
|
* r12 = V[31..16]
|
||||||
*****************************************/
|
*****************************************/
|
||||||
mov lr, #15
|
mov lr, #15*8
|
||||||
add r12, r1, #30*4 /* r12 = V[31] */
|
add r12, r1, #30*4 /* r12 = V[31] */
|
||||||
.loop15:
|
.loop15:
|
||||||
ldmia r2!, { r3-r4 } /* load D[00..01] */
|
ldmia r2!, { r3-r4 } /* load D[00..01] */
|
||||||
|
@ -445,17 +443,16 @@ mpc_decoder_windowing_D:
|
||||||
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
||||||
str r8, [r0] /* store Data */
|
str r8, [r0] /* store Data */
|
||||||
/* store Data[31..17] */
|
/* store Data[31..17] */
|
||||||
add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */
|
|
||||||
mov r10, r10, lsr #16
|
mov r10, r10, lsr #16
|
||||||
orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */
|
orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */
|
||||||
rsb r10, r10, #0 /* r10 = -r10 */
|
rsb r10, r10, #0 /* r10 = -r10 */
|
||||||
str r10, [r0], #4 /* store Data */
|
str r10, [r0, lr] /* store Data */
|
||||||
sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */
|
add r0, r0, #4 /* r0++ */
|
||||||
/* correct adresses for next loop */
|
/* correct adresses for next loop */
|
||||||
sub r12, r12, #4 /* r12 = V-- */
|
sub r12, r12, #4 /* r12 = V-- */
|
||||||
add r1, r1, #4 /* r1 = V++ */
|
add r1, r1, #4 /* r1 = V++ */
|
||||||
/* next loop */
|
/* next loop */
|
||||||
subs lr, lr, #1
|
subs lr, lr, #8
|
||||||
bgt .loop15
|
bgt .loop15
|
||||||
|
|
||||||
/******************************************
|
/******************************************
|
||||||
|
@ -498,7 +495,6 @@ mpc_decoder_windowing_D:
|
||||||
mov r8, r8, lsr #16
|
mov r8, r8, lsr #16
|
||||||
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
||||||
str r8, [r0], #4 /* store Data */
|
str r8, [r0], #4 /* store Data */
|
||||||
add r1, r1, #4 /* V++ */
|
|
||||||
|
|
||||||
ldmpc regs=r4-r11
|
ldmpc regs=r4-r11
|
||||||
#else
|
#else
|
||||||
|
|
Loading…
Reference in a new issue