ARM9-optimized synth_full for libmad. Speeds up MP3 decoding by an even 2 MHz on all ARM9 and later devices. Note that this is optimized only for ARM9 (non-E) cores, although it is also faster on later devices. An ARM9E/ARM11 version will be needed for optimal performance on newer devices.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28710 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
90d77fb77a
commit
9929512682
1 changed file with 118 additions and 112 deletions
|
@ -31,7 +31,12 @@
|
|||
;; r1 = fo
|
||||
;; r2 = fe
|
||||
;; r3 = D0ptr
|
||||
;; r4 = D1ptr
|
||||
;; r4 = D1ptr
|
||||
|
||||
/*;; r5 = loop counter
|
||||
;; r6,r7 accumulator1
|
||||
;; r8,r9 accumulator2 */
|
||||
|
||||
synth_full_odd_sbsample:
|
||||
stmdb sp!, {r4-r11, lr}
|
||||
ldr r4, [sp, #36]
|
||||
|
@ -40,88 +45,89 @@ synth_full_odd_sbsample:
|
|||
mov r5, #15
|
||||
add r2, r2, #32
|
||||
.l:
|
||||
/* ;; PROD_O and odd half of SB_SAMPLE*/
|
||||
add r3, r3, #128
|
||||
add r4, r4, #128
|
||||
ldmia r1!, {r10, r11, r12, lr}
|
||||
ldr r7, [r3, #4]
|
||||
smull r6, r7, r10, r7
|
||||
ldmia r1!, {r10, r11, r12, lr}
|
||||
ldr r9, [r4, #120]
|
||||
smull r6, r7, r10, r7
|
||||
ldr sp, [r3, #60]
|
||||
smull r8, r9, r10, r9
|
||||
|
||||
ldr r10, [r3, #60]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r3, #52]
|
||||
smlal r6, r7, r11, sp
|
||||
ldr sp, [r3, #44]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #44]
|
||||
smlal r6, r7, lr, r10
|
||||
|
||||
ldr r10, [r4, #64]
|
||||
smlal r6, r7, lr, sp
|
||||
ldr sp, [r4, #72]
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r4, #72]
|
||||
smlal r8, r9, r12, r10
|
||||
ldr r10, [r4, #80]
|
||||
smlal r8, r9, r12, sp
|
||||
smlal r8, r9, lr, r10
|
||||
|
||||
ldr r10, [r3, #36]
|
||||
|
||||
ldmia r1!, {r11, r12, sp, lr}
|
||||
ldr r10, [r3, #36]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r3, #28]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #20]
|
||||
smlal r6, r7, sp, r10
|
||||
ldr r10, [r3, #12]
|
||||
smlal r6, r7, lr, r10
|
||||
|
||||
ldr r10, [r4, #88]
|
||||
ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r4, #96]
|
||||
smlal r8, r9, r12, r10
|
||||
|
||||
ldr r10, [r3, #28]
|
||||
ldr r11, [r3, #20]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #12]
|
||||
smlal r6, r7, sp, r11
|
||||
ldr r11, [r4, #96]
|
||||
smlal r6, r7, lr, r10
|
||||
ldr r10, [r4, #104]
|
||||
smlal r8, r9, r12, r11
|
||||
ldr r11, [r4, #112]
|
||||
smlal r8, r9, sp, r10
|
||||
ldr r10, [r4, #112]
|
||||
smlal r8, r9, lr, r10
|
||||
smlal r8, r9, lr, r11
|
||||
|
||||
rsbs r6, r6, #0
|
||||
rsc r7, r7, #0
|
||||
|
||||
ldmia r2!, {r11, r12, sp, lr}
|
||||
|
||||
|
||||
/* ;; PROD_A and even half of SB_SAMPLE*/
|
||||
ldr r10, [r3, #0]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r3, #56]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #48]
|
||||
smlal r6, r7, sp, r10
|
||||
ldr r10, [r3, #40]
|
||||
smlal r6, r7, lr, r10
|
||||
|
||||
ldr r10, [r4, #60]
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r4, #68]
|
||||
smlal r8, r9, r12, r10
|
||||
ldr r10, [r4, #76]
|
||||
smlal r8, r9, sp, r10
|
||||
ldr r10, [r4, #84]
|
||||
smlal r8, r9, lr, r10
|
||||
|
||||
ldmia r2!, {r11, r12, sp, lr}
|
||||
ldr r10, [r3, #32]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r3, #24]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #16]
|
||||
|
||||
ldr r10, [r4, #60] /*;;1 cycle stall on arm9, but we free up r11*/
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r11, [r3, #56]
|
||||
ldr r10, [r3, #48]
|
||||
smlal r6, r7, r12, r11
|
||||
ldr r11, [r3, #40]
|
||||
smlal r6, r7, sp, r10
|
||||
ldr r10, [r3, #8]
|
||||
smlal r6, r7, lr, r10
|
||||
|
||||
ldr r10, [r4, #92]
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r4, #100]
|
||||
ldr r10, [r4, #68]
|
||||
smlal r6, r7, lr, r11
|
||||
ldr r11, [r4, #76]
|
||||
smlal r8, r9, r12, r10
|
||||
ldr r10, [r4, #108]
|
||||
smlal r8, r9, sp, r10
|
||||
ldr r10, [r4, #116]
|
||||
ldr r10, [r4, #84]
|
||||
smlal r8, r9, sp, r11
|
||||
smlal r8, r9, lr, r10
|
||||
|
||||
ldr r10, [r3, #32]
|
||||
ldmia r2!, {r11, r12, sp, lr}
|
||||
smlal r6, r7, r11, r10
|
||||
|
||||
ldr r10, [r4, #92] /*;;1 cycle stall on arm9, but we free up r11*/
|
||||
smlal r8, r9, r11, r10
|
||||
|
||||
ldr r10, [r3, #24]
|
||||
ldr r11, [r3, #16]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #8]
|
||||
smlal r6, r7, sp, r11
|
||||
ldr r11, [r4, #100]
|
||||
smlal r6, r7, lr, r10
|
||||
ldr r10, [r4, #108]
|
||||
smlal r8, r9, r12, r11
|
||||
ldr r11, [r4, #116]
|
||||
smlal r8, r9, sp, r10
|
||||
smlal r8, r9, lr, r11
|
||||
|
||||
movs r6, r6, lsr #16
|
||||
adc r6, r6, r7, lsl #16
|
||||
|
@ -146,88 +152,88 @@ synth_full_even_sbsample:
|
|||
mov r5, #15
|
||||
add r2, r2, #32
|
||||
.l2:
|
||||
/* ;; PROD_O and odd half of SB_SAMPLE*/
|
||||
add r3, r3, #128
|
||||
add r4, r4, #128
|
||||
ldmia r1!, {r10, r11, r12, lr}
|
||||
ldr r7, [r3, #0]
|
||||
smull r6, r7, r10, r7
|
||||
ldmia r1!, {r10, r11, r12, lr}
|
||||
ldr r9, [r4, #60]
|
||||
smull r6, r7, r10, r7
|
||||
ldr sp, [r3, #56]
|
||||
smull r8, r9, r10, r9
|
||||
|
||||
ldr r10, [r3, #56]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r3, #48]
|
||||
smlal r6, r7, r11, sp
|
||||
ldr sp, [r3, #40]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #40]
|
||||
smlal r6, r7, lr, r10
|
||||
|
||||
ldr r10, [r4, #68]
|
||||
smlal r6, r7, lr, sp
|
||||
|
||||
ldr sp, [r4, #76]
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r4, #76]
|
||||
smlal r8, r9, r12, r10
|
||||
ldr r10, [r4, #84]
|
||||
smlal r8, r9, r12, sp
|
||||
smlal r8, r9, lr, r10
|
||||
|
||||
ldmia r1!, {r11, r12, sp, lr}
|
||||
|
||||
ldr r10, [r3, #32]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r3, #24]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #16]
|
||||
smlal r6, r7, sp, r10
|
||||
ldr r10, [r3, #8]
|
||||
smlal r6, r7, lr, r10
|
||||
ldmia r1!, {r11, r12, sp, lr}
|
||||
|
||||
ldr r10, [r4, #92]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r4, #92]
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r4, #100]
|
||||
smlal r8, r9, r12, r10
|
||||
ldr r10, [r3, #24]
|
||||
ldr r11, [r3, #16]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #8]
|
||||
smlal r6, r7, sp, r11
|
||||
ldr r11, [r4, #100]
|
||||
smlal r6, r7, lr, r10
|
||||
ldr r10, [r4, #108]
|
||||
smlal r8, r9, r12, r11
|
||||
ldr r11, [r4, #116]
|
||||
smlal r8, r9, sp, r10
|
||||
ldr r10, [r4, #116]
|
||||
smlal r8, r9, lr, r10
|
||||
smlal r8, r9, lr, r11
|
||||
|
||||
rsbs r6, r6, #0
|
||||
rsc r7, r7, #0
|
||||
|
||||
ldmia r2!, {r11, r12, sp, lr}
|
||||
|
||||
ldr r10, [r3, #4]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r3, #60]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #52]
|
||||
smlal r6, r7, sp, r10
|
||||
ldr r10, [r3, #44]
|
||||
smlal r6, r7, lr, r10
|
||||
|
||||
ldr r10, [r4, #120]
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r4, #64]
|
||||
smlal r8, r9, r12, r10
|
||||
ldr r10, [r4, #72]
|
||||
smlal r8, r9, sp, r10
|
||||
ldr r10, [r4, #80]
|
||||
smlal r8, r9, lr, r10
|
||||
|
||||
ldmia r2!, {r11, r12, sp, lr}
|
||||
ldr r10, [r3, #36]
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r3, #28]
|
||||
ldr r10, [r4, #120] /*;;1 cycle stall on arm9, but we free up r11*/
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r3, #60]
|
||||
ldr r11, [r3, #52]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #20]
|
||||
smlal r6, r7, sp, r10
|
||||
ldr r10, [r3, #12]
|
||||
ldr r10, [r3, #44]
|
||||
smlal r6, r7, sp, r11
|
||||
ldr r11, [r4, #64]
|
||||
smlal r6, r7, lr, r10
|
||||
|
||||
ldr r10, [r4, #88]
|
||||
smlal r8, r9, r11, r10
|
||||
ldr r10, [r4, #96]
|
||||
smlal r8, r9, r12, r10
|
||||
ldr r10, [r4, #104]
|
||||
ldr r10, [r4, #72]
|
||||
smlal r8, r9, r12, r11
|
||||
ldr r11, [r4, #80]
|
||||
smlal r8, r9, sp, r10
|
||||
ldr r10, [r4, #112]
|
||||
smlal r8, r9, lr, r10
|
||||
|
||||
smlal r8, r9, lr, r11
|
||||
|
||||
ldr r10, [r3, #36]
|
||||
ldmia r2!, {r11, r12, sp, lr}
|
||||
smlal r6, r7, r11, r10
|
||||
ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/
|
||||
smlal r8, r9, r11, r10
|
||||
|
||||
ldr r10, [r3, #28]
|
||||
ldr r11, [r3, #20]
|
||||
smlal r6, r7, r12, r10
|
||||
ldr r10, [r3, #12]
|
||||
smlal r6, r7, sp, r11
|
||||
ldr r11, [r4, #96]
|
||||
smlal r6, r7, lr, r10
|
||||
ldr r10, [r4, #104]
|
||||
smlal r8, r9, r12, r11
|
||||
ldr r11, [r4, #112]
|
||||
smlal r8, r9, sp, r10
|
||||
smlal r8, r9, lr, r11
|
||||
|
||||
movs r6, r6, lsr #16
|
||||
adc r6, r6, r7, lsl #16
|
||||
|
|
Loading…
Reference in a new issue