rockbox/apps/codecs/libwavpack/arm.S
Daniel Stenberg 2acc0ac542 Updated our source code header to explicitly mention that we are GPL v2 or
later. We still need to hunt down snippets used that are not. 1324 modified
files...
http://www.rockbox.org/mail/archive/rockbox-dev-archive-2008-06/0060.shtml


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17847 a1c6a512-1295-4272-9138-f99709370657
2008-06-28 18:10:04 +00:00

474 lines
17 KiB
ArmAsm

/***************************************************************************
*             __________               __   ___.
*   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
*   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
*   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
*   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
*                     \/            \/     \/    \/            \/
* $Id$
*
* Copyright (C) 2006 by David Bryant
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* This is an assembly optimized version of the following WavPack function:
*
* void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
* long *buffer, long sample_count);
*
* It performs a single pass of stereo decorrelation on the provided buffer.
* Note that this version of the function requires that the 8 previous stereo
* samples are visible and correct. In other words, it ignores the "samples_*"
* fields in the decorr_pass structure and gets the history data directly
* from the buffer. It does, however, return the appropriate history samples
* to the decorr_pass structure before returning.
*
* This is written to work on an ARM7TDMI processor. This version only uses
* the 32-bit multiply-accumulate instruction and so will overflow with 24-bit
* WavPack files.
*/
.text
.align
.global decorr_stereo_pass_cont_arm
/*
 * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
 *                                   long *buffer, long sample_count);
 *
 * on entry:
 *
 * r0 = struct decorr_pass *dpp
 * r1 = long *buffer
 * r2 = long sample_count
 *
 * Entry code: saves the registers it uses (r4-r8, r10, r11, lr), loads
 * delta and both weights from dpp and then dispatches on dpp->term to the
 * loop that handles that term. Nothing is returned in r0; all results go
 * to the buffer and to the dpp structure.
 *
 * Registers set up here and kept by every term loop:
 *
 * r0 = dpp->weight_B           r6 = dpp->delta
 * r4 = dpp->weight_A           r7 = eptr (buffer + sample_count * 8 bytes)
 * r5 = dpp                     r11 = 512 (for rounding)
 *
 * A sample_count of zero or less processes no samples: the weights are
 * written back unchanged by common_exit and the sample history in dpp is
 * left untouched. The test must cover negative counts as well as zero
 * because every term loop tests its end condition at the bottom and would
 * otherwise run once.
 *
 * Unknown negative terms (anything other than -1, -2, -3) also go straight
 * to common_exit.
 */
decorr_stereo_pass_cont_arm:
        stmfd   sp!, {r4 - r8, r10, r11, lr}
        mov     r5, r0                  @ r5 = dpp
        mov     r11, #512               @ r11 = 512 for rounding
        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B
        cmp     r2, #0                  @ exit if no samples to process
        ble     common_exit             @  (signed: also rejects counts < 0)
        add     r7, r1, r2, asl #3      @ r7 = buffer ending position
        ldrsh   r2, [r5, #0]            @ r2 = dpp->term
        cmp     r2, #0
        bmi     minus_term
        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
        ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18
        ldr     r8, [r1, #-8]           @  (lr/r10 = 2nd previous left/right,
        ldr     r3, [r1, #-4]           @   r8/r3 = previous left/right)
        cmp     r2, #17
        beq     term_17_loop
        cmp     r2, #18
        beq     term_18_loop
        cmp     r2, #2
        beq     term_2_loop
        b       term_default_loop       @ else handle default (1-8, except 2)
minus_term:
        mov     r10, #1024              @ r10 = -1024 for weight clipping
        rsb     r10, r10, #0            @  (only used for negative terms)
        cmn     r2, #1                  @ cmn rX, #n tests for rX == -n
        beq     term_minus_1
        cmn     r2, #2
        beq     term_minus_2
        cmn     r2, #3
        beq     term_minus_3
        b       common_exit             @ any other negative term: do nothing
/*
******************************************************************************
* Loop to handle term = 17 condition
*
* Each iteration handles one left sample and then one right sample. For each
* channel the decorrelation value is (2 * previous sample) - (second previous
* sample) of that channel. It is multiplied by the channel weight, rounded,
* shifted right by 10 and added to the sample read from the buffer. The
* weight is then moved by delta towards or away from zero depending on the
* signs of the decorrelation value and the sample that was read.
*
* r0 = dpp->weight_B           r8 = previous left sample
* r1 = bptr                    r9 =
* r2 = current sample          r10 = second previous right sample
* r3 = previous right sample   r11 = 512 (for rounding)
* r4 = dpp->weight_A           ip = current decorrelation value
* r5 = dpp                     sp =
* r6 = dpp->delta              lr = second previous left sample
* r7 = eptr                    pc =
*******************************************************************************
*/
term_17_loop:
rsbs ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev
mov lr, r8 @ previous becomes 2nd previous
ldr r2, [r1], #4 @ get sample & update pointer
mla r8, ip, r4, r11 @ mult decorr value by weight, round,
add r8, r2, r8, asr #10 @ shift, and add to new sample
@ Z is still the result of the rsbs above. With a zero decorr value the
@ product is just the rounding constant, 512 >> 10 = 0, so the sample is
@ unchanged and the store can be skipped.
strne r8, [r1, #-4] @ if change possible, store sample back
cmpne r2, #0
beq .L325 @ no weight update if decorr value or sample is 0
teq ip, r2 @ update weight based on signs
submi r4, r4, r6 @ signs differ: weight_A -= delta
addpl r4, r4, r6 @ signs equal: weight_A += delta
.L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel
mov r10, r3
ldr r2, [r1], #4
mla r3, ip, r0, r11
add r3, r2, r3, asr #10
strne r3, [r1, #-4]
cmpne r2, #0
beq .L329
teq ip, r2
submi r0, r0, r6 @ signs differ: weight_B -= delta
addpl r0, r0, r6 @ signs equal: weight_B += delta
.L329: cmp r7, r1 @ loop back if more samples to do
bhi term_17_loop @ (unsigned compare of eptr against bptr)
b store_1718 @ common exit for terms 17 & 18
/*
******************************************************************************
* Loop to handle term = 18 condition
*
* Same structure as the term = 17 loop, but the decorrelation value for each
* channel is prev + ((prev - 2nd prev) >> 1), i.e. ((3 * prev) - 2nd prev) >> 1.
* When the loop finishes, execution falls through into store_1718.
*
* r0 = dpp->weight_B           r8 = previous left sample
* r1 = bptr                    r9 =
* r2 = current sample          r10 = second previous right sample
* r3 = previous right sample   r11 = 512 (for rounding)
* r4 = dpp->weight_A           ip = decorrelation value
* r5 = dpp                     sp =
* r6 = dpp->delta              lr = second previous left sample
* r7 = eptr                    pc =
*******************************************************************************
*/
term_18_loop:
sub ip, r8, lr @ decorr value =
mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1
adds ip, r8, ip, asr #1 @ (sets Z if the decorr value is zero)
ldr r2, [r1], #4 @ get sample & update pointer
mla r8, ip, r4, r11 @ mult decorr value by weight, round,
add r8, r2, r8, asr #10 @ shift, and add to new sample
@ Z is still the result of the adds above: a zero decorr value leaves the
@ sample unchanged (512 >> 10 = 0), so the store is skipped.
strne r8, [r1, #-4] @ if change possible, store sample back
cmpne r2, #0
beq .L337 @ no weight update if decorr value or sample is 0
teq ip, r2 @ update weight based on signs
submi r4, r4, r6 @ signs differ: weight_A -= delta
addpl r4, r4, r6 @ signs equal: weight_A += delta
.L337: sub ip, r3, r10 @ do same thing for right channel
mov r10, r3
adds ip, r3, ip, asr #1
ldr r2, [r1], #4
mla r3, ip, r0, r11
add r3, r2, r3, asr #10
strne r3, [r1, #-4]
cmpne r2, #0
beq .L341
teq ip, r2
submi r0, r0, r6 @ signs differ: weight_B -= delta
addpl r0, r0, r6 @ signs equal: weight_B += delta
.L341: cmp r7, r1 @ loop back if more samples to do
bhi term_18_loop @ (unsigned compare of eptr against bptr)
/*
 * Shared tail for terms 17 and 18: write the two most recent samples of
 * each channel, still held in registers, into the sample history of the
 * decorr structure (left channel at offsets 8 and 12, right channel at
 * offsets 40 and 44), then leave through common_exit.
 */
store_1718:
        str     r8, [r5, #8]            @ previous left sample
        str     lr, [r5, #12]           @ second previous left sample
        str     r3, [r5, #40]           @ previous right sample
        str     r10, [r5, #44]          @ second previous right sample
        b       common_exit             @ weights are written back there
/*
******************************************************************************
* Loop to handle term = 2 condition
* (note that this case can be handled by the default term handler (1-8), but
* this special case is faster because it doesn't have to read memory twice)
*
* The decorrelation value for each channel is simply the second previous
* sample of that channel, which is kept in a register from one iteration to
* the next instead of being re-read from the buffer.
*
* r0 = dpp->weight_B           r8 = previous left sample
* r1 = bptr                    r9 =
* r2 = current sample          r10 = second previous right sample
* r3 = previous right sample   r11 = 512 (for rounding)
* r4 = dpp->weight_A           ip = decorrelation value
* r5 = dpp                     sp =
* r6 = dpp->delta              lr = second previous left sample
* r7 = eptr                    pc =
*******************************************************************************
*/
term_2_loop:
movs ip, lr @ get decorrelation value & test
mov lr, r8 @ previous becomes 2nd previous
ldr r2, [r1], #4 @ get sample & update pointer
mla r8, ip, r4, r11 @ mult decorr value by weight, round,
add r8, r2, r8, asr #10 @ shift, and add to new sample
@ Z is still the result of the movs above: a zero decorr value leaves the
@ sample unchanged (512 >> 10 = 0), so the store is skipped.
strne r8, [r1, #-4] @ if change possible, store sample back
cmpne r2, #0
beq .L225 @ no weight update if decorr value or sample is 0
teq ip, r2 @ update weight based on signs
submi r4, r4, r6 @ signs differ: weight_A -= delta
addpl r4, r4, r6 @ signs equal: weight_A += delta
.L225: movs ip, r10 @ do same thing for right channel
mov r10, r3
ldr r2, [r1], #4
mla r3, ip, r0, r11
add r3, r2, r3, asr #10
strne r3, [r1, #-4]
cmpne r2, #0
beq .L229
teq ip, r2
submi r0, r0, r6 @ signs differ: weight_B -= delta
addpl r0, r0, r6 @ signs equal: weight_B += delta
.L229: cmp r7, r1 @ loop back if more samples to do
bhi term_2_loop @ (unsigned compare of eptr against bptr)
b default_term_exit @ this exit updates all dpp->samples
/*
******************************************************************************
* Loop to handle default term condition
*
* The decorrelation value is read from the buffer on every step: it is the
* sample of the same channel located (term * 8) bytes, i.e. term stereo
* sample pairs, before the current one. Unlike the other positive-term loops
* the updated sample is always stored back. When the loop finishes,
* execution falls through into default_term_exit.
*
* r0 = dpp->weight_B           r8 = result accumulator
* r1 = bptr                    r9 =
* r2 = dpp->term               r10 =
* r3 = decorrelation value     r11 = 512 (for rounding)
* r4 = dpp->weight_A           ip = current sample
* r5 = dpp                     sp =
* r6 = dpp->delta              lr =
* r7 = eptr                    pc =
*******************************************************************************
*/
term_default_loop:
ldr ip, [r1] @ get original sample
ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term
mla r8, r3, r4, r11 @ mult decorr value by weight, round,
add r8, ip, r8, asr #10 @ shift and add to new sample
str r8, [r1], #4 @ store updated sample & update pointer
cmp r3, #0
cmpne ip, #0
beq .L350 @ no weight update if decorr value or sample is 0
teq ip, r3 @ update weight based on signs
submi r4, r4, r6 @ signs differ: weight_A -= delta
addpl r4, r4, r6 @ signs equal: weight_A += delta
.L350: ldr ip, [r1] @ do the same thing for right channel
ldr r3, [r1, -r2, asl #3]
mla r8, r3, r0, r11
add r8, ip, r8, asr #10
str r8, [r1], #4
cmp r3, #0
cmpne ip, #0
beq .L354
teq ip, r3
submi r0, r0, r6 @ signs differ: weight_B -= delta
addpl r0, r0, r6 @ signs equal: weight_B += delta
.L354: cmp r7, r1 @ loop back if more samples to do
bhi term_default_loop @ (unsigned compare of eptr against bptr)
/*
 * Exit path for terms 1-8. The last 8 stereo sample pairs are copied from
 * the buffer into the sample history of the decorr structure, whether or
 * not the given term needs all of them.
 *
 * The newest pair goes into history slot (term - 1) & 7; each older pair
 * goes into the slot before it, wrapping modulo 8. Left samples are stored
 * in the array at offset 8 and right samples in the array at offset 40.
 *
 * On entry r1 points just past the last processed sample; it is walked
 * backwards through the buffer here. ip = history slot, lr = pairs left.
 */
default_term_exit:
        ldrsh   ip, [r5, #0]            @ ip = dpp->term
        sub     ip, ip, #1              @ slot for the newest sample pair
        mov     lr, #8                  @ eight sample pairs to copy
.Lcopy_history:
        and     r3, ip, #7              @ wrap slot number into 0..7
        add     r3, r5, r3, asl #2      @ r3 = dpp + slot * 4
        ldr     r2, [r1, #-4]           @ right sample of this pair
        str     r2, [r3, #40]
        ldr     r2, [r1, #-8]!          @ left sample; r1 steps back one pair
        str     r2, [r3, #8]
        sub     ip, ip, #1              @ next older pair, previous slot
        subs    lr, lr, #1
        bne     .Lcopy_history
        b       common_exit
/*
******************************************************************************
* Loop to handle term = -1 condition
*
* The left sample is decorrelated from the previous right sample, and the
* right sample from the left sample that was just updated. After every
* weight update the weight is clipped to the range -1024..+1024.
*
* r0 = dpp->weight_B           r8 =
* r1 = bptr                    r9 =
* r2 = intermediate result     r10 = -1024 (for clipping)
* r3 = previous right sample   r11 = 512 (for rounding)
* r4 = dpp->weight_A           ip = current sample
* r5 = dpp                     sp =
* r6 = dpp->delta              lr = updated left sample
* r7 = eptr                    pc =
*******************************************************************************
*/
term_minus_1:
ldr r3, [r1, #-4] @ r3 = right sample just before the buffer start
term_minus_1_loop:
ldr ip, [r1] @ for left channel the decorrelation value
mla r2, r3, r4, r11 @ is the previous right sample (in r3)
add lr, ip, r2, asr #10
str lr, [r1], #8 @ store updated left & step to next sample pair
cmp r3, #0
cmpne ip, #0
beq .L361 @ no weight update if decorr value or sample is 0
teq ip, r3 @ update weight based on signs
submi r4, r4, r6 @ signs differ: weight_A -= delta
addpl r4, r4, r6 @ signs equal: weight_A += delta
cmp r4, #1024 @ then clip weight to +/-1024
movgt r4, #1024
cmp r4, r10
movlt r4, r10
.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value
mla r3, lr, r0, r11 @ is the just updated left sample (in lr)
add r3, r2, r3, asr #10
str r3, [r1, #-4] @ r3 is also the previous right for the next pass
cmp lr, #0
cmpne r2, #0
beq .L369
teq r2, lr
submi r0, r0, r6 @ signs differ: weight_B -= delta
addpl r0, r0, r6 @ signs equal: weight_B += delta
cmp r0, #1024 @ then clip weight to +/-1024
movgt r0, #1024
cmp r0, r10
movlt r0, r10
.L369: cmp r7, r1 @ loop back if more samples to do
bhi term_minus_1_loop @ (unsigned compare of eptr against bptr)
str r3, [r5, #8] @ else store right sample and exit
b common_exit
/*
******************************************************************************
* Loop to handle term = -2 condition
* (note that the channels are processed in the reverse order here)
*
* The right sample is decorrelated from the previous left sample, and the
* left sample from the right sample that was just updated. After every
* weight update the weight is clipped to the range -1024..+1024.
*
* r0 = dpp->weight_B           r8 =
* r1 = bptr                    r9 =
* r2 = intermediate result     r10 = -1024 (for clipping)
* r3 = previous left sample    r11 = 512 (for rounding)
* r4 = dpp->weight_A           ip = current sample
* r5 = dpp                     sp =
* r6 = dpp->delta              lr = updated right sample
* r7 = eptr                    pc =
*******************************************************************************
*/
term_minus_2:
ldr r3, [r1, #-8] @ r3 = left sample of the pair before the buffer start
term_minus_2_loop:
ldr ip, [r1, #4] @ for right channel the decorrelation value
mla r2, r3, r0, r11 @ is the previous left sample (in r3)
add lr, ip, r2, asr #10
str lr, [r1, #4] @ store updated right sample
cmp r3, #0
cmpne ip, #0
beq .L380 @ no weight update if decorr value or sample is 0
teq ip, r3 @ update weight based on signs
submi r0, r0, r6 @ signs differ: weight_B -= delta
addpl r0, r0, r6 @ signs equal: weight_B += delta
cmp r0, #1024 @ then clip weight to +/-1024
movgt r0, #1024
cmp r0, r10
movlt r0, r10
.L380: ldr r2, [r1, #0] @ for left channel the decorrelation value
mla r3, lr, r4, r11 @ is the just updated right sample (in lr)
add r3, r2, r3, asr #10
str r3, [r1], #8 @ store updated left & step to next sample pair
cmp lr, #0
cmpne r2, #0
beq .L388
teq r2, lr
submi r4, r4, r6 @ signs differ: weight_A -= delta
addpl r4, r4, r6 @ signs equal: weight_A += delta
cmp r4, #1024 @ then clip weight to +/-1024
movgt r4, #1024
cmp r4, r10
movlt r4, r10
.L388: cmp r7, r1 @ loop back if more samples to do
bhi term_minus_2_loop @ (unsigned compare of eptr against bptr)
str r3, [r5, #40] @ else store left channel and exit
b common_exit
/*
******************************************************************************
* Loop to handle term = -3 condition
*
* Each channel is decorrelated from the previous sample of the other
* channel: the left sample uses the previous right sample (r3) and the right
* sample uses the previous left sample (r8). After every weight update the
* weight is clipped to the range -1024..+1024. When the loop finishes, the
* last right and left samples are stored into dpp and execution falls
* through into common_exit.
*
* r0 = dpp->weight_B           r8 = previous left sample
* r1 = bptr                    r9 =
* r2 = current left sample     r10 = -1024 (for clipping)
* r3 = previous right sample   r11 = 512 (for rounding)
* r4 = dpp->weight_A           ip = intermediate result
* r5 = dpp                     sp =
* r6 = dpp->delta              lr =
* r7 = eptr                    pc =
*******************************************************************************
*/
term_minus_3:
ldr r3, [r1, #-4] @ load previous samples
ldr r8, [r1, #-8]
term_minus_3_loop:
ldr ip, [r1] @ ip = left sample as read from the buffer
mla r2, r3, r4, r11 @ decorr value is the previous right sample (r3)
add r2, ip, r2, asr #10 @ r2 = updated left sample
str r2, [r1], #4
cmp r3, #0
cmpne ip, #0
beq .L399 @ no weight update if decorr value or sample is 0
teq ip, r3 @ update weight based on signs
submi r4, r4, r6 @ signs differ: weight_A -= delta
addpl r4, r4, r6 @ signs equal: weight_A += delta
cmp r4, #1024 @ then clip weight to +/-1024
movgt r4, #1024
cmp r4, r10
movlt r4, r10
.L399: movs ip, r8 @ ip = previous left we use now
mov r8, r2 @ r8 = current left we use next time
ldr r2, [r1], #4 @ r2 = right sample as read from the buffer
mla r3, ip, r0, r11
add r3, r2, r3, asr #10 @ r3 = updated right sample
@ Z is still the result of the movs above: a zero decorr value leaves the
@ sample unchanged (512 >> 10 = 0), so the store is skipped.
strne r3, [r1, #-4]
cmpne r2, #0
beq .L407
teq ip, r2
submi r0, r0, r6 @ signs differ: weight_B -= delta
addpl r0, r0, r6 @ signs equal: weight_B += delta
cmp r0, #1024 @ then clip weight to +/-1024
movgt r0, #1024
cmp r0, r10
movlt r0, r10
.L407: cmp r7, r1 @ loop back if more samples to do
bhi term_minus_3_loop @ (unsigned compare of eptr against bptr)
str r3, [r5, #8] @ else store previous samples & exit
str r8, [r5, #40]
/*
 * Every path through the function ends here. The two weights, possibly
 * updated by one of the loops above, are written back to the decorr
 * structure as 16-bit values so the next call continues from them. The
 * registers saved on entry are then restored and the saved return address
 * is loaded straight into pc.
 */
common_exit:
        strh    r0, [r5, #6]            @ dpp->weight_B = r0
        strh    r4, [r5, #4]            @ dpp->weight_A = r4
        ldmfd   sp!, {r4 - r8, r10, r11, pc}