f40bfc9267
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97 Reviewed-on: http://gerrit.rockbox.org/137 Reviewed-by: Nils Wallménius <nils@rockbox.org> Tested-by: Nils Wallménius <nils@rockbox.org>
477 lines
17 KiB
ArmAsm
477 lines
17 KiB
ArmAsm
/***************************************************************************
|
|
* __________ __ ___.
|
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
* \/ \/ \/ \/ \/
|
|
* $Id$
|
|
*
|
|
* Copyright (C) 2006 by David Bryant
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
* KIND, either express or implied.
|
|
*
|
|
****************************************************************************/
|
|
|
|
/* This is an assembly optimized version of the following WavPack function:
|
|
*
|
|
* void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
|
|
* long *buffer, long sample_count);
|
|
*
|
|
* It performs a single pass of stereo decorrelation on the provided buffer.
|
|
* Note that this version of the function requires that the 8 previous stereo
|
|
* samples are visible and correct. In other words, it ignores the "samples_*"
|
|
* fields in the decorr_pass structure and gets the history data directly
|
|
* from the buffer. It does, however, return the appropriate history samples
|
|
* to the decorr_pass structure before returning.
|
|
*
|
|
 * This is written to work on an ARM7TDMI processor. This version only uses the
|
|
* 32-bit multiply-accumulate instruction and so will overflow with 24-bit
|
|
* WavPack files.
|
|
*/
|
|
|
|
#include "config.h"
|
|
|
|
.text
|
|
.align
|
|
.global decorr_stereo_pass_cont_arm
|
|
|
|
/*
|
|
* on entry:
|
|
*
|
|
* r0 = struct decorr_pass *dpp
|
|
* r1 = long *buffer
|
|
* r2 = long sample_count
|
|
*/
|
|
|
|
decorr_stereo_pass_cont_arm:

/*
 * Entry point and term dispatcher.
 *
 * In:   r0 = struct decorr_pass *dpp
 *       r1 = long *buffer (interleaved left/right samples)
 *       r2 = long sample_count (number of stereo pairs)
 *
 * Register set-up shared by every term handler below:
 *       r0 = dpp->weight_B     r4 = dpp->weight_A     r5 = dpp
 *       r6 = dpp->delta        r7 = eptr (buffer end) r11 = 512 (rounding)
 *
 * A sample_count that is zero OR negative is treated as "nothing to do":
 * every loop below tests its end condition at the bottom, so entering one
 * with eptr <= bptr would still process (and overwrite) one stereo pair.
 * In that case only the (unchanged) weights are written back.
 */

        stmfd   sp!, {r4 - r8, r10, r11, lr}
        mov     r5, r0                  @ r5 = dpp
        mov     r11, #512               @ r11 = 512 for rounding
        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B
        cmp     r2, #0                  @ exit if no samples to process
        ble     common_exit             @ (signed test: also rejects count < 0)

        add     r7, r1, r2, asl #3      @ r7 = buffer ending position
                                        @    (8 bytes per stereo pair)
        ldrsh   r2, [r5, #0]            @ r2 = dpp->term
        cmp     r2, #0
        bmi     minus_term              @ negative terms are dispatched below

        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
        ldr     r10, [r1, #-12]         @ for terms 2, 17, and 18:
        ldr     r8, [r1, #-8]           @   lr / r10 = left / right two pairs back
        ldr     r3, [r1, #-4]           @   r8 / r3  = left / right one pair back
        cmp     r2, #17
        beq     term_17_loop
        cmp     r2, #18
        beq     term_18_loop
        cmp     r2, #2
        beq     term_2_loop
        b       term_default_loop       @ else handle default (1-8, except 2)

minus_term:
        mov     r10, #1024              @ r10 = -1024 for weight clipping
        rsb     r10, r10, #0            @ (only used for negative terms)
        cmn     r2, #1                  @ cmn rN, #k sets Z when rN == -k
        beq     term_minus_1
        cmn     r2, #2
        beq     term_minus_2
        cmn     r2, #3
        beq     term_minus_3
        b       common_exit             @ any other negative term: do nothing
|
|
|
|
/*
 ******************************************************************************
 * Loop to handle term = 17 condition
 *
 * r0 = dpp->weight_B           r8 = previous left sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 = second previous right sample
 * r3 = previous right sample   r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = current decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous left sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */
|
|
|
|
term_17_loop:
        /*
         * One stereo pair per iteration.  The decorrelation value is
         * (2 * previous) - second previous, taken separately per channel.
         * The flags set by RSBS stay live up to the STRNE/CMPNE pair,
         * because LDR, MOV, MLA and ADD (without S) leave them untouched.
         */

        @ ---- left channel (history in r8 / lr, weight_A in r4) ----
        ldr     r2, [r1], #4            @ r2 = incoming sample, bptr++
        rsbs    ip, lr, r8, asl #1      @ ip = 2 * prev - 2nd prev, Z if zero
        mov     lr, r8                  @ previous becomes 2nd previous
        mla     r8, ip, r4, r11         @ r8 = ip * weight_A + 512
        add     r8, r2, r8, asr #10     @ r8 = sample + (r8 >> 10)
        strne   r8, [r1, #-4]           @ ip == 0 leaves the sample as it was,
                                        @ so the store is skipped in that case
        cmpne   r2, #0                  @ ip != 0: now test the incoming sample
        beq     .Lt17_right             @ either one zero -> weight untouched
        teq     r2, ip                  @ N set when the two signs differ
        addpl   r4, r4, r6              @ same sign:      weight_A += delta
        submi   r4, r4, r6              @ opposite signs: weight_A -= delta

        @ ---- right channel (history in r3 / r10, weight_B in r0) ----
.Lt17_right:
        ldr     r2, [r1], #4            @ r2 = incoming sample, bptr++
        rsbs    ip, r10, r3, asl #1     @ ip = 2 * prev - 2nd prev, Z if zero
        mov     r10, r3                 @ previous becomes 2nd previous
        mla     r3, ip, r0, r11         @ r3 = ip * weight_B + 512
        add     r3, r2, r3, asr #10     @ r3 = sample + (r3 >> 10)
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt17_next
        teq     r2, ip
        addpl   r0, r0, r6              @ same sign:      weight_B += delta
        submi   r0, r0, r6              @ opposite signs: weight_B -= delta

.Lt17_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_17_loop            @ yes (unsigned pointer compare)
        b       store_1718              @ no: shared exit for terms 17 & 18
|
|
|
|
/*
 ******************************************************************************
 * Loop to handle term = 18 condition
 *
 * r0 = dpp->weight_B           r8 = previous left sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 = second previous right sample
 * r3 = previous right sample   r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous left sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */
|
|
|
|
term_18_loop:
        /*
         * One stereo pair per iteration.  The decorrelation value is
         * ((3 * previous) - second previous) >> 1, computed here as
         * previous + ((previous - second previous) >> 1).
         * The flags set by ADDS stay live up to the STRNE/CMPNE pair.
         */

        @ ---- left channel (history in r8 / lr, weight_A in r4) ----
        ldr     r2, [r1], #4            @ r2 = incoming sample, bptr++
        sub     ip, r8, lr              @ ip = prev - 2nd prev
        adds    ip, r8, ip, asr #1      @ ip = prev + (ip >> 1), Z if zero
        mov     lr, r8                  @ previous becomes 2nd previous
        mla     r8, ip, r4, r11         @ r8 = ip * weight_A + 512
        add     r8, r2, r8, asr #10     @ r8 = sample + (r8 >> 10)
        strne   r8, [r1, #-4]           @ skip the store when ip == 0 (the
                                        @ sample would be unchanged anyway)
        cmpne   r2, #0                  @ ip != 0: now test the incoming sample
        beq     .Lt18_right             @ either one zero -> weight untouched
        teq     r2, ip                  @ N set when the two signs differ
        addpl   r4, r4, r6              @ same sign:      weight_A += delta
        submi   r4, r4, r6              @ opposite signs: weight_A -= delta

        @ ---- right channel (history in r3 / r10, weight_B in r0) ----
.Lt18_right:
        ldr     r2, [r1], #4            @ r2 = incoming sample, bptr++
        sub     ip, r3, r10             @ ip = prev - 2nd prev
        adds    ip, r3, ip, asr #1      @ ip = prev + (ip >> 1), Z if zero
        mov     r10, r3                 @ previous becomes 2nd previous
        mla     r3, ip, r0, r11         @ r3 = ip * weight_B + 512
        add     r3, r2, r3, asr #10     @ r3 = sample + (r3 >> 10)
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt18_next
        teq     r2, ip
        addpl   r0, r0, r6              @ same sign:      weight_B += delta
        submi   r0, r0, r6              @ opposite signs: weight_B -= delta

.Lt18_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_18_loop            @ yes; otherwise fall into store_1718

/*
 * Shared exit for terms 17 & 18: copy the two most recent samples of each
 * channel back into the decorr_pass structure.  The left-channel values go
 * to offsets 8/12 and the right-channel values to offsets 40/44
 * (presumably samples_A[0..1] and samples_B[0..1] - confirm against the
 * struct declaration).
 */

store_1718:
        str     r8, [r5, #8]            @ previous left
        str     lr, [r5, #12]           @ second previous left
        str     r3, [r5, #40]           @ previous right
        str     r10, [r5, #44]          @ second previous right
        b       common_exit             @ write weights back and return
|
|
|
|
/*
 ******************************************************************************
 * Loop to handle term = 2 condition
 * (note that this case can be handled by the default term handler (1-8), but
 * this special case is faster because it doesn't have to read memory twice)
 *
 * r0 = dpp->weight_B           r8 = previous left sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 = second previous right sample
 * r3 = previous right sample   r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous left sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */
|
|
|
|
term_2_loop:
        /*
         * One stereo pair per iteration.  The decorrelation value is simply
         * the sample from two pairs back, which is kept in a register
         * (lr for left, r10 for right) instead of being re-read from memory.
         * The flags set by MOVS stay live up to the STRNE/CMPNE pair.
         */

        @ ---- left channel (history in r8 / lr, weight_A in r4) ----
        ldr     r2, [r1], #4            @ r2 = incoming sample, bptr++
        movs    ip, lr                  @ ip = 2nd previous sample, Z if zero
        mov     lr, r8                  @ previous becomes 2nd previous
        mla     r8, ip, r4, r11         @ r8 = ip * weight_A + 512
        add     r8, r2, r8, asr #10     @ r8 = sample + (r8 >> 10)
        strne   r8, [r1, #-4]           @ skip the store when ip == 0 (the
                                        @ sample would be unchanged anyway)
        cmpne   r2, #0                  @ ip != 0: now test the incoming sample
        beq     .Lt2_right              @ either one zero -> weight untouched
        teq     r2, ip                  @ N set when the two signs differ
        addpl   r4, r4, r6              @ same sign:      weight_A += delta
        submi   r4, r4, r6              @ opposite signs: weight_A -= delta

        @ ---- right channel (history in r3 / r10, weight_B in r0) ----
.Lt2_right:
        ldr     r2, [r1], #4            @ r2 = incoming sample, bptr++
        movs    ip, r10                 @ ip = 2nd previous sample, Z if zero
        mov     r10, r3                 @ previous becomes 2nd previous
        mla     r3, ip, r0, r11         @ r3 = ip * weight_B + 512
        add     r3, r2, r3, asr #10     @ r3 = sample + (r3 >> 10)
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt2_next
        teq     r2, ip
        addpl   r0, r0, r6              @ same sign:      weight_B += delta
        submi   r0, r0, r6              @ opposite signs: weight_B -= delta

.Lt2_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_2_loop             @ yes (unsigned pointer compare)
        b       default_term_exit       @ no: this exit updates all dpp->samples
|
|
|
|
/*
|
|
******************************************************************************
|
|
* Loop to handle default term condition
|
|
*
|
|
* r0 = dpp->weight_B r8 = result accumulator
|
|
* r1 = bptr r9 =
|
|
* r2 = dpp->term r10 =
|
|
* r3 = decorrelation value r11 = 512 (for rounding)
|
|
* r4 = dpp->weight_A ip = current sample
|
|
* r5 = dpp sp =
|
|
* r6 = dpp->delta lr =
|
|
* r7 = eptr pc =
|
|
*******************************************************************************
|
|
*/
|
|
|
|
term_default_loop:
        /*
         * One stereo pair per iteration.  The decorrelation value is read
         * straight from the buffer, r2 (= dpp->term) stereo pairs behind
         * the sample being processed (8 bytes per pair, hence asl #3).
         */

        @ ---- left channel (weight_A in r4) ----
        ldr     r3, [r1, -r2, asl #3]   @ r3 = decorrelation value
        ldr     ip, [r1], #4            @ ip = original sample, bptr++
        mla     r8, r3, r4, r11         @ r8 = r3 * weight_A + 512
        add     r8, ip, r8, asr #10     @ r8 = sample + (r8 >> 10)
        str     r8, [r1, #-4]           @ store the updated sample
        cmp     r3, #0                  @ weight only moves when neither the
        cmpne   ip, #0                  @ decorrelation value nor the sample
        beq     .Ldef_right             @ is zero
        teq     r3, ip                  @ N set when the two signs differ
        addpl   r4, r4, r6              @ same sign:      weight_A += delta
        submi   r4, r4, r6              @ opposite signs: weight_A -= delta

        @ ---- right channel (weight_B in r0) ----
.Ldef_right:
        ldr     r3, [r1, -r2, asl #3]   @ r3 = decorrelation value
        ldr     ip, [r1], #4            @ ip = original sample, bptr++
        mla     r8, r3, r0, r11         @ r8 = r3 * weight_B + 512
        add     r8, ip, r8, asr #10     @ r8 = sample + (r8 >> 10)
        str     r8, [r1, #-4]
        cmp     r3, #0
        cmpne   ip, #0
        beq     .Ldef_next
        teq     r3, ip
        addpl   r0, r0, r6              @ same sign:      weight_B += delta
        submi   r0, r0, r6              @ opposite signs: weight_B -= delta

.Ldef_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_default_loop       @ yes; otherwise fall into the exit

/*
 * This exit is used by terms 1-8 to store the previous 8 samples into the
 * decorr structure (even if they are not all used for the given term).
 *
 * Walks backwards from bptr (r1) over eight stereo pairs.  The slot index
 * starts at (term - 1) and is decremented each step, wrapped with "& 7".
 * Right-channel samples go to offset 40 + 4 * slot and left-channel samples
 * to offset 8 + 4 * slot (presumably samples_B[] and samples_A[] - confirm
 * against the struct declaration).
 */

default_term_exit:
        ldrsh   r3, [r5, #0]            @ r3 = dpp->term
        mov     lr, #7                  @ lr counts 7 .. 0 => eight passes
        sub     ip, r3, #1              @ ip = term - 1 (unmasked slot index)

.Ldef_save:
        and     r3, ip, #7              @ r3 = slot index, wrapped to 0..7
        ldr     r2, [r1, #-4]           @ r2 = right sample of the pair
        add     r3, r5, r3, asl #2      @ r3 = dpp + 4 * slot
        str     r2, [r3, #40]
        ldr     r2, [r1, #-8]!          @ r2 = left sample; bptr steps back
        str     r2, [r3, #8]            @      one whole stereo pair
        sub     ip, ip, #1              @ next (lower) slot
        subs    lr, lr, #1              @ counter goes negative after 8 passes
        bpl     .Ldef_save
        b       common_exit
|
|
|
|
/*
|
|
******************************************************************************
|
|
* Loop to handle term = -1 condition
|
|
*
|
|
* r0 = dpp->weight_B r8 =
|
|
* r1 = bptr r9 =
|
|
* r2 = intermediate result r10 = -1024 (for clipping)
|
|
* r3 = previous right sample r11 = 512 (for rounding)
|
|
* r4 = dpp->weight_A ip = current sample
|
|
* r5 = dpp sp =
|
|
* r6 = dpp->delta lr = updated left sample
|
|
* r7 = eptr pc =
|
|
*******************************************************************************
|
|
*/
|
|
|
|
term_minus_1:
        ldr     r3, [r1, #-4]           @ r3 = right sample preceding the buffer

        /*
         * One stereo pair per iteration.  Left is predicted from the
         * previous right sample; right is then predicted from the left
         * sample that was just produced.  Both weights are clipped to
         * the range -1024 .. +1024 (r10 holds -1024) after each update.
         */

term_minus_1_loop:
        @ ---- left channel: decorrelation value = previous right (r3) ----
        mla     r2, r3, r4, r11         @ r2 = r3 * weight_A + 512
        ldr     ip, [r1]                @ ip = original left sample
        add     lr, ip, r2, asr #10     @ lr = updated left sample
        str     lr, [r1], #8            @ store it; bptr -> next stereo pair
        cmp     r3, #0                  @ weight only moves when neither the
        cmpne   ip, #0                  @ decorrelation value nor the sample
        beq     .Lm1_right              @ is zero
        teq     r3, ip                  @ N set when the two signs differ
        addpl   r4, r4, r6              @ same sign:      weight_A += delta
        submi   r4, r4, r6              @ opposite signs: weight_A -= delta
        cmp     r4, #1024               @ then clip weight to +/-1024
        movgt   r4, #1024
        cmp     r4, r10
        movlt   r4, r10

        @ ---- right channel: decorrelation value = updated left (lr) ----
.Lm1_right:
        mla     r3, lr, r0, r11         @ r3 = lr * weight_B + 512
        ldr     r2, [r1, #-4]           @ r2 = original right sample
        add     r3, r2, r3, asr #10     @ r3 = updated right sample
        str     r3, [r1, #-4]
        cmp     lr, #0
        cmpne   r2, #0
        beq     .Lm1_next
        teq     lr, r2
        addpl   r0, r0, r6              @ same sign:      weight_B += delta
        submi   r0, r0, r6              @ opposite signs: weight_B -= delta
        cmp     r0, #1024               @ then clip weight to +/-1024
        movgt   r0, #1024
        cmp     r0, r10
        movlt   r0, r10

.Lm1_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_1_loop       @ yes (unsigned pointer compare)

        str     r3, [r5, #8]            @ else store last right sample at
        b       common_exit             @ dpp + 8 and exit
|
|
|
|
/*
|
|
******************************************************************************
|
|
* Loop to handle term = -2 condition
|
|
* (note that the channels are processed in the reverse order here)
|
|
*
|
|
* r0 = dpp->weight_B r8 =
|
|
* r1 = bptr r9 =
|
|
* r2 = intermediate result r10 = -1024 (for clipping)
|
|
* r3 = previous left sample r11 = 512 (for rounding)
|
|
* r4 = dpp->weight_A ip = current sample
|
|
* r5 = dpp sp =
|
|
* r6 = dpp->delta lr = updated right sample
|
|
* r7 = eptr pc =
|
|
*******************************************************************************
|
|
*/
|
|
|
|
term_minus_2:
        ldr     r3, [r1, #-8]           @ r3 = left sample preceding the buffer

        /*
         * One stereo pair per iteration, channels in reverse order.
         * Right is predicted from the previous left sample; left is then
         * predicted from the right sample that was just produced.  Both
         * weights are clipped to -1024 .. +1024 (r10 holds -1024).
         */

term_minus_2_loop:
        @ ---- right channel: decorrelation value = previous left (r3) ----
        mla     r2, r3, r0, r11         @ r2 = r3 * weight_B + 512
        ldr     ip, [r1, #4]            @ ip = original right sample
        add     lr, ip, r2, asr #10     @ lr = updated right sample
        str     lr, [r1, #4]
        cmp     r3, #0                  @ weight only moves when neither the
        cmpne   ip, #0                  @ decorrelation value nor the sample
        beq     .Lm2_left               @ is zero
        teq     r3, ip                  @ N set when the two signs differ
        addpl   r0, r0, r6              @ same sign:      weight_B += delta
        submi   r0, r0, r6              @ opposite signs: weight_B -= delta
        cmp     r0, #1024               @ then clip weight to +/-1024
        movgt   r0, #1024
        cmp     r0, r10
        movlt   r0, r10

        @ ---- left channel: decorrelation value = updated right (lr) ----
.Lm2_left:
        mla     r3, lr, r4, r11         @ r3 = lr * weight_A + 512
        ldr     r2, [r1], #8            @ r2 = original left sample;
                                        @ bptr -> next stereo pair
        add     r3, r2, r3, asr #10     @ r3 = updated left sample
        str     r3, [r1, #-8]           @ store into the pair just left behind
        cmp     lr, #0
        cmpne   r2, #0
        beq     .Lm2_next
        teq     lr, r2
        addpl   r4, r4, r6              @ same sign:      weight_A += delta
        submi   r4, r4, r6              @ opposite signs: weight_A -= delta
        cmp     r4, #1024               @ then clip weight to +/-1024
        movgt   r4, #1024
        cmp     r4, r10
        movlt   r4, r10

.Lm2_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_2_loop       @ yes (unsigned pointer compare)

        str     r3, [r5, #40]           @ else store last left sample at
        b       common_exit             @ dpp + 40 and exit
|
|
|
|
/*
|
|
******************************************************************************
|
|
* Loop to handle term = -3 condition
|
|
*
|
|
* r0 = dpp->weight_B r8 = previous left sample
|
|
* r1 = bptr r9 =
|
|
* r2 = current left sample r10 = -1024 (for clipping)
|
|
* r3 = previous right sample r11 = 512 (for rounding)
|
|
* r4 = dpp->weight_A ip = intermediate result
|
|
* r5 = dpp sp =
|
|
* r6 = dpp->delta lr =
|
|
* r7 = eptr pc =
|
|
*******************************************************************************
|
|
*/
|
|
|
|
term_minus_3:
        ldr     r8, [r1, #-8]           @ r8 = previous left sample
        ldr     r3, [r1, #-4]           @ r3 = previous right sample

        /*
         * One stereo pair per iteration.  Left is predicted from the
         * previous right sample (r3) and right from the previous left
         * sample (r8).  Both weights are clipped to -1024 .. +1024
         * (r10 holds -1024) after each update.
         */

term_minus_3_loop:
        @ ---- left channel: decorrelation value = previous right (r3) ----
        mla     r2, r3, r4, r11         @ r2 = r3 * weight_A + 512
        ldr     ip, [r1]                @ ip = original left sample
        add     r2, ip, r2, asr #10     @ r2 = updated left sample
        str     r2, [r1], #4            @ store it, bptr++
        cmp     r3, #0                  @ weight only moves when neither the
        cmpne   ip, #0                  @ decorrelation value nor the sample
        beq     .Lm3_right              @ is zero
        teq     r3, ip                  @ N set when the two signs differ
        addpl   r4, r4, r6              @ same sign:      weight_A += delta
        submi   r4, r4, r6              @ opposite signs: weight_A -= delta
        cmp     r4, #1024               @ then clip weight to +/-1024
        movgt   r4, #1024
        cmp     r4, r10
        movlt   r4, r10

        @ ---- right channel: decorrelation value = previous left (r8) ----
.Lm3_right:
        movs    ip, r8                  @ ip = previous left we use now (Z if 0)
        mov     r8, r2                  @ r8 = current left we use next time
        ldr     r2, [r1], #4            @ r2 = original right sample, bptr++
        mla     r3, ip, r0, r11         @ r3 = ip * weight_B + 512
        add     r3, r2, r3, asr #10     @ r3 = updated right sample
        strne   r3, [r1, #-4]           @ skip the store when ip == 0 (the
                                        @ sample would be unchanged anyway)
        cmpne   r2, #0                  @ ip != 0: now test the incoming sample
        beq     .Lm3_next
        teq     r2, ip
        addpl   r0, r0, r6              @ same sign:      weight_B += delta
        submi   r0, r0, r6              @ opposite signs: weight_B -= delta
        cmp     r0, #1024               @ then clip weight to +/-1024
        movgt   r0, #1024
        cmp     r0, r10
        movlt   r0, r10

.Lm3_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_3_loop       @ yes (unsigned pointer compare)

        str     r8, [r5, #40]           @ else store previous samples (left at
        str     r3, [r5, #8]            @ dpp + 40, right at dpp + 8) and
                                        @ fall through to the common exit
|
|
|
|
/*
|
|
* Before finally exiting we must store weights back for next time
|
|
*/
|
|
|
|
common_exit:
        @ Every path ends here: r0 / r4 hold the current weight_B / weight_A
        @ and r5 still points at the decorr_pass structure.
        strh    r0, [r5, #6]            @ dpp->weight_B = r0
        strh    r4, [r5, #4]            @ dpp->weight_A = r4
        @ ldmpc is a project macro (not defined in this file; presumably from
        @ config.h) - expected to restore the listed registers and load pc
        @ from the saved lr, matching the stmfd at function entry.
        ldmpc   regs="r4-r8, r10-r11"
|
|
|