Keep assembler versions of DSP routines in dsp_arch.S files. This also
solves the annoying register allocator problem when not using -fomit-frame-pointer for the Coldfire assembler optimised crossfeed routine. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9215 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
5f9bd4fb98
commit
cc94ae45a7
4 changed files with 145 additions and 83 deletions
|
@ -75,6 +75,7 @@ codecs.c
|
|||
dsp.c
|
||||
eq.c
|
||||
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
|
||||
dsp_cf.S
|
||||
eq_cf.S
|
||||
#elif defined(CPU_ARM) && !defined(SIMULATOR)
|
||||
eq_arm.S
|
||||
|
|
92
apps/dsp.c
92
apps/dsp.c
|
@ -28,6 +28,10 @@
|
|||
#include "replaygain.h"
|
||||
#include "debug.h"
|
||||
|
||||
#ifndef SIMULATOR
|
||||
#include <dsp_asm.h>
|
||||
#endif
|
||||
|
||||
/* The "dither" code to convert the 24-bit samples produced by libmad was
|
||||
* taken from the coolplayer project - coolplayer.sourceforge.net
|
||||
*/
|
||||
|
@ -517,90 +521,12 @@ static long dither_sample(int32_t sample, int32_t bias, int32_t mask,
|
|||
return output;
|
||||
}
|
||||
|
||||
/* Apply a constant gain to the samples (e.g., for ReplayGain). May update
|
||||
* the src array if gain was applied.
|
||||
* Note that this must be called before the resampler.
|
||||
/* Applies crossfeed to the stereo signal in src.
|
||||
* Crossfeed is a process where listening over speakers is simulated. This
|
||||
* is good for old hard panned stereo records, which might be quite fatiguing
|
||||
* to listen to on headphones with no crossfeed.
|
||||
*/
|
||||
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
|
||||
/* Coefficients for the ColdFire crossfeed loop. The inline assembly walks
 * this table with a post-incremented pointer, so the entries must stay in
 * exactly this order: LOW, LOW_COMP, HIGH_NEG, HIGH_COMP, ATT, ATT_COMP. */
static const long crossfeed_coefs[6] ICONST_ATTR = {
|
||||
LOW, LOW_COMP, HIGH_NEG, HIGH_COMP, ATT, ATT_COMP
|
||||
};
|
||||
|
||||
/* ColdFire inline-assembly crossfeed. Rewrites count sample pairs in place
 * in src[0] (left) and src[1] (right).
 *
 * State is loaded from crossfeed_data before the loop and stored back after
 * it, using fixed byte offsets: 0-15 = filter history (d0-d3),
 * 16 = delay[0], 16+52 = delay[1] (13 longwords each), 120 = delay index.
 *
 * NOTE(review): count is only tested after the loop body has run once
 * (subq/jne at the bottom), so count == 0 would not stop after zero
 * samples - confirm callers always pass count >= 1.
 * NOTE(review): each pass prefetches src[0][i+1] and src[1][i+1], so the
 * final pass reads one longword past the last sample; the value is unused.
 * NOTE(review): [count], [src0], [src1] and [coef] are modified by the asm
 * (subq / post-increment / lea) but are listed as input-only operands, and
 * there is no "memory" clobber although the asm stores to the sample
 * buffers and to crossfeed_data - check against the GCC extended asm rules.
 */
static void apply_crossfeed(int32_t* src[], int count)
|
||||
{
|
||||
asm volatile (
|
||||
"lea.l crossfeed_data, %%a1 \n"
|
||||
"lea.l (16, %%a1), %%a0 \n"
|
||||
"movem.l (%%a1), %%d0-%%d3 \n"
|
||||
"move.l (120, %%a1), %%d4 \n"
|
||||
/* fetch left, right, LOW and LOW_COMP for first iteration */
|
||||
"move.l (%[src0]), %%d5 \n"
|
||||
"move.l (%[src1]), %%d6 \n"
|
||||
"move.l (%[coef])+, %%a1 \n"
|
||||
"move.l (%[coef])+, %%a2 \n"
|
||||
/* Register usage in loop:
|
||||
* a0 = &delay[0][0], a1 & a2 = coefs
|
||||
* d0 = low_left, d1 = low_right,
|
||||
* d2 = high_left, d3 = high_right,
|
||||
* d4 = delay line index,
|
||||
* d5 = src[0][i], d6 = src[1][i].
|
||||
* The rest are described in asm constraint list.
|
||||
*/
|
||||
".cfloop:"
|
||||
/* LOW*low_left + LOW_COMP*left */
|
||||
"mac.l %%a1, %%d0, %%acc0 \n"
|
||||
"mac.l %%a2, %%d5, %%acc0 \n"
|
||||
/* LOW*low_right + LOW_COMP*right */
|
||||
"mac.l %%a1, %%d1, (%[coef])+, %%a1, %%acc1 \n" /* a1 = HIGH_NEG */
|
||||
"mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = HIGH_COMP */
|
||||
"movclr.l %%acc0, %%d0 \n" /* get low_left */
|
||||
"movclr.l %%acc1, %%d1 \n" /* get low_right */
|
||||
/* HIGH_NEG*high_left + HIGH_COMP*left */
|
||||
"mac.l %%a1, %%d2, %%acc0 \n"
|
||||
"mac.l %%a2, %%d5, %%acc0 \n"
|
||||
/* HIGH_NEG*high_right + HIGH_COMP*right */
|
||||
"mac.l %%a1, %%d3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = ATT */
|
||||
"mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = ATT_COMP */
|
||||
"lea.l (-6*4, %[coef]), %[coef] \n" /* coef = &coefs[0] */
|
||||
"move.l (%%a0, %%d4*4), %%a3 \n" /* a3=delay[0][idx] */
|
||||
"move.l (52, %%a0, %%d4*4), %%d5 \n" /* d5=delay[1][idx] */
|
||||
"movclr.l %%acc0, %%d2 \n" /* get high_left */
|
||||
"movclr.l %%acc1, %%d3 \n" /* get high_right */
|
||||
/* ATT*delay_r + ATT_COMP*high_left */
|
||||
"mac.l %%a1, %%d5, (4, %[src0]), %%d5, %%acc0\n" /* d5 = src[0][i+1] */
|
||||
"mac.l %%a2, %%d2, (4, %[src1]), %%d6, %%acc0\n" /* d6 = src[1][i+1] */
|
||||
/* ATT*delay_l + ATT_COMP*high_right */
|
||||
"mac.l %%a1, %%a3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = LOW */
|
||||
"mac.l %%a2, %%d3, (%[coef])+, %%a2, %%acc1 \n" /* a2 = LOW_COMP */
|
||||
|
||||
/* save crossfed samples to output */
|
||||
"movclr.l %%acc0, %%a3 \n"
|
||||
"move.l %%a3, (%[src0])+ \n" /* src[0][i++] = out_l */
|
||||
"movclr.l %%acc1, %%a3 \n"
|
||||
"move.l %%a3, (%[src1])+ \n" /* src[1][i++] = out_r */
|
||||
"move.l %%d0, (%%a0, %%d4*4) \n" /* delay[0][index] = low_left */
|
||||
"move.l %%d1, (52, %%a0, %%d4*4)\n" /* delay[1][index] = low_right */
|
||||
"addq.l #1, %%d4 \n" /* index++ */
|
||||
"cmp.l #13, %%d4 \n" /* if (index >= 13) { */
|
||||
"jlt .nowrap \n"
|
||||
"clr.l %%d4 \n" /* index = 0 */
|
||||
".nowrap: \n" /* } */
|
||||
"subq.l #1, %[count] \n"
|
||||
"jne .cfloop \n"
|
||||
/* save data back to struct */
|
||||
"lea.l crossfeed_data, %%a1 \n"
|
||||
"movem.l %%d0-%%d3, (%%a1) \n"
|
||||
"move.l %%d4, (120, %%a1) \n"
|
||||
/* NOTE: We _just_ have enough registers for our use here, clobber just
|
||||
one more and GCC will fail. */
|
||||
/* Operand lists follow: no outputs, four register inputs, then the
   data/address registers the asm uses as scratch. */
:
|
||||
: [count] "d" (count),
|
||||
[src0] "a" (src[0]), [src1] "a" (src[1]), [coef] "a" (crossfeed_coefs)
|
||||
: "d0", "d1", "d2", "d3", "d4", "d5", "d6",
|
||||
"a0", "a1", "a2", "a3"
|
||||
);
|
||||
}
|
||||
#else
|
||||
#ifndef DSP_HAVE_ASM_CROSSFEED
|
||||
static void apply_crossfeed(int32_t* src[], int count)
|
||||
{
|
||||
int32_t a; /* accumulator */
|
||||
|
|
31
apps/dsp_asm.h
Normal file
31
apps/dsp_asm.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* Copyright (C) 2006 Thom Johansen
|
||||
*
|
||||
* All files in this archive are subject to the GNU General Public License.
|
||||
* See the file COPYING in the source tree root for full license agreement.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#ifndef _DSP_ASM_H
|
||||
#define _DSP_ASM_H
|
||||
|
||||
/* On ColdFire, apply_crossfeed() is provided as an external symbol rather
 * than as a static C function. DSP_HAVE_ASM_CROSSFEED is the macro dsp.c
 * tests (#ifndef) before defining its own C apply_crossfeed().
 * NOTE(review): int32_t is not declared by anything visible in this header;
 * presumably the including file (or config.h) provides it - confirm. */
#ifdef CPU_COLDFIRE
|
||||
#define DSP_HAVE_ASM_CROSSFEED
|
||||
void apply_crossfeed(int32_t* src[], int count);
|
||||
#endif /* CPU_COLDFIRE */
|
||||
|
||||
#endif /* _DSP_ASM_H */
|
||||
|
104
apps/dsp_cf.S
Normal file
104
apps/dsp_cf.S
Normal file
|
@ -0,0 +1,104 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* Copyright (C) 2006 Thom Johansen
|
||||
*
|
||||
* All files in this archive are subject to the GNU General Public License.
|
||||
* See the file COPYING in the source tree root for full license agreement.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Coefficient table for apply_crossfeed. The routine reads it through a
 * post-incremented pointer (a6), so the order of the six longwords matters.
 * NOTE(review): the values look like signed 1.31 fixed-point fractions
 * (e.g. 0x4CCCCCCD is about 0.6) - confirm against the EMAC mode that is
 * configured elsewhere.
 */
.section .idata,"aw",@progbits
|
||||
crossfeed_coefs:
|
||||
.long 0x4CCCCCCD | LOW
|
||||
.long 0x33333333 | LOW_COMP
|
||||
.long -0x66666666 | HIGH_NEG
|
||||
.long 0x66666666 | HIGH_COMP
|
||||
.long 0x0CCCCCCD | ATT
|
||||
.long 0x73333333 | ATT_COMP
|
||||
|
||||
/* void apply_crossfeed(int32_t* src[], int count)
 *
 * ColdFire assembler crossfeed: rewrites count sample pairs in place in
 * src[0] and src[1].
 *
 * The arguments are read from the stack at 44+4 (src) and 44+8 (count),
 * i.e. just above the 44-byte area reserved on entry to save d2-d7/a2-a6.
 * Those registers are restored before rts.
 *
 * State lives in crossfeed_data and is addressed by fixed byte offsets:
 * 0-15 = filter history (d0-d3), 16 = delay[0], 16+52 = delay[1]
 * (13 longwords each), 120 = delay line index. It is loaded before the
 * loop and stored back after it.
 *
 * NOTE(review): count is only tested after the loop body has run once
 * (subq/jne at the bottom), so count == 0 would not stop after zero
 * samples - confirm callers always pass count >= 1.
 * NOTE(review): each pass prefetches src[0][i+1] and src[1][i+1], so the
 * final pass reads one longword past the last sample; the value is unused.
 */
.section .text
|
||||
.global apply_crossfeed
|
||||
apply_crossfeed:
|
||||
lea.l (-44, %sp), %sp
|
||||
movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
|
||||
move.l (44+4, %sp), %a4
|
||||
movem.l (%a4), %a4-%a5 | a4 = src[0], a5 = src[1]
|
||||
move.l (44+8, %sp), %d7 | d7 = count
|
||||
|
||||
lea.l crossfeed_data, %a1
|
||||
lea.l crossfeed_coefs, %a6
|
||||
lea.l (16, %a1), %a0 | a0 = &delay[0][0]
|
||||
movem.l (%a1), %d0-%d3 | fetch filter history samples
|
||||
move.l (120, %a1), %d4 | fetch delay line index
|
||||
move.l (%a4), %d5 | d5 = left sample
|
||||
move.l (%a5), %d6 | d6 = right sample
|
||||
move.l (%a6)+, %a1 | a1 = LOW value
|
||||
move.l (%a6)+, %a2 | a2 = LOW_COMP value
|
||||
/* Register usage in loop:
|
||||
* a0 = &delay[0][0], a1 & a2 = coefs, a3 = temp storage,
|
||||
* a4 = src[0], a5 = src[1], a6 = &crossfeed_coefs[0],
|
||||
* d0 = low_left, d1 = low_right,
|
||||
* d2 = high_left, d3 = high_right,
|
||||
* d4 = delay line index,
|
||||
* d5 = src[0][i], d6 = src[1][i].
|
||||
* d7 = count
|
||||
*/
|
||||
.cfloop:
|
||||
| LOW*low_left + LOW_COMP*left
|
||||
mac.l %a1, %d0, %acc0
|
||||
mac.l %a2, %d5, %acc0
|
||||
| LOW*low_right + LOW_COMP*right
|
||||
mac.l %a1, %d1, (%a6)+, %a1, %acc1 | a1 = HIGH_NEG
|
||||
mac.l %a2, %d6, (%a6)+, %a2, %acc1 | a2 = HIGH_COMP
|
||||
movclr.l %acc0, %d0 | get low_left
|
||||
movclr.l %acc1, %d1 | get low_right
|
||||
| HIGH_NEG*high_left + HIGH_COMP*left
|
||||
mac.l %a1, %d2, %acc0
|
||||
mac.l %a2, %d5, %acc0
|
||||
| HIGH_NEG*high_right + HIGH_COMP*right
|
||||
mac.l %a1, %d3, (%a6)+, %a1, %acc1 | a1 = ATT
|
||||
mac.l %a2, %d6, (%a6)+, %a2, %acc1 | a2 = ATT_COMP
|
||||
lea.l (-6*4, %a6), %a6 | coef = &coefs[0]
|
||||
move.l (%a0, %d4*4), %a3 | a3 = delay[0][idx]
|
||||
move.l (52, %a0, %d4*4), %d5 | d5 = delay[1][idx]
|
||||
movclr.l %acc0, %d2 | get high_left
|
||||
movclr.l %acc1, %d3 | get high_right
|
||||
| ATT*delay_r + ATT_COMP*high_left
|
||||
mac.l %a1, %d5, (4, %a4), %d5, %acc0 | d5 = src[0][i+1]
|
||||
mac.l %a2, %d2, (4, %a5), %d6, %acc0 | d6 = src[1][i+1]
|
||||
| ATT*delay_l + ATT_COMP*high_right
|
||||
mac.l %a1, %a3, (%a6)+, %a1, %acc1 | a1 = LOW
|
||||
mac.l %a2, %d3, (%a6)+, %a2, %acc1 | a2 = LOW_COMP
|
||||
|
||||
| save crossfed samples to output
|
||||
movclr.l %acc0, %a3
|
||||
move.l %a3, (%a4)+ | src[0][i++] = out_l
|
||||
movclr.l %acc1, %a3
|
||||
move.l %a3, (%a5)+ | src[1][i++] = out_r
|
||||
move.l %d0, (%a0, %d4*4) | delay[0][index] = low_left
|
||||
move.l %d1, (52, %a0, %d4*4) | delay[1][index] = low_right
|
||||
addq.l #1, %d4 | index++
|
||||
cmp.l #13, %d4 | if (index >= 13) {
|
||||
jlt .nowrap
|
||||
clr.l %d4 | index = 0
|
||||
.nowrap: | }
|
||||
subq.l #1, %d7
|
||||
jne .cfloop
|
||||
| save data back to struct
|
||||
lea.l crossfeed_data, %a1
|
||||
movem.l %d0-%d3, (%a1)
|
||||
move.l %d4, (120, %a1)
|
||||
movem.l (%sp), %d2-%d7/%a2-%a6
|
||||
lea.l (44, %sp), %sp
|
||||
rts
|
||||
|
Loading…
Reference in a new issue