Vorbis opts: keep floor1 lookup table in IRAM.
Slightly faster 16-bit clipping function. Misc: changed tabs for spaces to conform with Rockbox coding standards. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6608 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
ead61c1d18
commit
0a3f8e0924
11 changed files with 863 additions and 835 deletions
|
@ -24,13 +24,12 @@
|
|||
#ifndef _V_WIDE_MATH
|
||||
#define _V_WIDE_MATH
|
||||
|
||||
//#define MB() asm volatile ("" : : : "memory")
|
||||
#define MB()
|
||||
|
||||
static inline void mcf5249_init_mac(void) {
|
||||
int r;
|
||||
asm volatile ("move.l #0x20, %%macsr;" // frac, truncate, no saturation
|
||||
"movclr.l %%acc0, %[r];" // clear accumulators
|
||||
asm volatile ("move.l #0x20, %%macsr;" /* frac, truncate, no saturation */
|
||||
"movclr.l %%acc0, %[r];" /* clear accumulators */
|
||||
"move.l %%acc0, %%acc1;"
|
||||
"move.l %%acc0, %%acc2;"
|
||||
"move.l %%acc0, %%acc3;"
|
||||
|
@ -38,19 +37,18 @@ static inline void mcf5249_init_mac(void) {
|
|||
}
|
||||
|
||||
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
|
||||
ogg_int32_t r;
|
||||
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply into acc
|
||||
"movclr.l %%acc0, %[r];" // move & clear acc
|
||||
"asr.l #1, %[r];" // no overflow test
|
||||
: [r] "=d" (r)
|
||||
: [x] "r" (x), [y] "r" (y)
|
||||
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
|
||||
"movclr.l %%acc0, %[x];" /* move & clear acc */
|
||||
"asr.l #1, %[x];" /* no overflow test */
|
||||
: [x] "+&d" (x)
|
||||
: [y] "r" (y)
|
||||
: "cc");
|
||||
return r;
|
||||
return x;
|
||||
}
|
||||
|
||||
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
|
||||
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
|
||||
"movclr.l %%acc0, %[x];" // move and clear
|
||||
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
|
||||
"movclr.l %%acc0, %[x];" /* move and clear */
|
||||
: [x] "+&r" (x)
|
||||
: [y] "r" (y)
|
||||
: "cc");
|
||||
|
@ -60,14 +58,14 @@ static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
|
|||
|
||||
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
|
||||
ogg_int32_t r;
|
||||
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
|
||||
"movclr.l %%acc0, %[r];" // get higher half
|
||||
"mulu.l %[y], %[x];" // get lower half
|
||||
"asl.l #8, %[r];" // hi << 17
|
||||
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
|
||||
"movclr.l %%acc0, %[r];" /* get higher half */
|
||||
"mulu.l %[y], %[x];" /* get lower half */
|
||||
"asl.l #8, %[r];" /* hi<<16, plus one free */
|
||||
"asl.l #8, %[r];"
|
||||
"lsr.l #8, %[x];" // (unsigned)lo >> 15
|
||||
"lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
|
||||
"lsr.l #7, %[x];"
|
||||
"or.l %[x], %[r];" // or
|
||||
"or.l %[x], %[r];" /* logical-or results */
|
||||
: [r] "=&d" (r), [x] "+d" (x)
|
||||
: [y] "d" (y)
|
||||
: "cc");
|
||||
|
@ -116,10 +114,28 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
|
|||
|
||||
|
||||
|
||||
/* is there no better way of doing this using the MAC? */
|
||||
|
||||
#if 1 /* Canonical definition */
|
||||
#define XPROD32(_a, _b, _t, _v, _x, _y) \
|
||||
{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
|
||||
(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
|
||||
#else
|
||||
/* Thom Johansen suggestion; this could lose the lsb by overflow
|
||||
but does it matter in practice? */
|
||||
#define XPROD32(_a, _b, _t, _v, _x, _y) \
|
||||
asm volatile ("mac.l %[a], %[t], %%acc0;" \
|
||||
"mac.l %[b], %[v], %%acc0;" \
|
||||
"mac.l %[b], %[t], %%acc1;" \
|
||||
"msac.l %[a], %[v], %%acc1;" \
|
||||
"movclr.l %%acc0, %[x];" \
|
||||
"asr.l #1, %[x];" \
|
||||
"movclr.l %%acc1, %[y];" \
|
||||
"asr.l #1, %[y];" \
|
||||
: [x] "=&d" (_x), [y] "=&d" (_y) \
|
||||
: [a] "r" (_a), [b] "r" (_b), \
|
||||
[t] "r" (_t), [v] "r" (_v) \
|
||||
: "cc");
|
||||
#endif
|
||||
|
||||
|
||||
/* asm versions of vector multiplication for window.c */
|
||||
|
@ -127,41 +143,41 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
|
|||
static inline
|
||||
void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
|
||||
{
|
||||
asm volatile ("movem.l (%[d]), %%d0-%%d3;" // loop start
|
||||
"movem.l (%[w]), %%a0-%%a3;" // pre-fetch registers
|
||||
asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
|
||||
"movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
|
||||
"lea.l (4*4, %[w]), %[w];"
|
||||
"bra 1f;" // jump to loop condition
|
||||
"0:" // loop body
|
||||
// multiply and load next window values
|
||||
"bra 1f;" /* jump to loop condition */
|
||||
"0:" /* loop body */
|
||||
/* multiply and load next window values */
|
||||
"mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
|
||||
"mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
|
||||
"mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
|
||||
"mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
|
||||
"movclr.l %%acc0, %%d0;" // get the products
|
||||
"movclr.l %%acc0, %%d0;" /* get the products */
|
||||
"movclr.l %%acc1, %%d1;"
|
||||
"movclr.l %%acc2, %%d2;"
|
||||
"movclr.l %%acc3, %%d3;"
|
||||
// store and advance
|
||||
/* store and advance */
|
||||
"movem.l %%d0-%%d3, (%[d]);"
|
||||
"lea.l (4*4, %[d]), %[d];"
|
||||
"movem.l (%[d]), %%d0-%%d3;"
|
||||
"subq.l #4, %[n];" // done 4 elements
|
||||
"subq.l #4, %[n];" /* done 4 elements */
|
||||
"1: cmpi.l #4, %[n];"
|
||||
"bge 0b;"
|
||||
// multiply final elements
|
||||
/* multiply final elements */
|
||||
"tst.l %[n];"
|
||||
"beq 1f;" // n=0
|
||||
"beq 1f;" /* n=0 */
|
||||
"mac.l %%d0, %%a0, %%acc0;"
|
||||
"movclr.l %%acc0, %%d0;"
|
||||
"move.l %%d0, (%[d])+;"
|
||||
"subq.l #1, %[n];"
|
||||
"beq 1f;" // n=1
|
||||
"beq 1f;" /* n=1 */
|
||||
"mac.l %%d1, %%a1, %%acc0;"
|
||||
"movclr.l %%acc0, %%d1;"
|
||||
"move.l %%d1, (%[d])+;"
|
||||
"subq.l #1, %[n];"
|
||||
"beq 1f;" // n=2
|
||||
// otherwise n = 3
|
||||
"beq 1f;" /* n=2 */
|
||||
/* otherwise n = 3 */
|
||||
"mac.l %%d2, %%a2, %%acc0;"
|
||||
"movclr.l %%acc0, %%d2;"
|
||||
"move.l %%d2, (%[d])+;"
|
||||
|
@ -174,41 +190,41 @@ void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
|
|||
static inline
|
||||
void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
|
||||
{
|
||||
asm volatile ("lea.l (-3*4, %[w]), %[w];" // loop start
|
||||
"movem.l (%[d]), %%d0-%%d3;" // pre-fetch registers
|
||||
asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */
|
||||
"movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
|
||||
"movem.l (%[w]), %%a0-%%a3;"
|
||||
"bra 1f;" // jump to loop condition
|
||||
"0:" // loop body
|
||||
// multiply and load next window value
|
||||
"bra 1f;" /* jump to loop condition */
|
||||
"0:" /* loop body */
|
||||
/* multiply and load next window value */
|
||||
"mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
|
||||
"mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
|
||||
"mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
|
||||
"mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
|
||||
"movclr.l %%acc0, %%d0;" // get the products
|
||||
"movclr.l %%acc0, %%d0;" /* get the products */
|
||||
"movclr.l %%acc1, %%d1;"
|
||||
"movclr.l %%acc2, %%d2;"
|
||||
"movclr.l %%acc3, %%d3;"
|
||||
// store and advance
|
||||
/* store and advance */
|
||||
"movem.l %%d0-%%d3, (%[d]);"
|
||||
"lea.l (4*4, %[d]), %[d];"
|
||||
"movem.l (%[d]), %%d0-%%d3;"
|
||||
"subq.l #4, %[n];" // done 4 elements
|
||||
"subq.l #4, %[n];" /* done 4 elements */
|
||||
"1: cmpi.l #4, %[n];"
|
||||
"bge 0b;"
|
||||
// multiply final elements
|
||||
/* multiply final elements */
|
||||
"tst.l %[n];"
|
||||
"beq 1f;" // n=0
|
||||
"beq 1f;" /* n=0 */
|
||||
"mac.l %%d0, %%a3, %%acc0;"
|
||||
"movclr.l %%acc0, %%d0;"
|
||||
"move.l %%d0, (%[d])+;"
|
||||
"subq.l #1, %[n];"
|
||||
"beq 1f;" // n=1
|
||||
"beq 1f;" /* n=1 */
|
||||
"mac.l %%d1, %%a2, %%acc0;"
|
||||
"movclr.l %%acc0, %%d1;"
|
||||
"move.l %%d1, (%[d])+;"
|
||||
"subq.l #1, %[n];"
|
||||
"beq 1f;" // n=2
|
||||
// otherwise n = 3
|
||||
"beq 1f;" /* n=2 */
|
||||
/* otherwise n = 3 */
|
||||
"mac.l %%d2, %%a1, %%acc0;"
|
||||
"movclr.l %%acc0, %%d2;"
|
||||
"move.l %%d2, (%[d])+;"
|
||||
|
@ -226,23 +242,23 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
|
|||
"clr.l %%d1;"
|
||||
"clr.l %%d2;"
|
||||
"clr.l %%d3;"
|
||||
// loop start
|
||||
/* loop start */
|
||||
"tst.l %[n];"
|
||||
"bra 1f;"
|
||||
"0: movem.l %%d0-%%d3, (%[ptr]);"
|
||||
"lea (4*4, %[ptr]), %[ptr];"
|
||||
"subq.l #4, %[n];"
|
||||
"1: bgt 0b;"
|
||||
// remaining elements
|
||||
/* remaining elements */
|
||||
"tst.l %[n];"
|
||||
"beq 1f;" // n=0
|
||||
"beq 1f;" /* n=0 */
|
||||
"clr.l (%[ptr])+;"
|
||||
"subq.l #1, %[n];"
|
||||
"beq 1f;" // n=1
|
||||
"beq 1f;" /* n=1 */
|
||||
"clr.l (%[ptr])+;"
|
||||
"subq.l #1, %[n];"
|
||||
"beq 1f;" // n=2
|
||||
// otherwise n = 3
|
||||
"beq 1f;" /* n=2 */
|
||||
/* otherwise n = 3 */
|
||||
"clr.l (%[ptr])+;"
|
||||
"1:"
|
||||
: [n] "+d" (n), [ptr] "+a" (ptr)
|
||||
|
@ -250,6 +266,16 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
|
|||
: "%d0","%d1","%d2","%d3","cc","memory");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef _V_CLIP_MATH
|
||||
#define _V_CLIP_MATH
|
||||
|
||||
/* this is portable C and simple; why not use this as default? */
|
||||
static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) {
|
||||
register ogg_int32_t hi=32767, lo=-32768;
|
||||
return (x>=hi ? hi : (x<=lo ? lo : x));
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -216,7 +216,8 @@ static int render_point(int x0,int x1,int y0,int y1,int x){
|
|||
# define XdB(n) (n)
|
||||
#endif
|
||||
|
||||
static ogg_int32_t FLOOR_fromdB_LOOKUP[256] ={
|
||||
/* keep the floor lookup table in fast IRAM */
|
||||
static ogg_int32_t FLOOR_fromdB_LOOKUP[256] IDATA_ATTR = {
|
||||
XdB(0x000000e5), XdB(0x000000f4), XdB(0x00000103), XdB(0x00000114),
|
||||
XdB(0x00000126), XdB(0x00000139), XdB(0x0000014e), XdB(0x00000163),
|
||||
XdB(0x0000017a), XdB(0x00000193), XdB(0x000001ad), XdB(0x000001c9),
|
||||
|
|
|
@ -342,7 +342,7 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out) {
|
|||
int step;
|
||||
|
||||
#if CONFIG_CPU == MCF5249
|
||||
mcf5249_init_mac(); /* should be redundant */
|
||||
/* mcf5249_init_mac(); */ /* should be redundant */
|
||||
#endif
|
||||
|
||||
for (shift=6;!(n&(1<<shift));shift++);
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
|
||||
#include "os_types.h"
|
||||
|
||||
/* keep lookup tables in fast IRAM */
|
||||
/* we keep the most used sin cosine table in fast IRAM;
|
||||
unfortunately, we don't have the space for both tables */
|
||||
|
||||
/* {sin(2*i*PI/4096), cos(2*i*PI/4096)}, with i = 0 to 512 */
|
||||
static LOOKUP_T sincos_lookup0[1026] IDATA_ATTR = {
|
||||
|
|
|
@ -37,7 +37,7 @@ void* alloca(size_t size);
|
|||
|
||||
#ifndef _LOW_ACCURACY_
|
||||
/* 64 bit multiply */
|
||||
//#include <sys/types.h>
|
||||
/* #include <sys/types.h> */
|
||||
|
||||
#if BYTE_ORDER==LITTLE_ENDIAN
|
||||
union magic {
|
||||
|
|
|
@ -26,8 +26,8 @@
|
|||
|
||||
|
||||
/* IRAM buffer keeps the block PCM data; only for window sizes up to 2048
|
||||
for space restrictions. No real compromise, larger window sizes
|
||||
are only used for very low quality settings (q<0?) */
|
||||
for space restrictions.
|
||||
libVorbis 1.1 Oggenc doesn't use larger windows anyway. */
|
||||
/* max 2 channels on the ihp-1xx (stereo), 2048 samples (2*2048*4=16Kb) */
|
||||
#define IRAM_PCM_END 2048
|
||||
#define CHANNELS 2
|
||||
|
|
|
@ -68,7 +68,7 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
|
|||
long rightend=rightbegin+rn/2;
|
||||
|
||||
#if CONFIG_CPU == MCF5249
|
||||
mcf5249_init_mac(); /* shouldn't be needed, but just in case */
|
||||
/* mcf5249_init_mac(); */ /* shouldn't be needed, but just in case */
|
||||
mcf5249_vect_zero(&d[0], leftbegin);
|
||||
mcf5249_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
|
||||
mcf5249_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
|
||||
|
|
|
@ -18,8 +18,10 @@
|
|||
|
||||
#include "os_types.h"
|
||||
|
||||
/* keep small window tables in fast IRAM */
|
||||
static LOOKUP_T vwin64[32] IDATA_ATTR = {
|
||||
/* Oggenc 1.1 seems to use exclusively window sizes 256 and 2048;
|
||||
keep the most common sizes in fast IRAM;
|
||||
since we have the space available, also 128 and 512 */
|
||||
static LOOKUP_T vwin64[32] = {
|
||||
X(0x001f0003), X(0x01168c98), X(0x030333c8), X(0x05dfe3a4),
|
||||
X(0x09a49562), X(0x0e45df18), X(0x13b47ef2), X(0x19dcf676),
|
||||
X(0x20a74d83), X(0x27f7137c), X(0x2fabb05a), X(0x37a1105a),
|
||||
|
@ -151,7 +153,7 @@ static LOOKUP_T vwin512[256] IDATA_ATTR = {
|
|||
X(0x7ffffdcd), X(0x7fffff6d), X(0x7fffffed), X(0x7fffffff),
|
||||
};
|
||||
|
||||
static LOOKUP_T vwin1024[512] IDATA_ATTR = {
|
||||
static LOOKUP_T vwin1024[512] = {
|
||||
X(0x00001f02), X(0x0001170e), X(0x00030724), X(0x0005ef40),
|
||||
X(0x0009cf59), X(0x000ea767), X(0x0014775e), X(0x001b3f2e),
|
||||
X(0x0022fec8), X(0x002bb618), X(0x00356508), X(0x00400b81),
|
||||
|
@ -541,8 +543,6 @@ static LOOKUP_T vwin2048[1024] IDATA_ATTR = {
|
|||
X(0x7ffffffe), X(0x7fffffff), X(0x7fffffff), X(0x7fffffff),
|
||||
};
|
||||
|
||||
/* The remaining large window sizes are used only for very low
|
||||
quality Vorbis files so we don't bother to put them in IRAM */
|
||||
static LOOKUP_T vwin4096[2048] = {
|
||||
X(0x000001f0), X(0x00001171), X(0x00003072), X(0x00005ef5),
|
||||
X(0x00009cf8), X(0x0000ea7c), X(0x00014780), X(0x0001b405),
|
||||
|
|
Loading…
Reference in a new issue