Vorbis opts: keep floor1 lookup table in IRAM.

Slightly faster 16-bit clipping function.
Misc: replaced tabs with spaces to conform to Rockbox coding standards.


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6608 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Pedro Vasconcelos 2005-06-08 13:09:30 +00:00
parent ead61c1d18
commit 0a3f8e0924
11 changed files with 863 additions and 835 deletions

View file

@ -24,13 +24,12 @@
#ifndef _V_WIDE_MATH
#define _V_WIDE_MATH
//#define MB() asm volatile ("" : : : "memory")
#define MB()
static inline void mcf5249_init_mac(void) {
int r;
asm volatile ("move.l #0x20, %%macsr;" // frac, truncate, no saturation
"movclr.l %%acc0, %[r];" // clear accumulators
asm volatile ("move.l #0x20, %%macsr;" /* frac, truncate, no saturation */
"movclr.l %%acc0, %[r];" /* clear accumulators */
"move.l %%acc0, %%acc1;"
"move.l %%acc0, %%acc2;"
"move.l %%acc0, %%acc3;"
@ -38,19 +37,18 @@ static inline void mcf5249_init_mac(void) {
}
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
ogg_int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply into acc
"movclr.l %%acc0, %[r];" // move & clear acc
"asr.l #1, %[r];" // no overflow test
: [r] "=d" (r)
: [x] "r" (x), [y] "r" (y)
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
"movclr.l %%acc0, %[x];" /* move & clear acc */
"asr.l #1, %[x];" /* no overflow test */
: [x] "+&d" (x)
: [y] "r" (y)
: "cc");
return r;
return x;
}
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
"movclr.l %%acc0, %[x];" // move and clear
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"movclr.l %%acc0, %[x];" /* move and clear */
: [x] "+&r" (x)
: [y] "r" (y)
: "cc");
@ -60,14 +58,14 @@ static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
ogg_int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
"movclr.l %%acc0, %[r];" // get higher half
"mulu.l %[y], %[x];" // get lower half
"asl.l #8, %[r];" // hi << 17
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"movclr.l %%acc0, %[r];" /* get higher half */
"mulu.l %[y], %[x];" /* get lower half */
"asl.l #8, %[r];" /* hi<<16, plus one free */
"asl.l #8, %[r];"
"lsr.l #8, %[x];" // (unsigned)lo >> 15
"lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
"lsr.l #7, %[x];"
"or.l %[x], %[r];" // or
"or.l %[x], %[r];" /* logical-or results */
: [r] "=&d" (r), [x] "+d" (x)
: [y] "d" (y)
: "cc");
@ -116,10 +114,28 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
/* is there no better way of doing this using the MAC? */
#if 1 /* Canonical definition */
/* XPROD32(_a, _b, _t, _v, _x, _y): built from four MULT32 calls.
   Stores MULT32(_a,_t) + MULT32(_b,_v) in _x and
   MULT32(_b,_t) - MULT32(_a,_v) in _y.
   _a, _b, _t and _v are each expanded twice, so do not pass
   expressions with side effects. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
#else
/* Thom Johansen suggestion; this could lose the lsb by overflow
but does it matter in practice? */
/* Disabled alternative (the "#if 1" above selects the canonical form).
   Accumulates _a*_t and _b*_v in acc0, and _b*_t with _a*_v in acc1
   (msac.l presumably subtracts its product -- confirm against the
   ColdFire manual), then moves each accumulator out with movclr.l and
   shifts the result right by one.
   _x and _y are early-clobber data-register outputs; the four inputs
   may be in any general register. Only the condition codes are
   declared as clobbered. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
asm volatile ("mac.l %[a], %[t], %%acc0;" \
"mac.l %[b], %[v], %%acc0;" \
"mac.l %[b], %[t], %%acc1;" \
"msac.l %[a], %[v], %%acc1;" \
"movclr.l %%acc0, %[x];" \
"asr.l #1, %[x];" \
"movclr.l %%acc1, %[y];" \
"asr.l #1, %[y];" \
: [x] "=&d" (_x), [y] "=&d" (_y) \
: [a] "r" (_a), [b] "r" (_b), \
[t] "r" (_t), [v] "r" (_v) \
: "cc");
#endif
/* asm versions of vector multiplication for window.c */
@ -127,41 +143,41 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
static inline
void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
asm volatile ("movem.l (%[d]), %%d0-%%d3;" // loop start
"movem.l (%[w]), %%a0-%%a3;" // pre-fetch registers
asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
"movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
"lea.l (4*4, %[w]), %[w];"
"bra 1f;" // jump to loop condition
"0:" // loop body
// multiply and load next window values
"bra 1f;" /* jump to loop condition */
"0:" /* loop body */
/* multiply and load next window values */
"mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
"mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
"mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
"mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
"movclr.l %%acc0, %%d0;" // get the products
"movclr.l %%acc0, %%d0;" /* get the products */
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
// store and advance
/* store and advance */
"movem.l %%d0-%%d3, (%[d]);"
"lea.l (4*4, %[d]), %[d];"
"movem.l (%[d]), %%d0-%%d3;"
"subq.l #4, %[n];" // done 4 elements
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
// multiply final elements
/* multiply final elements */
"tst.l %[n];"
"beq 1f;" // n=0
"beq 1f;" /* n=0 */
"mac.l %%d0, %%a0, %%acc0;"
"movclr.l %%acc0, %%d0;"
"move.l %%d0, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" // n=1
"beq 1f;" /* n=1 */
"mac.l %%d1, %%a1, %%acc0;"
"movclr.l %%acc0, %%d1;"
"move.l %%d1, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" // n=2
// otherwise n = 3
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"mac.l %%d2, %%a2, %%acc0;"
"movclr.l %%acc0, %%d2;"
"move.l %%d2, (%[d])+;"
@ -174,41 +190,41 @@ void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
static inline
void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
asm volatile ("lea.l (-3*4, %[w]), %[w];" // loop start
"movem.l (%[d]), %%d0-%%d3;" // pre-fetch registers
asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */
"movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
"movem.l (%[w]), %%a0-%%a3;"
"bra 1f;" // jump to loop condition
"0:" // loop body
// multiply and load next window value
"bra 1f;" /* jump to loop condition */
"0:" /* loop body */
/* multiply and load next window value */
"mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
"mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
"mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
"mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
"movclr.l %%acc0, %%d0;" // get the products
"movclr.l %%acc0, %%d0;" /* get the products */
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
// store and advance
/* store and advance */
"movem.l %%d0-%%d3, (%[d]);"
"lea.l (4*4, %[d]), %[d];"
"movem.l (%[d]), %%d0-%%d3;"
"subq.l #4, %[n];" // done 4 elements
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
// multiply final elements
/* multiply final elements */
"tst.l %[n];"
"beq 1f;" // n=0
"beq 1f;" /* n=0 */
"mac.l %%d0, %%a3, %%acc0;"
"movclr.l %%acc0, %%d0;"
"move.l %%d0, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" // n=1
"beq 1f;" /* n=1 */
"mac.l %%d1, %%a2, %%acc0;"
"movclr.l %%acc0, %%d1;"
"move.l %%d1, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" // n=2
// otherwise n = 3
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"mac.l %%d2, %%a1, %%acc0;"
"movclr.l %%acc0, %%d2;"
"move.l %%d2, (%[d])+;"
@ -226,23 +242,23 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
"clr.l %%d1;"
"clr.l %%d2;"
"clr.l %%d3;"
// loop start
/* loop start */
"tst.l %[n];"
"bra 1f;"
"0: movem.l %%d0-%%d3, (%[ptr]);"
"lea (4*4, %[ptr]), %[ptr];"
"subq.l #4, %[n];"
"1: bgt 0b;"
// remaining elements
/* remaining elements */
"tst.l %[n];"
"beq 1f;" // n=0
"beq 1f;" /* n=0 */
"clr.l (%[ptr])+;"
"subq.l #1, %[n];"
"beq 1f;" // n=1
"beq 1f;" /* n=1 */
"clr.l (%[ptr])+;"
"subq.l #1, %[n];"
"beq 1f;" // n=2
// otherwise n = 3
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"clr.l (%[ptr])+;"
"1:"
: [n] "+d" (n), [ptr] "+a" (ptr)
@ -250,6 +266,16 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
: "%d0","%d1","%d2","%d3","cc","memory");
}
#endif
#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH
/* this is portable C and simple; why not use this as default? */
/* Saturate x to the signed 16-bit range [-32768, 32767].
   Values already inside that range are returned unchanged. */
static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) {
    const ogg_int32_t upper = 32767;
    const ogg_int32_t lower = -32768;

    if (x >= upper) {
        return upper;
    }
    if (x <= lower) {
        return lower;
    }
    return x;
}
#endif
#endif

View file

@ -216,7 +216,8 @@ static int render_point(int x0,int x1,int y0,int y1,int x){
# define XdB(n) (n)
#endif
static ogg_int32_t FLOOR_fromdB_LOOKUP[256] ={
/* keep the floor lookup table in fast IRAM */
static ogg_int32_t FLOOR_fromdB_LOOKUP[256] IDATA_ATTR = {
XdB(0x000000e5), XdB(0x000000f4), XdB(0x00000103), XdB(0x00000114),
XdB(0x00000126), XdB(0x00000139), XdB(0x0000014e), XdB(0x00000163),
XdB(0x0000017a), XdB(0x00000193), XdB(0x000001ad), XdB(0x000001c9),

View file

@ -342,7 +342,7 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out) {
int step;
#if CONFIG_CPU == MCF5249
mcf5249_init_mac(); /* should be redundant */
/* mcf5249_init_mac(); */ /* should be redundant */
#endif
for (shift=6;!(n&(1<<shift));shift++);

View file

@ -17,7 +17,8 @@
#include "os_types.h"
/* keep lookup tables in fast IRAM */
/* we keep the most-used sine/cosine table in fast IRAM;
unfortunately, we don't have the space for both tables */
/* {sin(2*i*PI/4096), cos(2*i*PI/4096)}, with i = 0 to 512 */
static LOOKUP_T sincos_lookup0[1026] IDATA_ATTR = {

View file

@ -37,7 +37,7 @@ void* alloca(size_t size);
#ifndef _LOW_ACCURACY_
/* 64 bit multiply */
//#include <sys/types.h>
/* #include <sys/types.h> */
#if BYTE_ORDER==LITTLE_ENDIAN
union magic {

View file

@ -26,8 +26,8 @@
/* IRAM buffer keeps the block PCM data; only for window sizes up to 2048
for space restrictions. No real compromise, larger window sizes
are only used for very low quality settings (q<0?) */
for space restrictions.
libVorbis 1.1 Oggenc doesn't use larger windows anyway. */
/* max 2 channels on the ihp-1xx (stereo), 2048 samples (2*2048*4=16Kb) */
#define IRAM_PCM_END 2048
#define CHANNELS 2

View file

@ -68,7 +68,7 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
long rightend=rightbegin+rn/2;
#if CONFIG_CPU == MCF5249
mcf5249_init_mac(); /* shouldn't be needed, but just in case */
/* mcf5249_init_mac(); */ /* shouldn't be needed, but just in case */
mcf5249_vect_zero(&d[0], leftbegin);
mcf5249_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
mcf5249_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);

View file

@ -18,8 +18,10 @@
#include "os_types.h"
/* keep small window tables in fast IRAM */
static LOOKUP_T vwin64[32] IDATA_ATTR = {
/* Oggenc 1.1 seems to use window sizes 256 and 2048 exclusively,
so keep these most common sizes in fast IRAM; because we have the
space available, sizes 128 and 512 are kept there as well */
static LOOKUP_T vwin64[32] = {
X(0x001f0003), X(0x01168c98), X(0x030333c8), X(0x05dfe3a4),
X(0x09a49562), X(0x0e45df18), X(0x13b47ef2), X(0x19dcf676),
X(0x20a74d83), X(0x27f7137c), X(0x2fabb05a), X(0x37a1105a),
@ -151,7 +153,7 @@ static LOOKUP_T vwin512[256] IDATA_ATTR = {
X(0x7ffffdcd), X(0x7fffff6d), X(0x7fffffed), X(0x7fffffff),
};
static LOOKUP_T vwin1024[512] IDATA_ATTR = {
static LOOKUP_T vwin1024[512] = {
X(0x00001f02), X(0x0001170e), X(0x00030724), X(0x0005ef40),
X(0x0009cf59), X(0x000ea767), X(0x0014775e), X(0x001b3f2e),
X(0x0022fec8), X(0x002bb618), X(0x00356508), X(0x00400b81),
@ -541,8 +543,6 @@ static LOOKUP_T vwin2048[1024] IDATA_ATTR = {
X(0x7ffffffe), X(0x7fffffff), X(0x7fffffff), X(0x7fffffff),
};
/* The remaining large window sizes are used only for very low
quality Vorbis files so we don't bother to put them in IRAM */
static LOOKUP_T vwin4096[2048] = {
X(0x000001f0), X(0x00001171), X(0x00003072), X(0x00005ef5),
X(0x00009cf8), X(0x0000ea7c), X(0x00014780), X(0x0001b405),