rockbox/apps/codecs/libwmapro/wmapro_math.h

#ifndef _WMAPRO_MATH_H_
#define _WMAPRO_MATH_H_

#include <inttypes.h>

/* rockbox: not used
#define fixtof16(x)       (float)((float)(x) / (float)(1 << 16))
#define fixtof31(x)       (float)((float)(x) / (float)(1 << 31))
#define ftofix16(x)       ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
#define ftofix31(x)       ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))
*/

#if defined(CPU_ARM)
    /* Calculates: result = (X*Y)>>Z
     *
     * X and Y are the 32-bit signed factors, Z is the right-shift amount.
     * smull forms the 64-bit product in lo/hi; the low word is then shifted
     * right (logically) by Z and OR-ed with the high word shifted left by
     * (32 - Z). The macro evaluates to the low 32 bits of the shifted product.
     *
     * Z is evaluated exactly once into a local and the complementary shift is
     * derived from that copy, so expression arguments such as `s + 2` yield
     * the intended 32 - (s + 2) and side effects in Z are not duplicated.
     *
     * NOTE(review): the two-shift recombination presumably requires
     * 0 <= Z <= 32 -- confirm against callers. */
    #define fixmulshift(X,Y,Z) \
    ({ \
        const int32_t fixmulshift_shr_ = (Z); \
        int32_t lo; \
        int32_t hi; \
        asm volatile ( \
            "smull %[lo], %[hi], %[x], %[y] \n\t"   /* multiply */ \
            "mov   %[lo], %[lo], lsr %[shr] \n\t"   /* lo >>= Z */ \
            "orr   %[lo], %[lo], %[hi], lsl %[shl]" /* lo |= (hi << (32-Z)) */ \
            : [lo]"=&r"(lo), [hi]"=&r"(hi) \
            : [x]"r"(X), [y]"r"(Y), [shr]"r"(fixmulshift_shr_), \
              [shl]"r"(32 - fixmulshift_shr_)); \
        lo; \
    })
     
    /* Calculates: result = (X*Y)>>16
     *
     * X and Y are 32-bit factors passed in registers. smull leaves the 64-bit
     * product in lo (low word) and hi (high word); the result is rebuilt from
     * the upper 16 bits of lo and the lower 16 bits of hi, i.e. the low
     * 32 bits of the product shifted right by 16.
     * Both temporaries are early-clobber outputs ("=&r") so they never share
     * a register with the X/Y inputs. */
    #define fixmul16(X,Y) \
     ({ \
        int32_t lo; \
        int32_t hi; \
        asm volatile ( \
           "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
           "mov   %[lo], %[lo], lsr #16    \n\t" /* lo >>= 16 */ \
           "orr   %[lo], %[lo], %[hi], lsl #16"  /* lo |= (hi << 16) */ \
           : [lo]"=&r"(lo), [hi]"=&r"(hi) \
           : [x]"r"(X), [y]"r"(Y)); \
        lo; \
     })
     
    /* Calculates: result = (X*Y)>>24
     *
     * X and Y are 32-bit factors passed in registers. smull leaves the 64-bit
     * product in lo (low word) and hi (high word); the result is rebuilt from
     * the upper 8 bits of lo and the lower 24 bits of hi, i.e. the low
     * 32 bits of the product shifted right by 24.
     * Both temporaries are early-clobber outputs ("=&r") so they never share
     * a register with the X/Y inputs. */
    #define fixmul24(X,Y) \
     ({ \
        int32_t lo; \
        int32_t hi; \
        asm volatile ( \
           "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
           "mov   %[lo], %[lo], lsr #24    \n\t" /* lo >>= 24 */ \
           "orr   %[lo], %[lo], %[hi], lsl #8"   /* lo |= (hi << 8) */ \
           : [lo]"=&r"(lo), [hi]"=&r"(hi) \
           : [x]"r"(X), [y]"r"(Y)); \
        lo; \
     })
     
    /* Calculates: result = (X*Y)>>31
     *
     * X and Y are 32-bit factors passed in registers. smull leaves the 64-bit
     * product in lo (low word) and hi (high word); the result is rebuilt from
     * the top bit of lo and the lower 31 bits of hi, i.e. the low 32 bits of
     * the product shifted right by 31.
     * Both temporaries are early-clobber outputs ("=&r") so they never share
     * a register with the X/Y inputs. */
    #define fixmul31(X,Y) \
     ({ \
        int32_t lo; \
        int32_t hi; \
        asm volatile ( \
           "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
           "mov   %[lo], %[lo], lsr #31    \n\t" /* lo >>= 31 */ \
           "orr   %[lo], %[lo], %[hi], lsl #1"   /* lo |= (hi << 1) */ \
           : [lo]"=&r"(lo), [hi]"=&r"(hi) \
           : [x]"r"(X), [y]"r"(Y)); \
        lo; \
     })
#elif defined(CPU_COLDFIRE)
    /* Calculates: result = (X*Y)>>Z
     *
     * X and Y are the 32-bit factors, Z is the right-shift amount; all are
     * passed in data registers. The product is accumulated in acc0 (read back
     * as the "higher half") while mulu.l recomputes the "lower half" in the
     * register holding X. Depending on the sign of (31 - Z) the result is
     * either the higher half shifted left and OR-ed with the lower half
     * shifted right by Z, or the higher half shifted right by (Z - 31).
     *
     * The asm overwrites the register bound to %[x] (mulu.l, lsr.l), so X is
     * first copied into a local that is passed as an early-clobber read-write
     * operand ("+&d"). This keeps the caller's X intact and stops the compiler
     * from placing Y or Z in the same register.
     *
     * NOTE(review): the "31 - shift" arithmetic presumably relies on the EMAC
     * unit running in fractional mode (acc0 read yields product >> 31) --
     * confirm against the codec's MACSR setup. */
    #define fixmulshift(X,Y,Z) \
    ({ \
        int32_t fixmulshift_x_ = (X); \
        int32_t t1; \
        int32_t t2; \
        asm volatile ( \
            "mac.l   %[x],%[y],%%acc0\n\t" /* multiply */ \
            "mulu.l  %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
            "movclr.l %%acc0,%[t1]   \n\t" /* get higher half */ \
            "moveq.l #31,%[t2]       \n\t" \
            "sub.l   %[sh],%[t2]     \n\t" /* t2 = 31 - shift */ \
            "ble.s   1f              \n\t" \
            "asl.l   %[t2],%[t1]     \n\t" /* hi <<= 31 - shift */ \
            "lsr.l   %[sh],%[x]      \n\t" /* (unsigned)lo >>= shift */ \
            "or.l    %[x],%[t1]      \n\t" /* combine result */ \
            "bra.s   2f              \n\t" \
         "1:                         \n\t" \
            "neg.l   %[t2]           \n\t" /* t2 = shift - 31 */ \
            "asr.l   %[t2],%[t1]     \n\t" /* hi >>= t2 */ \
         "2:                         \n" \
        : [t1]"=&d"(t1), [t2]"=&d"(t2), [x]"+&d"(fixmulshift_x_) \
        : [y] "d"((Y)), [sh]"d"((Z))); \
        t1; \
    })

    /* Calculates: result = (X*Y)>>16
     *
     * X is copied into the local x, which is a read-write operand ("+d")
     * because the asm overwrites it; Y is an input in a data register.
     * Sequence: the product is accumulated in acc0, mulu.l leaves the lower
     * half of the product in x, acc0 is read (and cleared) into t and shifted
     * right by one, then the low word of t replaces the low word of x and
     * swap exchanges the two halfwords of x. The macro evaluates to x.
     *
     * NOTE(review): the "compensate emac shift" step presumably assumes the
     * EMAC unit is in fractional mode -- confirm against the MACSR setup. */
    #define fixmul16(X,Y) \
    ({ \
        int32_t t, x = (X); \
        asm volatile ( \
            "mac.l    %[x],%[y],%%acc0\n\t" /* multiply */ \
            "mulu.l   %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
            "movclr.l %%acc0,%[t]     \n\t" /* get higher half */ \
            "lsr.l    #1,%[t]         \n\t" /* hi >>= 1 to compensate emac shift */ \
            "move.w   %[t],%[x]       \n\t" /* combine halfwords */\
            "swap     %[x]            \n\t" \
            : [t]"=&d"(t), [x] "+d" (x) \
            : [y] "d" ((Y))); \
        x; \
    })
    
    /* Calculates: result = (X*Y)>>24
     *
     * X and Y are inputs in data registers; t1 and t2 are early-clobber
     * scratch outputs. Sequence: the product is accumulated in acc0, the
     * accumulator extension register accext01 is copied to t1, acc0 is read
     * (and cleared) into t2, t2 is shifted left by 7 and finally the low byte
     * of t1 replaces the low byte of t2. The macro evaluates to t2.
     *
     * NOTE(review): "plus one free" presumably refers to the extra one-bit
     * left shift the EMAC applies in fractional mode, and the low byte of
     * accext01 presumably holds the product bits just below acc0 -- confirm
     * against the ColdFire EMAC documentation and the MACSR setup. */
    #define fixmul24(X,Y) \
    ({ \
        int32_t t1, t2; \
        asm volatile ( \
            "mac.l    %[x],%[y],%%acc0 \n\t" /* multiply */ \
            "move.l   %%accext01, %[t1]\n\t" /* get lower 8 bits */ \
            "movclr.l %%acc0,%[t2]     \n\t" /* get higher 24 bits */ \
            "asl.l    #7,%[t2]         \n\t" /* hi <<= 7, plus one free */ \
            "move.b   %[t1],%[t2]      \n\t" /* combine result */ \
            : [t1]"=&d"(t1), [t2]"=&d"(t2) \
            : [x] "d" ((X)), [y] "d" ((Y))); \
        t2; \
    })

    /* Calculates: result = (X*Y)>>31
     *
     * X and Y are inputs in general registers. The product is accumulated in
     * acc0, which is then read (and cleared) into t; the macro evaluates to t.
     *
     * NOTE(review): this comment previously said ">>32". The ARM and portable
     * variants of fixmul31 shift by 31, and the sibling Coldfire macros
     * mention compensating an EMAC shift, so the accumulator read presumably
     * already yields product >> 31 (EMAC fractional mode) -- confirm against
     * the MACSR setup. */
    #define fixmul31(X,Y) \
    ({ \
       int32_t t; \
       asm volatile ( \
          "mac.l %[x], %[y], %%acc0\n\t"   /* multiply */ \
          "movclr.l %%acc0, %[t]\n\t"      /* get higher half as result */ \
          : [t] "=d" (t) \
          : [x] "r" ((X)), [y] "r" ((Y))); \
       t; \
    })
#else
    /* Portable fallback: multiplies x by y in 64-bit precision, shifts the
     * product right by shamt bits and returns it truncated to 32 bits. */
    static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
    {
        const int64_t product = (int64_t)x * y;

        return (int32_t)(product >> shamt);
    }
    
    /* Portable fallback: returns (x * y) >> 31, computed with a 64-bit
     * intermediate product and truncated to 32 bits. */
    static inline int32_t fixmul31(int32_t x, int32_t y)
    {
        const int64_t product = (int64_t)x * y;

        return (int32_t)(product >> 31);
    }
    
    /* Portable fallback: returns (x * y) >> 24, computed with a 64-bit
     * intermediate product and truncated to 32 bits. */
    static inline int32_t fixmul24(int32_t x, int32_t y)
    {
        const int64_t product = (int64_t)x * y;

        return (int32_t)(product >> 24);
    }
    
    /* Portable fallback: returns (x * y) >> 16, computed with a 64-bit
     * intermediate product and truncated to 32 bits. */
    static inline int32_t fixmul16(int32_t x, int32_t y)
    {
        const int64_t product = (int64_t)x * y;

        return (int32_t)(product >> 16);
    }
#endif /* CPU_COLDFIRE, CPU_ARM */

#ifdef CPU_COLDFIRE
/* Coldfire (EMAC) version of the windowing/overlap step.
 *
 * dst, src0 and win are advanced by len, so i (running -len ... -1) addresses
 * their elements 0 ... len-1 and j (running len-1 ... 0) addresses elements
 * 2*len-1 ... len of dst and win. src1 is NOT advanced, so src1[j] walks
 * src1[len-1 ... 0].
 *
 * Per iteration, with wi = -win[i] and wj = -win[j]:
 *   acc0 = s0*wj - s1*wi  -> stored to dst[i]
 *   acc1 = s0*wi + s1*wj  -> stored to dst[j]
 *
 * dst:  output, 2*len elements.   src0: len elements.
 * src1: len elements.             win:  2*len window coefficients.
 * len:  half the window length; nothing is done when len <= 0.
 *
 * NOTE(review): the scaling of the values read back from acc0/acc1 depends on
 * the EMAC mode (presumably fractional, matching fixmul31) -- confirm. */
static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, 
                                   const int32_t *src1, const int32_t *win, 
                                   int len)
{
    int i, j;
    dst += len;
    win += len;
    src0+= len;
        for(i=-len, j=len-1; i<0; i++, j--) {
        int32_t s0 = src0[i];
        int32_t s1 = src1[j];
        int32_t wi = -win[i];
        int32_t wj = -win[j];

        asm volatile ("mac.l    %[s0], %[wj], %%acc0\n\t"
                      "msac.l   %[s1], %[wi], %%acc0\n\t"
                      "mac.l    %[s0], %[wi], %%acc1\n\t"
                      "mac.l    %[s1], %[wj], %%acc1\n\t"
                      "movclr.l %%acc0, %[s0]\n\t"
                      "move.l   %[s0], (%[dst_i])\n\t"
                      "movclr.l %%acc1, %[s0]\n\t"
                      "move.l   %[s0], (%[dst_j])\n\t"
                      : [s0] "+r" (s0) /* s0 is read as a factor and then reused as scratch for the movclr results, hence the read-write ("+") operand */
                      : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
                        [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
                      : "cc", "memory"); /* "memory": the asm stores through dst_i/dst_j */
    }
}
#else
/* Portable version of the windowing/overlap step.
 *
 * Walks the 2*len output samples from both ends towards the middle. For each
 * pair (head index 0 ... len-1, tail index 2*len-1 ... len):
 *   dst[head] = src0[head]*(-win[tail]) - src1[k]*(-win[head])
 *   dst[tail] = src0[head]*(-win[head]) + src1[k]*(-win[tail])
 * where k runs len-1 ... 0 (src1 is indexed from its own start) and every
 * product is a fixmul31() multiplication.
 *
 * dst:  output, 2*len elements.   src0: len elements.
 * src1: len elements.             win:  2*len window coefficients.
 * len:  half the window length; nothing is done when len <= 0. */
static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
                                   const int32_t *src1, const int32_t *win,
                                   int len)
{
    /* Re-base the arrays at their midpoint so the head half is addressed
     * with negative offsets and the tail half with non-negative ones. */
    int32_t *const dst_mid = dst + len;
    const int32_t *const win_mid = win + len;
    const int32_t *const src0_end = src0 + len;
    int head = -len;
    int tail = len - 1;

    while (head < 0) {
        const int32_t in0 = src0_end[head];
        const int32_t in1 = src1[tail];
        const int32_t w_head = -win_mid[head];
        const int32_t w_tail = -win_mid[tail];

        dst_mid[head] = fixmul31(in0, w_tail) - fixmul31(in1, w_head);
        dst_mid[tail] = fixmul31(in0, w_head) + fixmul31(in1, w_tail);

        head++;
        tail--;
    }
}
#endif

/* Scales a vector by a constant: dst[k] = fixmul24(src[k], mul).
 *
 * Elements are processed in groups of four, so len must be a multiple of 4;
 * otherwise the last group reads and writes past len. The original author
 * notes that len is always the difference of two scale factor band offsets,
 * which are themselves multiples of 4. Nothing is done when len <= 0. */
static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
                                        int32_t mul, int len)
{
    const int32_t *in = src;
    int32_t *out = dst;
    int remaining;

    for (remaining = len; remaining > 0; remaining -= 4) {
        out[0] = fixmul24(in[0], mul);
        out[1] = fixmul24(in[1], mul);
        out[2] = fixmul24(in[2], mul);
        out[3] = fixmul24(in[3], mul);
        in  += 4;
        out += 4;
    }
}

/* Clamps a to the range [amin, amax]: values below amin yield amin, values
 * above amax yield amax, anything else is returned unchanged. The lower
 * bound is tested first. */
static inline int av_clip(int a, int amin, int amax)
{
    return (a < amin) ? amin
         : (a > amax) ? amax
         :              a;
}
#endif /* _WMAPRO_MATH_H_ */
Complete the conversion of WMA Pro to fixed point. Floating point code is still there for better history and to have a returning point in svn should something go wrong. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27402 a1c6a512-1295-4272-9138-f99709370657 2010-07-12 15:41:10 +00:00			`#ifndef _WMAPRO_MATH_H_`
			`#define _WMAPRO_MATH_H_`

Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00			`#include <inttypes.h>`

Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`/* rockbox: not used`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00			`#define fixtof16(x) (float)((float)(x) / (float)(1 << 16))`
Complete the conversion of WMA Pro to fixed point. Floating point code is still there for better history and to have a returning point in svn should something go wrong. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27402 a1c6a512-1295-4272-9138-f99709370657 2010-07-12 15:41:10 +00:00			`#define fixtof31(x) (float)((float)(x) / (float)(1 << 31))`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00			`#define ftofix16(x) ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))`
Complete the conversion of WMA Pro to fixed point. Floating point code is still there for better history and to have a returning point in svn should something go wrong. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27402 a1c6a512-1295-4272-9138-f99709370657 2010-07-12 15:41:10 +00:00			`#define ftofix31(x) ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))`
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`*/`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`#if defined(CPU_ARM)`
			`/* Calculates: result = (X*Y)>>Z */`
			`#define fixmulshift(X,Y,Z) \`
			`({ \`
			`int32_t lo; \`
			`int32_t hi; \`
			`asm volatile ( \`
			`"smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \`
			`"mov %[lo], %[lo], lsr %[shr] \n\t" /* lo >>= Z */ \`
			`"orr %[lo], %[lo], %[hi], lsl %[shl]" /* lo \|= (hi << (32-Z)) */ \`
			`: [lo]"=&r"(lo), [hi]"=&r"(hi) \`
			`: [x]"r"(X), [y]"r"(Y), [shr]"r"(Z), [shl]"r"(32-Z)); \`
			`lo; \`
			`})`

			`/* Calculates: result = (X*Y)>>16 */`
			`#define fixmul16(X,Y) \`
			`({ \`
			`int32_t lo; \`
			`int32_t hi; \`
			`asm volatile ( \`
			`"smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \`
			`"mov %[lo], %[lo], lsr #16 \n\t" /* lo >>= 16 */ \`
			`"orr %[lo], %[lo], %[hi], lsl #16" /* lo \|= (hi << 16) */ \`
			`: [lo]"=&r"(lo), [hi]"=&r"(hi) \`
			`: [x]"r"(X), [y]"r"(Y)); \`
			`lo; \`
			`})`

			`/* Calculates: result = (X*Y)>>24 */`
			`#define fixmul24(X,Y) \`
			`({ \`
			`int32_t lo; \`
			`int32_t hi; \`
			`asm volatile ( \`
			`"smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \`
			`"mov %[lo], %[lo], lsr #24 \n\t" /* lo >>= 24 */ \`
			`"orr %[lo], %[lo], %[hi], lsl #8" /* lo \|= (hi << 8) */ \`
			`: [lo]"=&r"(lo), [hi]"=&r"(hi) \`
			`: [x]"r"(X), [y]"r"(Y)); \`
			`lo; \`
			`})`

			`/* Calculates: result = (X*Y)>>31 */`
			`#define fixmul31(X,Y) \`
			`({ \`
			`int32_t lo; \`
			`int32_t hi; \`
			`asm volatile ( \`
			`"smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \`
			`"mov %[lo], %[lo], lsr #31 \n\t" /* lo >>= 31 */ \`
			`"orr %[lo], %[lo], %[hi], lsl #1" /* lo \|= (hi << 1) */ \`
			`: [lo]"=&r"(lo), [hi]"=&r"(hi) \`
			`: [x]"r"(X), [y]"r"(Y)); \`
			`lo; \`
			`})`
			`#elif defined(CPU_COLDFIRE)`
			`/* Calculates: result = (X*Y)>>Z */`
			`#define fixmulshift(X,Y,Z) \`
			`({ \`
			`int32_t t1; \`
			`int32_t t2; \`
			`asm volatile ( \`
			`"mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \`
			`"mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \`
			`"movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \`
			`"moveq.l #31,%[t2] \n\t" \`
			`"sub.l %[sh],%[t2] \n\t" /* t2 = 31 - shift */ \`
			`"ble.s 1f \n\t" \`
			`"asl.l %[t2],%[t1] \n\t" /* hi <<= 31 - shift */ \`
			`"lsr.l %[sh],%[x] \n\t" /* (unsigned)lo >>= shift */ \`
			`"or.l %[x],%[t1] \n\t" /* combine result */ \`
			`"bra.s 2f \n\t" \`
			`"1: \n\t" \`
			`"neg.l %[t2] \n\t" /* t2 = shift - 31 */ \`
			`"asr.l %[t2],%[t1] \n\t" /* hi >>= t2 */ \`
			`"2: \n" \`
			`: [t1]"=&d"(t1), [t2]"=&d"(t2) \`
			`: [x] "d"((X)), [y] "d"((Y)), [sh]"d"((Z))); \`
			`t1; \`
			`})`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`/* Calculates: result = (XY)>>16 /`
			`#define fixmul16(X,Y) \`
			`({ \`
libwmapro: slightly shorter and faster inline asm fixed point multiplication routines, speedup is ~0.5%. Also don't lie to gcc about which vars are changed by the asm. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27584 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 23:00:22 +00:00			`int32_t t, x = (X); \`
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`asm volatile ( \`
libwmapro: slightly shorter and faster inline asm fixed point multiplication routines, speedup is ~0.5%. Also don't lie to gcc about which vars are changed by the asm. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27584 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 23:00:22 +00:00			`"mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \`
			`"mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \`
			`"movclr.l %%acc0,%[t] \n\t" /* get higher half */ \`
			`"lsr.l #1,%[t] \n\t" /* hi >>= 1 to compensate emac shift */ \`
			`"move.w %[t],%[x] \n\t" /* combine halfwords */\`
			`"swap %[x] \n\t" \`
			`: [t]"=&d"(t), [x] "+d" (x) \`
			`: [y] "d" ((Y))); \`
			`x; \`
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`})`

			`/* Calculates: result = (X*Y)>>24 */`
			`#define fixmul24(X,Y) \`
			`({ \`
libwmapro: tiny tweak fo coldfire fixmul24 for a very slight speedup. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27585 a1c6a512-1295-4272-9138-f99709370657 2010-07-27 06:30:38 +00:00			`int32_t t1, t2; \`
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`asm volatile ( \`
libwmapro: tiny tweak fo coldfire fixmul24 for a very slight speedup. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27585 a1c6a512-1295-4272-9138-f99709370657 2010-07-27 06:30:38 +00:00			`"mac.l %[x],%[y],%%acc0 \n\t" /* multiply */ \`
			`"move.l %%accext01, %[t1]\n\t" /* get lower 8 bits */ \`
			`"movclr.l %%acc0,%[t2] \n\t" /* get higher 24 bits */ \`
			`"asl.l #7,%[t2] \n\t" /* hi <<= 7, plus one free */ \`
			`"move.b %[t1],%[t2] \n\t" /* combine result */ \`
			`: [t1]"=&d"(t1), [t2]"=&d"(t2) \`
			`: [x] "d" ((X)), [y] "d" ((Y))); \`
			`t2; \`
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`})`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`/* Calculates: result = (XY)>>32 /`
			`#define fixmul31(X,Y) \`
			`({ \`
			`int32_t t; \`
			`asm volatile ( \`
			`"mac.l %[x], %[y], %%acc0\n\t" /* multiply */ \`
			`"movclr.l %%acc0, %[t]\n\t" /* get higher half as result */ \`
			`: [t] "=d" (t) \`
			`: [x] "r" ((X)), [y] "r" ((Y))); \`
			`t; \`
			`})`
			`#else`
			`static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)`
			`{`
			`int64_t temp;`
			`temp = x;`
			`temp *= y;`

			`temp >>= shamt;`

			`return (int32_t)temp;`
			`}`

			`static inline int32_t fixmul31(int32_t x, int32_t y)`
			`{`
			`int64_t temp;`
			`temp = x;`
			`temp *= y;`

			`temp >>= 31;`

			`return (int32_t)temp;`
			`}`

			`static inline int32_t fixmul24(int32_t x, int32_t y)`
			`{`
			`int64_t temp;`
			`temp = x;`
			`temp *= y;`

			`temp >>= 24;`

			`return (int32_t)temp;`
			`}`

			`static inline int32_t fixmul16(int32_t x, int32_t y)`
			`{`
			`int64_t temp;`
			`temp = x;`
			`temp *= y;`

			`temp >>= 16;`

			`return (int32_t)temp;`
			`}`
			`#endif /* CPU_COLDFIRE, CPU_ARM */`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00
libwmapro: coldfire asm for vector_fixmul_window, gives a speedup of ~13%, drop the add_bias argument for the vector_fixmul_window function, since it was always 0 git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 11:15:25 +00:00			`#ifdef CPU_COLDFIRE`
			`static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,`
			`const int32_t *src1, const int32_t *win,`
			`int len)`
			`{`
			`int i, j;`
			`dst += len;`
			`win += len;`
			`src0+= len;`
			`for(i=-len, j=len-1; i<0; i++, j--) {`
			`int32_t s0 = src0[i];`
			`int32_t s1 = src1[j];`
			`int32_t wi = -win[i];`
			`int32_t wj = -win[j];`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00
libwmapro: coldfire asm for vector_fixmul_window, gives a speedup of ~13%, drop the add_bias argument for the vector_fixmul_window function, since it was always 0 git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 11:15:25 +00:00			`asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t"`
			`"msac.l %[s1], %[wi], %%acc0\n\t"`
			`"mac.l %[s0], %[wi], %%acc1\n\t"`
			`"mac.l %[s1], %[wj], %%acc1\n\t"`
			`"movclr.l %%acc0, %[s0]\n\t"`
			`"move.l %[s0], (%[dst_i])\n\t"`
			`"movclr.l %%acc1, %[s0]\n\t"`
			`"move.l %[s0], (%[dst_j])\n\t"`
			`: [s0] "+r" (s0) /* this register is clobbered so specify it as an input */`
			`: [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),`
			`[s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)`
			`: "cc", "memory");`
			`}`
			`}`
			`#else`
libwmapro : Rename all FIXED occurrances to int32_t and remove types.h git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27454 a1c6a512-1295-4272-9138-f99709370657 2010-07-17 08:00:13 +00:00			`static inline void vector_fixmul_window(int32_t dst, const int32_t src0,`
			`const int32_t src1, const int32_t win,`
libwmapro: coldfire asm for vector_fixmul_window, gives a speedup of ~13%, drop the add_bias argument for the vector_fixmul_window function, since it was always 0 git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 11:15:25 +00:00			`int len)`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00			`{`
			`int i, j;`
			`dst += len;`
			`win += len;`
			`src0+= len;`
Submit next part of FS#11498. Unroll loop for minor speedup of libwmapro on ARM (1%). git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27595 a1c6a512-1295-4272-9138-f99709370657 2010-07-28 18:15:53 +00:00			`for(i=-len, j=len-1; i<0; i++, j--) {`
			`int32_t s0 = src0[i]; /* s0 = src0[ 0 ... len-1] */`
			`int32_t s1 = src1[j]; /* s1 = src1[2*len-1 ... len] */`
			`int32_t wi = -win[i]; /* wi = -win[ 0 ... len-1] */`
			`int32_t wj = -win[j]; /* wj = -win[2*len-1 ... len] */`
			`dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi); /* dst[ 0 ... len-1] */`
			`dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj); /* dst[2*len-1 ... len] */`
libwmapro: coldfire asm for vector_fixmul_window, gives a speedup of ~13%, drop the add_bias argument for the vector_fixmul_window function, since it was always 0 git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 11:15:25 +00:00			`}`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00			`}`
libwmapro: coldfire asm for vector_fixmul_window, gives a speedup of ~13%, drop the add_bias argument for the vector_fixmul_window function, since it was always 0 git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 11:15:25 +00:00			`#endif`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00
Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657 2010-07-26 21:43:07 +00:00			`static inline void vector_fixmul_scalar(int32_t dst, const int32_t src,`
			`int32_t mul, int len)`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00			`{`
Submit next part of FS#11498. Unroll loop for minor speedup of libwmapro on ARM (1%). git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27595 a1c6a512-1295-4272-9138-f99709370657 2010-07-28 18:15:53 +00:00			`/* len is _always_ a multiple of 4, because len is the difference of sfb's`
			`* which themselves are always a multiple of 4. */`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00			`int i;`
Submit next part of FS#11498. Unroll loop for minor speedup of libwmapro on ARM (1%). git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27595 a1c6a512-1295-4272-9138-f99709370657 2010-07-28 18:15:53 +00:00			`for (i=0; i<len; i+=4) {`
			`dst[i ] = fixmul24(src[i ], mul);`
			`dst[i+1] = fixmul24(src[i+1], mul);`
			`dst[i+2] = fixmul24(src[i+2], mul);`
			`dst[i+3] = fixmul24(src[i+3], mul);`
			`}`
Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling, imdct and windowing are all in fixed point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657 2010-07-05 22:33:37 +00:00			`}`
Complete the conversion of WMA Pro to fixed point. Floating point code is still there for better history and to have a returning point in svn should something go wrong. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27402 a1c6a512-1295-4272-9138-f99709370657 2010-07-12 15:41:10 +00:00
Initial cleanup for libwmapro. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27425 a1c6a512-1295-4272-9138-f99709370657 2010-07-15 05:38:09 +00:00			`static inline int av_clip(int a, int amin, int amax)`
			`{`
			`if (a < amin) return amin;`
			`else if (a > amax) return amax;`
			`else return a;`
			`}`
Complete the conversion of WMA Pro to fixed point. Floating point code is still there for better history and to have a returning point in svn should something go wrong. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27402 a1c6a512-1295-4272-9138-f99709370657 2010-07-12 15:41:10 +00:00			`#endif /* _WMAPRO_MATH_H_ */`