Another minor ARM speedup for libwmapro. Drop the LSB of the multiplication result in fixmul31(). The difference from the current implementation is +/-1 in the output signal. The same routines are used for other codecs and in the codec lib as well.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27728 a1c6a512-1295-4272-9138-f99709370657
2010-08-05 21:59:29 +00:00 · 2010-08-05 21:59:29 +00:00 · 4b49ef2ade
commit 4b49ef2ade
parent cc7fac27b5
1 changed files with 3 additions and 4 deletions
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@ -53,18 +53,17 @@
        lo; \
     })
     
-    /* Calculates: result = (X*Y)>>31 */
+    /* Calculates: result = (X*Y)>>31, losing 1 bit of precision (result LSB is always 0) */
    #define fixmul31(X,Y) \
     ({ \
        int32_t lo; \
        int32_t hi; \
        asm volatile ( \
           "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
-           "mov   %[lo], %[lo], lsr #31    \n\t" /* lo >>= 31 */ \
-           "orr   %[lo], %[lo], %[hi], lsl #1"   /* lo |= (hi << 1) */ \
+           "mov   %[hi], %[hi], lsl #1"          /* hi <<= 1 */ \
           : [lo]"=&r"(lo), [hi]"=&r"(hi) \
           : [x]"r"(X), [y]"r"(Y)); \
-        lo; \
+        hi; \
     })
 #elif defined(CPU_COLDFIRE)
    /* Calculates: result = (X*Y)>>Z */