From 2e9c77cc2af6a03f1f01866a0c5ce8cfa4e249cf Mon Sep 17 00:00:00 2001
From: Jens Arnold
Date: Fri, 19 Oct 2007 07:30:55 +0000
Subject: [PATCH] APE codec: Further optimised filtering yields 3..4% speedup
 for -c2000 (now 135% realtime), -c3000 (now 97% realtime) and higher modes.
 Single 32 bit stores are faster than movem/lea in IRAM.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15200 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/demac/libdemac/vector_math16_cf.h | 71 ++++++++++---------
 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 85258c97a5..937462c293 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -38,31 +38,34 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
         "moveq.l %[cnt], %%d5       \n"
     "1:                             \n"
 #endif
-        "movem.l (%[v1]), %%d0-%%d3 \n"
         "movem.l (%[v2]), %%a0-%%a3 \n"
-
-        ADDHALFREGS(%%a0, %%d0)
-        ADDHALFREGS(%%a1, %%d1)
-        ADDHALFREGS(%%a2, %%d2)
-        ADDHALFREGS(%%a3, %%d3)
-
-        "movem.l %%d0-%%d3, (%[v1]) \n"
-        "lea.l (16, %[v1]), %[v1]   \n"
         "movem.l (%[v1]), %%d0-%%d3 \n"
+        ADDHALFREGS(%%a0, %%d0)
+        "move.l %%d0, (%[v1])+      \n"
+        ADDHALFREGS(%%a1, %%d1)
+        "move.l %%d1, (%[v1])+      \n"
+        ADDHALFREGS(%%a2, %%d2)
+        "move.l %%d2, (%[v1])+      \n"
+        ADDHALFREGS(%%a3, %%d3)
+        "move.l %%d3, (%[v1])+      \n"
+        "lea.l (16, %[v2]), %[v2]   \n"
+        "movem.l (%[v2]), %%a0-%%a3 \n"
-
+        "movem.l (%[v1]), %%d0-%%d3 \n"
         ADDHALFREGS(%%a0, %%d0)
+        "move.l %%d0, (%[v1])+      \n"
         ADDHALFREGS(%%a1, %%d1)
+        "move.l %%d1, (%[v1])+      \n"
         ADDHALFREGS(%%a2, %%d2)
+        "move.l %%d2, (%[v1])+      \n"
         ADDHALFREGS(%%a3, %%d3)
-
-        "movem.l %%d0-%%d3, (%[v1]) \n"
+        "move.l %%d3, (%[v1])+      \n"
 #if ORDER > 16
-        "lea.l (16, %[v1]), %[v1]   \n"
         "lea.l (16, %[v2]), %[v2]   \n"
+
         "subq.l #1, %%d5            \n"
-        "bne.s 1b                   \n"
+        "bne.w 1b                   \n"
 #endif
         : /* outputs */
         [v1]"+a"(v1),
@@ -89,31 +92,34 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
         "moveq.l %[cnt], %%d5       \n"
     "1:                             \n"
 #endif
-        "movem.l (%[v1]), %%a0-%%a3 \n"
         "movem.l (%[v2]), %%d1-%%d4 \n"
-
-        SUBHALFREGS(%%a0, %%d1, %%d0)
-        SUBHALFREGS(%%a1, %%d2, %%d1)
-        SUBHALFREGS(%%a2, %%d3, %%d2)
-        SUBHALFREGS(%%a3, %%d4, %%d3)
-
-        "movem.l %%d0-%%d3, (%[v1]) \n"
-        "lea.l (16, %[v1]), %[v1]   \n"
         "movem.l (%[v1]), %%a0-%%a3 \n"
+        SUBHALFREGS(%%a0, %%d1, %%d0)
+        "move.l %%d0, (%[v1])+      \n"
+        SUBHALFREGS(%%a1, %%d2, %%d1)
+        "move.l %%d1, (%[v1])+      \n"
+        SUBHALFREGS(%%a2, %%d3, %%d2)
+        "move.l %%d2, (%[v1])+      \n"
+        SUBHALFREGS(%%a3, %%d4, %%d3)
+        "move.l %%d3, (%[v1])+      \n"
+        "lea.l (16, %[v2]), %[v2]   \n"
-        "movem.l (%[v2]), %%d1-%%d4 \n"
-
-        SUBHALFREGS(%%a0, %%d1, %%d0)
-        SUBHALFREGS(%%a1, %%d2, %%d1)
-        SUBHALFREGS(%%a2, %%d3, %%d2)
-        SUBHALFREGS(%%a3, %%d4, %%d3)
-        "movem.l %%d0-%%d3, (%[v1]) \n"
+        "movem.l (%[v2]), %%d1-%%d4 \n"
+        "movem.l (%[v1]), %%a0-%%a3 \n"
+        SUBHALFREGS(%%a0, %%d1, %%d0)
+        "move.l %%d0, (%[v1])+      \n"
+        SUBHALFREGS(%%a1, %%d2, %%d1)
+        "move.l %%d1, (%[v1])+      \n"
+        SUBHALFREGS(%%a2, %%d3, %%d2)
+        "move.l %%d2, (%[v1])+      \n"
+        SUBHALFREGS(%%a3, %%d4, %%d3)
+        "move.l %%d3, (%[v1])+      \n"
 #if ORDER > 16
-        "lea.l (16, %[v1]), %[v1]   \n"
         "lea.l (16, %[v2]), %[v2]   \n"
+
        "subq.l #1, %%d5             \n"
-        "bne.s 1b                   \n"
+        "bne.w 1b                   \n"
 #endif
         : /* outputs */
         [v1]"+a"(v1),
@@ -160,6 +166,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #if ORDER > 32
         "mac.w %%d2u, %%d3u, (%[v1])+, %%d0, %%acc0\n"
         "mac.w %%d2l, %%d3l, (%[v2])+, %%d1, %%acc0\n"
+        "subq.l #1, %[res]          \n"
         "bne.w 1b                   \n"
 #else