Further optimization and minor cleanup of the ATRAC codec: unrolling iqmf_dewindowing for non-ARM targets speeds up the decoder by 10% on the H300. Removed some unused arrays. The codec is still not fully real-time on Coldfire targets.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24648 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Andree Buschmann 2010-02-14 14:38:18 +00:00
parent bb13650d66
commit f5a0d61a1b
3 changed files with 78 additions and 16 deletions

View file

@ -98,6 +98,20 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
* @param in input buffer * @param in input buffer
* @param win windowing coefficients * @param win windowing coefficients
* @param nIn size of spectrum buffer * @param nIn size of spectrum buffer
* Reference implementation:
*
* for (j = nIn; j != 0; j--) {
* s1 = fixmul31(in[0], win[0]);
* s2 = fixmul31(in[1], win[1]);
* for (i = 2; i < 48; i += 2) {
* s1 += fixmul31(in[i ], win[i ]);
* s2 += fixmul31(in[i+1], win[i+1]);
* }
* out[0] = s2;
* out[1] = s1;
* in += 2;
* out += 2;
* }
*/ */
#if defined(CPU_ARM) #if defined(CPU_ARM)
@ -116,15 +130,61 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
int32_t i, j, s1, s2; int32_t i, j, s1, s2;
for (j = nIn; j != 0; j--) { for (j = nIn; j != 0; j--) {
/* i=0 */ i = 0;
s1 = fixmul31(win[0], in[0]); /* 0.. 7 */
s2 = fixmul31(win[1], in[1]); s1 = fixmul31(win[i], in[i]); i++;
s2 = fixmul31(win[i], in[i]); i++;
/* i=2..46 */ s1 += fixmul31(win[i], in[i]); i++;
for (i = 2; i < 48; i += 2) { s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i ], in[i ]); s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i+1], in[i+1]); s2 += fixmul31(win[i], in[i]); i++;
} s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
/* 8..15 */
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
/* 16..23 */
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
/* 24..31 */
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
/* 32..39 */
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
/* 40..47 */
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]); i++;
s1 += fixmul31(win[i], in[i]); i++;
s2 += fixmul31(win[i], in[i]);
out[0] = s2; out[0] = s2;
out[1] = s1; out[1] = s1;

View file

@ -117,18 +117,18 @@ static const int8_t decTable1[18] = {0,0, 0,1, 0,-1, 1,0, -1,0, 1,1, 1,-1, -1,1,
/* tables for the scalefactor decoding */ /* tables for the scalefactor decoding */
/* not needed anymore
static const float iMaxQuant[8] = { static const float iMaxQuant[8] = {
0.0, 1.0/1.5, 1.0/2.5, 1.0/3.5, 1.0/4.5, 1.0/7.5, 1.0/15.5, 1.0/31.5 0.0, 1.0/1.5, 1.0/2.5, 1.0/3.5, 1.0/4.5, 1.0/7.5, 1.0/15.5, 1.0/31.5
}; };
*/
static const uint16_t subbandTab[33] = { static const uint16_t subbandTab[33] = {
0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224,
256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024
}; };
/* transform data */ /* transform data */
/* not needed anymore
static const float qmf_48tap_half[24] = { static const float qmf_48tap_half[24] = {
-0.00001461907, -0.00009205479, -0.000056157569, 0.00030117269, -0.00001461907, -0.00009205479, -0.000056157569, 0.00030117269,
0.0002422519,-0.00085293897, -0.0005205574, 0.0020340169, 0.0002422519,-0.00085293897, -0.0005205574, 0.0020340169,
@ -137,8 +137,9 @@ static const float qmf_48tap_half[24] = {
-0.007801671, -0.034090221, 0.01880949, 0.054326009, -0.007801671, -0.034090221, 0.01880949, 0.054326009,
-0.043596379, -0.099384367, 0.13207909, 0.46424159 -0.043596379, -0.099384367, 0.13207909, 0.46424159
}; };
*/
/* joint stereo related tables */ /* joint stereo related tables */
/* not needed anymore
static const float matrixCoeffs[8] = {0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0}; static const float matrixCoeffs[8] = {0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0};
*/
#endif /* AVCODEC_ATRAC3DATA_H */ #endif /* AVCODEC_ATRAC3DATA_H */

View file

@ -9,6 +9,8 @@
#include "atrac3.h" #include "atrac3.h"
#include "../librm/rm.h" #include "../librm/rm.h"
ATRAC3Context q IBSS_ATTR;
static unsigned char wav_header[44]={ static unsigned char wav_header[44]={
'R','I','F','F',// 0 - ChunkID 'R','I','F','F',// 0 - ChunkID
0,0,0,0, // 4 - ChunkSize (filesize-8) 0,0,0,0, // 4 - ChunkSize (filesize-8)
@ -101,7 +103,6 @@ int main(int argc, char *argv[])
int16_t outbuf[2048]; int16_t outbuf[2048];
uint16_t fs,sps,h; uint16_t fs,sps,h;
uint32_t packet_count; uint32_t packet_count;
ATRAC3Context q;
RMContext rmctx; RMContext rmctx;
RMPacket pkt; RMPacket pkt;