Submit FS#11365. Speed up mp3 decoding on ARM processors. It is faster to use the C implementation of dct32 compiled with -O1 than the asm implementation of dct32 compiled with -O2. The configuration for Coldfire processors is untouched. With the new configuration, the stack of the COP decoding thread needs to be increased on dual-core targets. The speedup is up to 0.9 MHz (-O2 compared with -O1, using EABI).

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26746 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Andree Buschmann 2010-06-10 19:02:27 +00:00
parent 2c9f4fad72
commit 00e01e82cb
4 changed files with 15 additions and 4 deletions

View file

@ -14,6 +14,5 @@ imdct_mcf5249.S
#endif
#if defined(CPU_ARM)
imdct_l_arm.S
dct32_arm.S
synth_full_arm.S
#endif

View file

@ -11,7 +11,18 @@
# (one for codec, one for mpegplayer)
# so a little trickery is necessary
MADFLAGS = $(CODECFLAGS) -UDEBUG -DNDEBUG -O2 -I$(APPSDIR)/codecs/libmad -DHAVE_LIMITS_H
# Strip any optimization level ('-O...') from the compile flags; it is set explicitly below.
MADFLAGS = $(filter-out -O%,$(CODECFLAGS)) -I$(APPSDIR)/codecs/libmad
MADFLAGS += -UDEBUG -DNDEBUG -DHAVE_LIMITS_H
# libmad is faster on ARM targets with -O1 than with -O2
ifeq ($(CPU),arm)
MADFLAGS += -O1
else
MADFLAGS += -O2
endif
# MPEGplayer
MPEGMADFLAGS = $(MADFLAGS) -DMPEGPLAYER
# libmad

View file

@ -67,7 +67,8 @@ void mad_synth_mute(struct mad_synth *synth)
}
}
#ifdef FPM_ARM
#if 0 /* dct32 asm implementation is slower on current arm systems */
/* #ifdef FPM_ARM */
void dct32(mad_fixed_t const in[32], unsigned int slot,
mad_fixed_t lo[16][8], mad_fixed_t hi[16][8]);

View file

@ -202,7 +202,7 @@ static void set_elapsed(struct mp3entry* id3)
* Run the synthesis filter on the COProcessor
*/
static int mad_synth_thread_stack[DEFAULT_STACK_SIZE/sizeof(int)/2] IBSS_ATTR;
static int mad_synth_thread_stack[DEFAULT_STACK_SIZE/sizeof(int)] IBSS_ATTR;
static const unsigned char * const mad_synth_thread_name = "mp3dec";
static unsigned int mad_synth_thread_id = 0;