libtremor: Implement a memory configuration for targets that don't use separate iram for codecs.

Such targets would previously default to using the configuration for targets with small iram, which uses an extra memcpy per block.
This saves 2MHz when decoding a 128kbps vorbis file on the Gigabeat S and also saves a bit of codec buffer.
Patch from FS#11268, also replaces patch from FS#12147.


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29976 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Nils Wallménius 2011-06-06 13:27:12 +00:00
parent 684b74147f
commit e62b9a9aae
4 changed files with 67 additions and 40 deletions

View file

@ -40,8 +40,8 @@ static int ilog(unsigned int v){
static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR;
static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR;
static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR;
/* save original pointer returned by malloc so we can free it easily */
static ogg_int32_t* _first_pcm = NULL;
/* save original pointers returned by malloc so we can free them easily */
static ogg_int32_t* pcm_copy[CHANNELS] = {NULL};
/* pcm accumulator examples (not exhaustive):
@ -165,18 +165,21 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
v->vi=vi;
b->modebits=ilog(ci->modes);
#ifdef TREMOR_USE_IRAM
/* allocate IRAM buffer for the PCM data generated by synthesis */
iram_malloc_init();
v->first_pcm = iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
/* if the IRAM buffer can't be allocated, allocate a normal RAM buffer */
if(v->first_pcm == NULL){
_first_pcm = _ogg_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
v->first_pcm= _first_pcm;
if(v->first_pcm == NULL)
#endif
{
pcm_copy[0] = _ogg_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
v->first_pcm = pcm_copy[0];
}
v->centerW=0;
/* Vorbis I uses only window type 0 */
b_size[0]=ci->blocksizes[0]/2;
b_size[1]=ci->blocksizes[1]/2;
@ -222,6 +225,8 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
_pcmp[1]=NULL;
_pcmbp[0]=NULL;
_pcmbp[1]=NULL;
#ifdef TREMOR_USE_IRAM
if(NULL != (v->iram_double_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t))))
{
/* one-time initialisation at codec start or on switch from
@ -230,12 +235,15 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
}
else
#endif
{
/* one-time initialisation at codec start or on switch from
blocksizes that fit in IRAM_PCM_END to those that don't */
/* save copy of the pointer so we can free it easily later */
pcm_copy[1] = _ogg_calloc(vi->channels*v->pcm_storage,sizeof(*v->pcm[i]));
for(i=0;i<vi->channels;i++)
v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
}
v->pcm[i] = pcm_copy[1]+i*v->pcm_storage;
}
/* all 1 (large block) or 0 (small block) */
/* explicitly set for the sake of clarity */
@ -285,10 +293,16 @@ int vorbis_synthesis_restart(vorbis_dsp_state *v){
(if we're using double pcm buffer) and will need to reset them */
v->reset_pcmb = true;
/* also reset our copy of the double buffer pointers if we have one */
#ifdef TREMOR_USE_IRAM
if(v->iram_double_pcm)
{
for(i=0;i<vi->channels;i++)
v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
}
#else
for(i=0;i<vi->channels;i++)
v->pcm[i] = pcm_copy[1]+i*v->pcm_storage;
#endif
return(0);
}
@ -306,14 +320,15 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL);
private_state *b=(private_state *)v->backend_state;
if(_first_pcm != NULL)
_ogg_free(_first_pcm);
if(NULL == v->iram_double_pcm && vi != NULL)
if(vi != NULL)
{
/* pcm buffer came from oggmalloc rather than iram */
for(i=0;i<vi->channels;i++)
if(v->pcm[i])_ogg_free(v->pcm[i]);
for(i=0;i<2;i++)
if(pcm_copy[i])
{
_ogg_free(pcm_copy[i]);
pcm_copy[i] = NULL;
}
}
/* free mode lookups; these are actually vorbis_look_mapping structs */
@ -345,7 +360,9 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
private_state *b=v->backend_state;
int j;
#ifdef TREMOR_USE_IRAM
bool iram_pcm_doublebuffer = (NULL != v->iram_double_pcm);
#endif
if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL);
@ -369,16 +386,16 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
int n0=ci->blocksizes[0]/2;
int n1=ci->blocksizes[1]/2;
if(iram_pcm_doublebuffer)
{
prevCenter = ln;
}
else
#ifdef TREMOR_USE_IRAM
if(!iram_pcm_doublebuffer)
{
prevCenter = v->centerW;
v->centerW = n1 - v->centerW;
}
else
#endif
prevCenter = ln;
/* overlap/add PCM */
/* nb nothing to overlap with on first block so don't bother */
if(LIKELY(v->pcm_returned!=-1))
@ -410,20 +427,9 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
}
}
}
#ifdef TREMOR_USE_IRAM
/* the copy section */
if(iram_pcm_doublebuffer)
{
/* just flip the pointers over as we have a double buffer in iram */
ogg_int32_t *p;
p=v->pcm[0];
v->pcm[0]=vb->pcm[0];
vb->pcm[0] = p;
p=v->pcm[1];
v->pcm[1]=vb->pcm[1];
vb->pcm[1] = p;
}
else
if(!iram_pcm_doublebuffer)
{
for(j=0;j<vi->channels;j++)
{
@ -434,7 +440,19 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n);
}
}
else
#endif
{
/* just flip the pointers over as we have a double buffer in iram */
ogg_int32_t *p;
p=v->pcm[0];
v->pcm[0]=vb->pcm[0];
vb->pcm[0] = p;
p=v->pcm[1];
v->pcm[1]=vb->pcm[1];
vb->pcm[1] = p;
}
/* deal with initial packet state; we do this using the explicit
pcm_returned==-1 flag otherwise we're sensitive to first block
being short or long */
@ -448,7 +466,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
}
}
/* track the frame number... This is for convenience, but also
making sure our last packet doesn't end with added padding. If
the last packet is partial, the number of samples we'll have to

View file

@ -23,9 +23,13 @@
#define ICODE_ATTR_TREMOR_NOT_MDCT ICODE_ATTR
#endif
/* Enable special handling of buffers in faster ram, not useful when no
such different ram is available. There are 3 different memory configurations:
* No special iram, uses double buffers to avoid copying data
* Small special iram, copies buffers to run hottest processing in iram
* Large iram, uses double buffers in iram */
#ifdef USE_IRAM
#define TREMOR_USE_IRAM
#endif
/* Define CPU of large IRAM (PP5022/5024, MCF5250) */
#if (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024) || defined(CPU_S5L870X) || (CONFIG_CPU == MCF5250)
@ -34,13 +38,14 @@
* TOTAL : 41984 */
#define IRAM_IBSS_SIZE 41984
/* Define CPU of Normal IRAM (96KB) (and SIM also) */
/* Define CPU of Normal IRAM (96KB) */
#else
/* PCM_BUFFER : 16384 Byte (2048*2*4) *
* WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) *
* TOTAL : 20992 */
#define IRAM_IBSS_SIZE 20992
#endif
#endif
/* max 2 channels */
#define CHANNELS 2

View file

@ -79,7 +79,9 @@ typedef struct vorbis_dsp_state{
void *backend_state;
ogg_int32_t *first_pcm; /* PCM buffer (for normal RAM or IRAM)*/
#ifdef TREMOR_USE_IRAM
ogg_int32_t *iram_double_pcm; /* PCM 2nd buffer for IRAM */
#endif
bool reset_pcmb;
} vorbis_dsp_state;

View file

@ -60,6 +60,7 @@ void ogg_free(void* ptr)
tlsf_free(ptr);
}
#ifdef TREMOR_USE_IRAM
/* Allocate IRAM buffer */
static unsigned char iram_buff[IRAM_IBSS_SIZE] IBSS_ATTR MEM_ALIGN_ATTR;
static size_t iram_remain;
@ -82,3 +83,4 @@ void *iram_malloc(size_t size){
return x;
}
#endif