SPC Codec: Run SPC emulation on COP and audio sample processing on CPU on dual-core PortalPlayer targets.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15673 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
984a6b0236
commit
1a41c8afee
5 changed files with 552 additions and 128 deletions
|
@ -162,7 +162,19 @@ struct codec_api ci = {
|
|||
invalidate_icache,
|
||||
#endif
|
||||
|
||||
NULL, /* struct sp_data *dsp */
|
||||
NULL, /* struct dsp_config *dsp */
|
||||
|
||||
#if NUM_CORES > 1
|
||||
create_thread,
|
||||
thread_thaw,
|
||||
thread_wait,
|
||||
semaphore_init,
|
||||
semaphore_wait,
|
||||
semaphore_release,
|
||||
event_init,
|
||||
event_wait,
|
||||
event_set_state,
|
||||
#endif
|
||||
};
|
||||
|
||||
void codec_get_full_path(char *path, const char *codec_root_fn)
|
||||
|
|
|
@ -80,7 +80,7 @@
|
|||
#define CODEC_ENC_MAGIC 0x52454E43 /* RENC */
|
||||
|
||||
/* increase this every time the api struct changes */
|
||||
#define CODEC_API_VERSION 20
|
||||
#define CODEC_API_VERSION 21
|
||||
|
||||
/* update this to latest version if a change to the api struct breaks
|
||||
backwards compatibility (and please take the opportunity to sort in any
|
||||
|
@ -236,6 +236,23 @@ struct codec_api {
|
|||
#endif
|
||||
|
||||
struct dsp_config *dsp;
|
||||
|
||||
#if NUM_CORES > 1
|
||||
struct thread_entry *
|
||||
(*create_thread)(void (*function)(void), void* stack,
|
||||
int stack_size, unsigned flags, const char *name
|
||||
IF_PRIO(, int priority)
|
||||
IF_COP(, unsigned int core));
|
||||
|
||||
void (*thread_thaw)(struct thread_entry *thread);
|
||||
void (*thread_wait)(struct thread_entry *thread);
|
||||
void (*semaphore_init)(struct semaphore *s, int max, int start);
|
||||
void (*semaphore_wait)(struct semaphore *s);
|
||||
void (*semaphore_release)(struct semaphore *s);
|
||||
void (*event_init)(struct event *e, unsigned int flags);
|
||||
void (*event_wait)(struct event *e, unsigned int for_state);
|
||||
void (*event_set_state)(struct event *e, unsigned int state);
|
||||
#endif /* NUM_CORES */
|
||||
};
|
||||
|
||||
/* codec header */
|
||||
|
|
|
@ -185,12 +185,253 @@ static int LoadID666(unsigned char *buf) {
|
|||
}
|
||||
|
||||
/**************** Codec ****************/
|
||||
|
||||
static int32_t samples[WAV_CHUNK_SIZE*2] IBSS_ATTR;
|
||||
|
||||
static struct Spc_Emu spc_emu IDATA_ATTR;
|
||||
|
||||
enum {SAMPLE_RATE = 32000};
|
||||
static struct Spc_Emu spc_emu IDATA_ATTR CACHEALIGN_ATTR;
|
||||
|
||||
#if SPC_DUAL_CORE
|
||||
/** Implementations for pipelined dual-core operation **/
|
||||
static int spc_emu_thread_stack[DEFAULT_STACK_SIZE/sizeof(int)]
|
||||
CACHEALIGN_ATTR;
|
||||
|
||||
/* Name given to the emulator thread when it is created.  Declared as plain
   char (not unsigned char) so that it matches the `const char *name`
   parameter of create_thread without a pointer-signedness conversion. */
static const char * const spc_emu_thread_name = "spc emu";
|
||||
/* Handle of the emulator thread: assigned from create_thread() and thawed in
   spc_emu_start(), waited on in spc_emu_quit(). */
static struct thread_entry *emu_thread_p;
|
||||
|
||||
/* Chunk ids for the sample queue.  A chunk normally carries audio; any other
   id marks it as a command for the emulator thread. */
enum
{
    SPC_EMU_AUDIO = 0, /* chunk holds rendered audio */
    SPC_EMU_LOAD  = 1, /* load a program; data points to a struct spc_load */
    SPC_EMU_QUIT  = 2, /* emulator thread should leave its loop */
};
|
||||
|
||||
/* Argument block for an SPC_EMU_LOAD message; both fields are forwarded
   unchanged to SPC_load_spc() by the emulator thread. */
struct spc_load
{
    uint8_t *buf;  /* start of the SPC file image */
    size_t   size; /* size passed along with buf */
};
|
||||
|
||||
/* Sample queue geometry.  Head and tail are running counters that are reduced
   to a slot index with WAV_CHUNK_MASK, so the chunk count has to remain a
   power of two. */
#define WAV_NUM_CHUNKS  2
#define WAV_CHUNK_MASK  (WAV_NUM_CHUNKS - 1)
|
||||
struct sample_queue_chunk
|
||||
{
|
||||
long id;
|
||||
union
|
||||
{
|
||||
intptr_t data;
|
||||
int32_t audio[WAV_CHUNK_SIZE*2];
|
||||
};
|
||||
};
|
||||
|
||||
static struct
|
||||
{
|
||||
int head, tail;
|
||||
struct semaphore emu_sem_head;
|
||||
struct semaphore emu_sem_tail;
|
||||
struct event emu_evt_reply;
|
||||
intptr_t retval;
|
||||
struct sample_queue_chunk wav_chunk[WAV_NUM_CHUNKS];
|
||||
} sample_queue NOCACHEBSS_ATTR;
|
||||
|
||||
static inline void samples_release_wrbuf(void)
|
||||
{
|
||||
sample_queue.tail++;
|
||||
ci->semaphore_release(&sample_queue.emu_sem_head);
|
||||
}
|
||||
|
||||
static inline struct sample_queue_chunk * samples_get_wrbuf(void)
|
||||
{
|
||||
ci->semaphore_wait(&sample_queue.emu_sem_tail);
|
||||
return &sample_queue.wav_chunk[sample_queue.tail & WAV_CHUNK_MASK];
|
||||
}
|
||||
|
||||
static inline void samples_release_rdbuf(void)
|
||||
{
|
||||
if (sample_queue.head != sample_queue.tail) {
|
||||
sample_queue.head++;
|
||||
}
|
||||
|
||||
ci->semaphore_release(&sample_queue.emu_sem_tail);
|
||||
}
|
||||
|
||||
static inline int32_t * samples_get_rdbuf(void)
|
||||
{
|
||||
ci->semaphore_wait(&sample_queue.emu_sem_head);
|
||||
|
||||
if (ci->stop_codec || ci->new_track)
|
||||
{
|
||||
/* Told to stop. Buffer must be released. */
|
||||
samples_release_rdbuf();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return sample_queue.wav_chunk[sample_queue.head & WAV_CHUNK_MASK].audio;
|
||||
}
|
||||
|
||||
static intptr_t emu_thread_send_msg(long id, intptr_t data)
|
||||
{
|
||||
struct sample_queue_chunk *chunk;
|
||||
/* Grab an audio output buffer */
|
||||
ci->semaphore_wait(&sample_queue.emu_sem_head);
|
||||
chunk = &sample_queue.wav_chunk[sample_queue.head & WAV_CHUNK_MASK];
|
||||
/* Place a message in it instead of audio */
|
||||
chunk->id = id;
|
||||
chunk->data = data;
|
||||
/* Release it to the emu thread */
|
||||
samples_release_rdbuf();
|
||||
/* Wait for a response */
|
||||
ci->event_wait(&sample_queue.emu_evt_reply, STATE_SIGNALED);
|
||||
return sample_queue.retval;
|
||||
}
|
||||
|
||||
/* thread function */
|
||||
static bool emu_thread_process_msg(struct sample_queue_chunk *chunk)
|
||||
{
|
||||
long id = chunk->id;
|
||||
bool ret = id != SPC_EMU_QUIT;
|
||||
|
||||
chunk->id = SPC_EMU_AUDIO; /* Reset chunk type to audio */
|
||||
sample_queue.retval = 0;
|
||||
|
||||
if (id == SPC_EMU_LOAD)
|
||||
{
|
||||
struct spc_load *ld = (struct spc_load *)chunk->data;
|
||||
invalidate_icache();
|
||||
SPC_Init(&spc_emu);
|
||||
sample_queue.retval = SPC_load_spc(&spc_emu, ld->buf, ld->size);
|
||||
}
|
||||
|
||||
/* Empty the audio queue */
|
||||
/* This is a dirty hack a timeout based wait would make unnescessary but
|
||||
still safe because the other thread is known to be waiting for a reply
|
||||
and is not using the objects. */
|
||||
ci->semaphore_init(&sample_queue.emu_sem_tail, 2, 2);
|
||||
ci->semaphore_init(&sample_queue.emu_sem_head, 2, 0);
|
||||
sample_queue.head = sample_queue.tail = 0;
|
||||
ci->event_set_state(&sample_queue.emu_evt_reply, STATE_SIGNALED);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void spc_emu_thread(void)
|
||||
{
|
||||
CPU_Init(&spc_emu);
|
||||
|
||||
while (1) {
|
||||
/* get a buffer for output */
|
||||
struct sample_queue_chunk *chunk = samples_get_wrbuf();
|
||||
|
||||
if (chunk->id != SPC_EMU_AUDIO) {
|
||||
/* This chunk doesn't contain audio but a command */
|
||||
if (!emu_thread_process_msg(chunk))
|
||||
break;
|
||||
/* Have to re-get this pointer to keep semaphore counts correct */
|
||||
continue;
|
||||
}
|
||||
|
||||
ENTER_TIMER(render);
|
||||
/* fill samples buffer */
|
||||
if ( SPC_play(&spc_emu, WAV_CHUNK_SIZE*2, chunk->audio) )
|
||||
assert( false );
|
||||
EXIT_TIMER(render);
|
||||
|
||||
/* done so release it to output */
|
||||
samples_release_wrbuf();
|
||||
ci->yield();
|
||||
}
|
||||
}
|
||||
|
||||
static bool spc_emu_start(void)
|
||||
{
|
||||
emu_thread_p = ci->create_thread(spc_emu_thread, spc_emu_thread_stack,
|
||||
sizeof(spc_emu_thread_stack), CREATE_THREAD_FROZEN,
|
||||
spc_emu_thread_name IF_PRIO(, PRIORITY_PLAYBACK), COP);
|
||||
|
||||
if (emu_thread_p == NULL)
|
||||
return false;
|
||||
|
||||
/* Initialize audio queue as full to prevent emu thread from trying to run the
|
||||
emulator before loading something */
|
||||
ci->event_init(&sample_queue.emu_evt_reply,
|
||||
EVENT_AUTOMATIC | STATE_NONSIGNALED);
|
||||
ci->semaphore_init(&sample_queue.emu_sem_tail, 2, 0);
|
||||
ci->semaphore_init(&sample_queue.emu_sem_head, 2, 2);
|
||||
sample_queue.head = 0;
|
||||
sample_queue.tail = 2;
|
||||
|
||||
/* Start it running */
|
||||
ci->thread_thaw(emu_thread_p);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* load a new program on the emu thread */
|
||||
static inline int load_spc_buffer(uint8_t *buf, size_t size)
|
||||
{
|
||||
struct spc_load ld = { buf, size };
|
||||
flush_icache();
|
||||
return emu_thread_send_msg(SPC_EMU_LOAD, (intptr_t)&ld);
|
||||
}
|
||||
|
||||
static inline void spc_emu_quit(void)
|
||||
{
|
||||
emu_thread_send_msg(SPC_EMU_QUIT, 0);
|
||||
/* Wait for emu thread to be killed */
|
||||
ci->thread_wait(emu_thread_p);
|
||||
}
|
||||
|
||||
/* Dual-core: fetch the next filled chunk from the sample queue.
 *
 * samples: receives the chunk's audio pointer, or NULL if the codec was told
 *          to stop.
 * Returns true when a chunk was obtained, false otherwise.
 */
static inline bool spc_play_get_samples(int32_t **samples)
{
    int32_t *filled = samples_get_rdbuf();

    *samples = filled;
    return filled != NULL;
}
|
||||
|
||||
static inline void spc_play_send_samples(int32_t *samples)
|
||||
{
|
||||
ci->pcmbuf_insert(samples, samples+WAV_CHUNK_SIZE, WAV_CHUNK_SIZE);
|
||||
/* done with chunk so release it to emu thread */
|
||||
samples_release_rdbuf();
|
||||
}
|
||||
|
||||
#else /* !SPC_DUAL_CORE */
|
||||
/** Implementations for single-core operation **/
|
||||
/* Sample buffer for single-core operation: spc_play_get_samples() has
   SPC_play() fill it and hands it to the decoder loop. */
int32_t wav_chunk[WAV_CHUNK_SIZE*2] IBSS_ATTR;
|
||||
|
||||
/* load a new program into emu */
|
||||
static inline int load_spc_buffer(uint8_t *buf, size_t size)
|
||||
{
|
||||
SPC_Init(&spc_emu);
|
||||
return SPC_load_spc(&spc_emu, buf, size);
|
||||
}
|
||||
|
||||
static inline bool spc_emu_start(void)
|
||||
{
|
||||
#ifdef CPU_COLDFIRE
|
||||
/* signed integer mode with saturation */
|
||||
coldfire_set_macsr(EMAC_SATURATE);
|
||||
#endif
|
||||
CPU_Init(&spc_emu);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void spc_play_send_samples(int32_t *samples)
|
||||
{
|
||||
ci->pcmbuf_insert(samples, samples+WAV_CHUNK_SIZE, WAV_CHUNK_SIZE);
|
||||
}
|
||||
|
||||
/* Single-core: there is no emulator thread and no sample queue, so these
   calls from the shared decoder code are no-ops. */
#define spc_emu_quit()           do { } while (0)
#define samples_release_rdbuf()  do { } while (0)
|
||||
|
||||
static inline bool spc_play_get_samples(int32_t **samples)
|
||||
{
|
||||
ENTER_TIMER(render);
|
||||
/* fill samples buffer */
|
||||
if ( SPC_play(&spc_emu,WAV_CHUNK_SIZE*2,wav_chunk) )
|
||||
assert( false );
|
||||
EXIT_TIMER(render);
|
||||
*samples = wav_chunk;
|
||||
return true;
|
||||
}
|
||||
#endif /* SPC_DUAL_CORE */
|
||||
|
||||
/* The main decoder loop */
|
||||
static int play_track( void )
|
||||
|
@ -206,7 +447,7 @@ static int play_track( void )
|
|||
fadedec=0x7fffffffl/(fadeendsample-fadestartsample)+1;
|
||||
|
||||
ENTER_TIMER(total);
|
||||
|
||||
|
||||
while ( 1 )
|
||||
{
|
||||
ci->yield();
|
||||
|
@ -224,14 +465,12 @@ static int play_track( void )
|
|||
}
|
||||
ci->seek_complete();
|
||||
}
|
||||
|
||||
ENTER_TIMER(render);
|
||||
/* fill samples buffer */
|
||||
if ( SPC_play(&spc_emu,WAV_CHUNK_SIZE*2,samples) )
|
||||
assert( false );
|
||||
EXIT_TIMER(render);
|
||||
|
||||
sampleswritten+=WAV_CHUNK_SIZE;
|
||||
|
||||
int32_t *samples;
|
||||
if (!spc_play_get_samples(&samples))
|
||||
break;
|
||||
|
||||
sampleswritten += WAV_CHUNK_SIZE;
|
||||
|
||||
/* is track timed? */
|
||||
if (ci->global_settings->repeat_mode!=REPEAT_ONE && ci->id3->length) {
|
||||
|
@ -241,11 +480,11 @@ static int play_track( void )
|
|||
/* fade? */
|
||||
if (curtime>ID666.length)
|
||||
{
|
||||
#ifdef CPU_COLDFIRE
|
||||
#ifdef CPU_COLDFIRE
|
||||
/* Have to switch modes to do this */
|
||||
long macsr = coldfire_get_macsr();
|
||||
coldfire_set_macsr(EMAC_SATURATE | EMAC_FRACTIONAL | EMAC_ROUND);
|
||||
#endif
|
||||
#endif
|
||||
int i;
|
||||
for (i=0;i<WAV_CHUNK_SIZE;i++) {
|
||||
if (lasttimesample+i>fadestartsample) {
|
||||
|
@ -256,42 +495,43 @@ static int play_track( void )
|
|||
fadevol-=fadedec;
|
||||
}
|
||||
}
|
||||
#ifdef CPU_COLDFIRE
|
||||
#ifdef CPU_COLDFIRE
|
||||
coldfire_set_macsr(macsr);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
/* end? */
|
||||
if (lasttimesample>=fadeendsample)
|
||||
{
|
||||
samples_release_rdbuf();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ci->pcmbuf_insert(samples, samples+WAV_CHUNK_SIZE, WAV_CHUNK_SIZE);
|
||||
spc_play_send_samples(samples);
|
||||
|
||||
if (ci->global_settings->repeat_mode!=REPEAT_ONE)
|
||||
ci->set_elapsed(sampleswritten*1000LL/SAMPLE_RATE);
|
||||
ci->set_elapsed(sampleswritten*1000LL/SAMPLE_RATE);
|
||||
else
|
||||
ci->set_elapsed(0);
|
||||
}
|
||||
|
||||
EXIT_TIMER(total);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* this is the codec entry point */
|
||||
enum codec_status codec_main(void)
|
||||
{
|
||||
#ifdef CPU_COLDFIRE
|
||||
/* signed integer mode with saturation */
|
||||
coldfire_set_macsr(EMAC_SATURATE);
|
||||
#endif
|
||||
CPU_Init(&spc_emu);
|
||||
enum codec_status stat = CODEC_ERROR;
|
||||
|
||||
if (!spc_emu_start())
|
||||
goto codec_quit;
|
||||
|
||||
do
|
||||
{
|
||||
DEBUGF("SPC: next_track\n");
|
||||
if (codec_init()) {
|
||||
return CODEC_ERROR;
|
||||
goto codec_quit;
|
||||
}
|
||||
DEBUGF("SPC: after init\n");
|
||||
|
||||
|
@ -301,7 +541,7 @@ enum codec_status codec_main(void)
|
|||
|
||||
/* wait for track info to load */
|
||||
while (!*ci->taginfo_ready && !ci->stop_codec)
|
||||
ci->sleep(1);
|
||||
ci->yield();
|
||||
|
||||
codec_set_replaygain(ci->id3);
|
||||
|
||||
|
@ -313,20 +553,19 @@ enum codec_status codec_main(void)
|
|||
size_t buffersize;
|
||||
uint8_t* buffer = ci->request_buffer(&buffersize, ci->filesize);
|
||||
if (!buffer) {
|
||||
return CODEC_ERROR;
|
||||
goto codec_quit;
|
||||
}
|
||||
|
||||
DEBUGF("SPC: read size = 0x%lx\n",(unsigned long)buffersize);
|
||||
do
|
||||
{
|
||||
SPC_Init(&spc_emu);
|
||||
if (SPC_load_spc(&spc_emu,buffer,buffersize)) {
|
||||
if (load_spc_buffer(buffer, buffersize)) {
|
||||
DEBUGF("SPC load failure\n");
|
||||
return CODEC_ERROR;
|
||||
goto codec_quit;
|
||||
}
|
||||
|
||||
LoadID666(buffer+0x2e);
|
||||
|
||||
|
||||
if (ci->global_settings->repeat_mode!=REPEAT_ONE && ID666.length==0) {
|
||||
ID666.length=3*60*1000; /* 3 minutes */
|
||||
ID666.fade=5*1000; /* 5 seconds */
|
||||
|
@ -340,12 +579,16 @@ enum codec_status codec_main(void)
|
|||
|
||||
reset_profile_timers();
|
||||
}
|
||||
|
||||
while ( play_track() );
|
||||
|
||||
print_timers(ci->id3->path);
|
||||
}
|
||||
while ( ci->request_next_track() );
|
||||
|
||||
stat = CODEC_OK;
|
||||
|
||||
codec_quit:
|
||||
spc_emu_quit();
|
||||
|
||||
return CODEC_OK;
|
||||
return stat;
|
||||
}
|
||||
|
|
|
@ -32,38 +32,51 @@
|
|||
|
||||
/** Basic configuration options **/
|
||||
|
||||
#define SPC_DUAL_CORE 1
|
||||
|
||||
#if !defined(SPC_DUAL_CORE) || NUM_CORES == 1
|
||||
#undef SPC_DUAL_CORE
|
||||
#define SPC_DUAL_CORE 0
|
||||
#endif
|
||||
|
||||
/* TGB is the only target fast enough for gaussian and realtime BRR decode */
|
||||
/* echo is almost fast enough but not quite */
|
||||
#ifndef TOSHIBA_GIGABEAT_F
|
||||
/* Cache BRR waves */
|
||||
#define SPC_BRRCACHE 1
|
||||
|
||||
/* Disable gaussian interpolation */
|
||||
#define SPC_NOINTERP 1
|
||||
|
||||
#ifndef CPU_COLDFIRE
|
||||
/* Disable echo processing */
|
||||
#define SPC_NOECHO 1
|
||||
#else
|
||||
/* Enable echo processing */
|
||||
#define SPC_NOECHO 0
|
||||
#endif
|
||||
#else
|
||||
#if defined(TOSHIBA_GIGABEAT_F) || defined(SIMULATOR)
|
||||
/* Don't cache BRR waves */
|
||||
#define SPC_BRRCACHE 0
|
||||
|
||||
/* Allow gaussian interpolation */
|
||||
#define SPC_NOINTERP 0
|
||||
|
||||
|
||||
/* Allow echo processing */
|
||||
#define SPC_NOECHO 0
|
||||
#endif
|
||||
#elif defined(CPU_COLDFIRE)
|
||||
/* Cache BRR waves */
|
||||
#define SPC_BRRCACHE 1
|
||||
|
||||
/* Disable gaussian interpolation */
|
||||
#define SPC_NOINTERP 1
|
||||
|
||||
/* Samples per channel per iteration */
|
||||
#ifdef CPU_COLDFIRE
|
||||
#define WAV_CHUNK_SIZE 1024
|
||||
/* Allow echo processing */
|
||||
#define SPC_NOECHO 0
|
||||
#elif defined (CPU_PP) && SPC_DUAL_CORE
|
||||
/* Cache BRR waves */
|
||||
#define SPC_BRRCACHE 1
|
||||
|
||||
/* Disable gaussian interpolation */
|
||||
#define SPC_NOINTERP 1
|
||||
|
||||
/* Allow echo processing */
|
||||
#define SPC_NOECHO 0
|
||||
#else
|
||||
#define WAV_CHUNK_SIZE 2048
|
||||
/* Cache BRR waves */
|
||||
#define SPC_BRRCACHE 1
|
||||
|
||||
/* Disable gaussian interpolation */
|
||||
#define SPC_NOINTERP 1
|
||||
|
||||
/* Disable echo processing */
|
||||
#define SPC_NOECHO 1
|
||||
#endif
|
||||
|
||||
#ifdef CPU_ARM
|
||||
|
@ -72,6 +85,26 @@
|
|||
|
||||
#undef IDATA_ATTR
|
||||
#define IDATA_ATTR
|
||||
|
||||
#undef ICONST_ATTR
|
||||
#define ICONST_ATTR
|
||||
|
||||
#undef IBSS_ATTR
|
||||
#define IBSS_ATTR
|
||||
|
||||
#if SPC_DUAL_CORE
|
||||
#undef NOCACHEBSS_ATTR
|
||||
#define NOCACHEBSS_ATTR __attribute__ ((section(".ibss")))
|
||||
#undef NOCACHEDATA_ATTR
|
||||
#define NOCACHEDATA_ATTR __attribute__((section(".idata")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Samples per channel per iteration */
|
||||
#if defined(CPU_PP) && NUM_CORES == 1
|
||||
#define WAV_CHUNK_SIZE 2048
|
||||
#else
|
||||
#define WAV_CHUNK_SIZE 1024
|
||||
#endif
|
||||
|
||||
/**************** Little-endian handling ****************/
|
||||
|
@ -231,16 +264,26 @@ extern int16_t BRRcache [BRR_CACHE_SIZE];
|
|||
|
||||
enum { FIR_BUF_HALF = 8 };
|
||||
|
||||
#ifdef CPU_COLDFIRE
|
||||
#if defined(CPU_COLDFIRE)
|
||||
/* global because of the large alignment requirement for hardware masking -
|
||||
* L-R interleaved 16-bit samples for easy loading and mac.w use.
|
||||
*/
|
||||
enum
|
||||
{
|
||||
FIR_BUF_SIZE = FIR_BUF_HALF * sizeof ( int32_t ),
|
||||
FIR_BUF_MASK = ~FIR_BUF_SIZE
|
||||
FIR_BUF_CNT = FIR_BUF_HALF,
|
||||
FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ),
|
||||
FIR_BUF_ALIGN = FIR_BUF_SIZE * 2,
|
||||
FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) - 1))
|
||||
};
|
||||
#endif /* CPU_COLDFIRE */
|
||||
#elif defined (CPU_ARM)
|
||||
enum
|
||||
{
|
||||
FIR_BUF_CNT = FIR_BUF_HALF * 2 * 2,
|
||||
FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ),
|
||||
FIR_BUF_ALIGN = FIR_BUF_SIZE,
|
||||
FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) * 2 - 1))
|
||||
};
|
||||
#endif /* CPU_* */
|
||||
|
||||
struct Spc_Dsp
|
||||
{
|
||||
|
@ -257,14 +300,19 @@ struct Spc_Dsp
|
|||
int noise_count;
|
||||
uint16_t noise; /* also read as int16_t */
|
||||
|
||||
#ifdef CPU_COLDFIRE
|
||||
#if defined(CPU_COLDFIRE)
|
||||
/* circularly hardware masked address */
|
||||
int32_t *fir_ptr;
|
||||
/* wrapped address just behind current position -
|
||||
allows mac.w to increment and mask fir_ptr */
|
||||
int32_t *last_fir_ptr;
|
||||
/* copy of echo FIR constants as int16_t for use with mac.w */
|
||||
int16_t fir_coeff[VOICE_COUNT];
|
||||
int16_t fir_coeff [VOICE_COUNT];
|
||||
#elif defined (CPU_ARM)
|
||||
/* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
|
||||
int32_t *fir_ptr;
|
||||
/* copy of echo FIR constants as int32_t, for faster access */
|
||||
int32_t fir_coeff [VOICE_COUNT];
|
||||
#else
|
||||
/* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
|
||||
int fir_pos; /* (0 to 7) */
|
||||
|
|
|
@ -25,14 +25,13 @@
|
|||
#include "spc_codec.h"
|
||||
#include "spc_profiler.h"
|
||||
|
||||
#ifdef CPU_COLDFIRE
|
||||
static int32_t fir_buf[FIR_BUF_HALF]
|
||||
__attribute__ ((aligned (FIR_BUF_SIZE*2))) IBSS_ATTR;
|
||||
#if defined(CPU_COLDFIRE) || defined (CPU_ARM)
|
||||
int32_t fir_buf[FIR_BUF_CNT]
|
||||
__attribute__ ((aligned (FIR_BUF_ALIGN*1))) IBSS_ATTR;
|
||||
#endif
|
||||
|
||||
#if SPC_BRRCACHE
|
||||
/* a little extra for samples that go past end */
|
||||
int16_t BRRcache [0x20000 + 32];
|
||||
int16_t BRRcache [BRR_CACHE_SIZE];
|
||||
#endif
|
||||
|
||||
void DSP_write( struct Spc_Dsp* this, int i, int data )
|
||||
|
@ -58,11 +57,12 @@ void DSP_write( struct Spc_Dsp* this, int i, int data )
|
|||
|
||||
/* if ( n < -32768 ) out = -32768; */
|
||||
/* if ( n > 32767 ) out = 32767; */
|
||||
#define CLAMP16( n, out )\
|
||||
{\
|
||||
if ( (int16_t) n != n )\
|
||||
out = 0x7FFF ^ (n >> 31);\
|
||||
}
|
||||
/* Clamp n to the int16_t range and yield the clamped value.
 *
 * n must be a modifiable lvalue: it is overwritten in place when it is out
 * of range, and it is evaluated more than once, so it must not have side
 * effects.  The saturation value 0x7FFF ^ (n >> 31) relies on n being a
 * 32-bit signed value with an arithmetic right shift: it gives 0x7FFF for
 * positive overflow and -0x8000 for negative overflow.
 *
 * Uses a GCC statement expression so the result can be used as a value.
 */
#define CLAMP16( n ) \
({ \
    if ( (int16_t) (n) != (n) ) \
        (n) = 0x7FFF ^ ((n) >> 31); \
    (n); \
})
|
||||
|
||||
#if SPC_BRRCACHE
|
||||
static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
|
||||
|
@ -181,7 +181,7 @@ static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
|
|||
smp2 = smp1;
|
||||
}
|
||||
|
||||
CLAMP16( delta, delta );
|
||||
delta = CLAMP16( delta );
|
||||
smp1 = (int16_t) (delta * 2); /* sign-extend */
|
||||
}
|
||||
while ( (offset += 4) != 0 );
|
||||
|
@ -359,7 +359,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
#define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
|
||||
#define IF_RBE(...)
|
||||
#endif /* ROCKBOX_BIG_ENDIAN */
|
||||
|
||||
|
||||
#if !SPC_NOINTERP
|
||||
int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
|
||||
this->r.g.noise_enables;
|
||||
|
@ -431,7 +431,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
|
||||
/* Envelope */
|
||||
{
|
||||
int const env_range = 0x800;
|
||||
int const ENV_RANGE = 0x800;
|
||||
int env_mode = voice->env_mode;
|
||||
int adsr0 = raw_voice->adsr [0];
|
||||
int env_timer;
|
||||
|
@ -482,14 +482,14 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
|
||||
int envx = voice->envx;
|
||||
|
||||
int const step = env_range / 64;
|
||||
int const step = ENV_RANGE / 64;
|
||||
envx += step;
|
||||
if ( t == 15 )
|
||||
envx += env_range / 2 - step;
|
||||
envx += ENV_RANGE / 2 - step;
|
||||
|
||||
if ( envx >= env_range )
|
||||
if ( envx >= ENV_RANGE )
|
||||
{
|
||||
envx = env_range - 1;
|
||||
envx = ENV_RANGE - 1;
|
||||
voice->env_mode = state_decay;
|
||||
}
|
||||
voice->envx = envx;
|
||||
|
@ -516,7 +516,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
int mode = t >> 5;
|
||||
if ( mode <= 5 ) /* decay */
|
||||
{
|
||||
int step = env_range / 64;
|
||||
int step = ENV_RANGE / 64;
|
||||
if ( mode == 5 ) /* exponential */
|
||||
{
|
||||
envx--; /* envx *= 255 / 256 */
|
||||
|
@ -531,14 +531,14 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
}
|
||||
else /* attack */
|
||||
{
|
||||
int const step = env_range / 64;
|
||||
int const step = ENV_RANGE / 64;
|
||||
envx += step;
|
||||
if ( mode == 7 &&
|
||||
envx >= env_range * 3 / 4 + step )
|
||||
envx += env_range / 256 - step;
|
||||
envx >= ENV_RANGE * 3 / 4 + step )
|
||||
envx += ENV_RANGE / 256 - step;
|
||||
|
||||
if ( envx >= env_range )
|
||||
envx = env_range - 1;
|
||||
if ( envx >= ENV_RANGE )
|
||||
envx = ENV_RANGE - 1;
|
||||
}
|
||||
voice->envx = envx;
|
||||
/* TODO: should this be 8? */
|
||||
|
@ -550,7 +550,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
else /* state_release */
|
||||
{
|
||||
int envx = voice->envx;
|
||||
if ( (envx -= env_range / 256) > 0 )
|
||||
if ( (envx -= ENV_RANGE / 256) > 0 )
|
||||
{
|
||||
voice->envx = envx;
|
||||
raw_voice->envx = envx >> 8;
|
||||
|
@ -683,7 +683,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
smp2 = smp1;
|
||||
}
|
||||
|
||||
CLAMP16( delta, delta );
|
||||
delta = CLAMP16( delta );
|
||||
smp1 = (int16_t) (delta * 2); /* sign-extend */
|
||||
}
|
||||
while ( (offset += 4) != 0 );
|
||||
|
@ -778,7 +778,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
output = (output + rev [1] * interp [2]) >> 12;
|
||||
output = (int16_t) (output * 2);
|
||||
output += ((rev [0] * interp [3]) >> 12) * 2;
|
||||
CLAMP16( output, output );
|
||||
output = CLAMP16( output );
|
||||
}
|
||||
output = (output * voice->envx) >> 11 & ~1;
|
||||
|
||||
|
@ -788,7 +788,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
prev_outx = output;
|
||||
raw_voice->outx = (int8_t) (output >> 8);
|
||||
}
|
||||
#else
|
||||
#else /* SPCNOINTERP */
|
||||
/* two-point linear interpolation */
|
||||
#ifdef CPU_COLDFIRE
|
||||
int amp_0 = (int16_t)this->noise;
|
||||
|
@ -822,7 +822,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
/* output = y0 + (result >> 12) */
|
||||
"asr.l %[sh], %[y1] \r\n"
|
||||
"add.l %[y0], %[y1] \r\n"
|
||||
: [f]"+&d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0)
|
||||
: [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0)
|
||||
: [s]"a"(voice->samples), [sh]"d"(12)
|
||||
);
|
||||
}
|
||||
|
@ -861,17 +861,49 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
"movclr.l %%acc1, %[amp_1] \r\n"
|
||||
: [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1)
|
||||
);
|
||||
#else
|
||||
#elif defined (CPU_ARM)
|
||||
int amp_0, amp_1;
|
||||
|
||||
if ( (this->r.g.noise_enables & vbit) != 0 ) {
|
||||
amp_0 = *(int16_t *)&this->noise;
|
||||
} else {
|
||||
uint32_t f = voice->position;
|
||||
amp_0 = (uint32_t)voice->samples;
|
||||
|
||||
/* Try this one out on ARM and see - similar to above but the asm
|
||||
on coldfire removes a redundant register load worth 1 or 2%;
|
||||
switching to loading two samples at once may help too. That's
|
||||
done above and while 6 to 7% faster on cf over two 16 bit loads
|
||||
it makes it endian dependent.
|
||||
|
||||
measured small improvement (~1.5%) - hcs
|
||||
*/
|
||||
asm volatile(
|
||||
"mov %[y1], %[f], lsr #12 \r\n"
|
||||
"eor %[f], %[f], %[y1], lsl #12 \r\n"
|
||||
"add %[y1], %[y0], %[y1], lsl #1 \r\n"
|
||||
"ldrsh %[y0], [%[y1], #2] \r\n"
|
||||
"ldrsh %[y1], [%[y1], #4] \r\n"
|
||||
"sub %[y1], %[y1], %[y0] \r\n"
|
||||
"mul %[f], %[y1], %[f] \r\n"
|
||||
"add %[y0], %[y0], %[f], asr #12 \r\n"
|
||||
: [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1)
|
||||
);
|
||||
}
|
||||
|
||||
voice->position += rate;
|
||||
|
||||
asm volatile(
|
||||
"mul %[amp_1], %[amp_0], %[envx] \r\n"
|
||||
"mov %[amp_0], %[amp_1], asr #11 \r\n"
|
||||
"mov %[amp_1], %[amp_0], asr #8 \r\n"
|
||||
: [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1)
|
||||
: [envx]"r"(voice->envx)
|
||||
);
|
||||
|
||||
prev_outx = amp_0;
|
||||
raw_voice->outx = (int8_t)amp_1;
|
||||
|
||||
asm volatile(
|
||||
"mul %[amp_1], %[amp_0], %[vol_1] \r\n"
|
||||
"mul %[amp_0], %[vol_0], %[amp_0] \r\n"
|
||||
: [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1)
|
||||
: [vol_0]"r"((int)voice->volume[0]),
|
||||
[vol_1]"r"((int)voice->volume[1])
|
||||
);
|
||||
#else /* Unoptimized CPU */
|
||||
int output;
|
||||
|
||||
if ( (this->r.g.noise_enables & vbit) == 0 )
|
||||
|
@ -884,19 +916,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
}
|
||||
|
||||
voice->position += rate;
|
||||
|
||||
/* old version */
|
||||
#if 0
|
||||
int fraction = voice->position & 0xFFF;
|
||||
short const* const pos = voice->samples + (voice->position >> 12);
|
||||
voice->position += rate;
|
||||
int output =
|
||||
(pos [2] * fraction + pos [1] * (0x1000 - fraction)) >> 12;
|
||||
/* no interpolation (hardly faster, and crappy sounding) */
|
||||
/*int output = pos [0];*/
|
||||
if ( this->r.g.noise_enables & vbit )
|
||||
output = *(int16_t*) &this->noise;
|
||||
#endif
|
||||
|
||||
output = (output * voice->envx) >> 11;
|
||||
|
||||
/* duplicated here to give compiler more to run in parallel */
|
||||
|
@ -905,8 +925,8 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
|
||||
prev_outx = output;
|
||||
raw_voice->outx = (int8_t) (output >> 8);
|
||||
#endif /* CPU_COLDFIRE */
|
||||
#endif
|
||||
#endif /* CPU_* */
|
||||
#endif /* SPCNOINTERP */
|
||||
|
||||
#if SPC_BRRCACHE
|
||||
if ( voice->position >= voice->wave_end )
|
||||
|
@ -1033,7 +1053,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
"or.l %[sh], %[e0] \r\n"
|
||||
/* save final feedback into echo buffer */
|
||||
"move.l %[e0], (%[echo_ptr]) \r\n"
|
||||
: [e0]"+&d"(echo_0), [e1]"+&d"(echo_1)
|
||||
: [e0]"+d"(echo_0), [e1]"+d"(echo_1)
|
||||
: [out_0]"r"(out_0), [out_1]"r"(out_1),
|
||||
[ef]"r"((int)this->r.g.echo_feedback),
|
||||
[echo_ptr]"a"((int32_t *)echo_ptr),
|
||||
|
@ -1056,7 +1076,88 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
out_buf [ 0] = out_0;
|
||||
out_buf [WAV_CHUNK_SIZE] = out_1;
|
||||
out_buf ++;
|
||||
#else /* !CPU_COLDFIRE */
|
||||
#elif defined (CPU_ARM)
|
||||
/* Read feedback from echo buffer */
|
||||
int echo_pos = this->echo_pos;
|
||||
uint8_t* const echo_ptr = RAM +
|
||||
((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
|
||||
echo_pos += 4;
|
||||
if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
|
||||
echo_pos = 0;
|
||||
this->echo_pos = echo_pos;
|
||||
|
||||
int fb_0 = GET_LE16SA( echo_ptr );
|
||||
int fb_1 = GET_LE16SA( echo_ptr + 2 );
|
||||
|
||||
/* Keep last 8 samples */
|
||||
int32_t *fir_ptr = this->fir_ptr;
|
||||
|
||||
/* Apply FIR */
|
||||
asm volatile (
|
||||
"str %[fb_0], [%[fir_p]], #4 \r\n"
|
||||
"str %[fb_1], [%[fir_p]], #4 \r\n"
|
||||
/* duplicate at +8 eliminates wrap checking below */
|
||||
"str %[fb_0], [%[fir_p], #56] \r\n"
|
||||
"str %[fb_1], [%[fir_p], #60] \r\n"
|
||||
: [fir_p]"+r"(fir_ptr)
|
||||
: [fb_0]"r"(fb_0), [fb_1]"r"(fb_1)
|
||||
);
|
||||
|
||||
this->fir_ptr = (int32_t *)((intptr_t)fir_ptr & FIR_BUF_MASK);
|
||||
int32_t *fir_coeff = this->fir_coeff;
|
||||
|
||||
asm volatile (
|
||||
"ldmia %[fir_c]!, { r0-r1 } \r\n"
|
||||
"ldmia %[fir_p]!, { r4-r5 } \r\n"
|
||||
"mul %[fb_0], r0, %[fb_0] \r\n"
|
||||
"mul %[fb_1], r0, %[fb_1] \r\n"
|
||||
"mla %[fb_0], r4, r1, %[fb_0] \r\n"
|
||||
"mla %[fb_1], r5, r1, %[fb_1] \r\n"
|
||||
"ldmia %[fir_c]!, { r0-r1 } \r\n"
|
||||
"ldmia %[fir_p]!, { r2-r5 } \r\n"
|
||||
"mla %[fb_0], r2, r0, %[fb_0] \r\n"
|
||||
"mla %[fb_1], r3, r0, %[fb_1] \r\n"
|
||||
"mla %[fb_0], r4, r1, %[fb_0] \r\n"
|
||||
"mla %[fb_1], r5, r1, %[fb_1] \r\n"
|
||||
"ldmia %[fir_c]!, { r0-r1 } \r\n"
|
||||
"ldmia %[fir_p]!, { r2-r5 } \r\n"
|
||||
"mla %[fb_0], r2, r0, %[fb_0] \r\n"
|
||||
"mla %[fb_1], r3, r0, %[fb_1] \r\n"
|
||||
"mla %[fb_0], r4, r1, %[fb_0] \r\n"
|
||||
"mla %[fb_1], r5, r1, %[fb_1] \r\n"
|
||||
"ldmia %[fir_c]!, { r0-r1 } \r\n"
|
||||
"ldmia %[fir_p]!, { r2-r5 } \r\n"
|
||||
"mla %[fb_0], r2, r0, %[fb_0] \r\n"
|
||||
"mla %[fb_1], r3, r0, %[fb_1] \r\n"
|
||||
"mla %[fb_0], r4, r1, %[fb_0] \r\n"
|
||||
"mla %[fb_1], r5, r1, %[fb_1] \r\n"
|
||||
: [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1),
|
||||
[fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
|
||||
:
|
||||
: "r0", "r1", "r2", "r3", "r4", "r5"
|
||||
);
|
||||
|
||||
/* Generate output */
|
||||
int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
|
||||
>> global_muting;
|
||||
int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
|
||||
>> global_muting;
|
||||
|
||||
out_buf [ 0] = amp_0;
|
||||
out_buf [WAV_CHUNK_SIZE] = amp_1;
|
||||
out_buf ++;
|
||||
|
||||
if ( !(this->r.g.flags & 0x20) )
|
||||
{
|
||||
/* Feedback into echo buffer */
|
||||
int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
|
||||
int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
|
||||
e0 = CLAMP16( e0 );
|
||||
SET_LE16A( echo_ptr , e0 );
|
||||
e1 = CLAMP16( e1 );
|
||||
SET_LE16A( echo_ptr + 2, e1 );
|
||||
}
|
||||
#else /* Unoptimized CPU */
|
||||
/* Read feedback from echo buffer */
|
||||
int echo_pos = this->echo_pos;
|
||||
uint8_t* const echo_ptr = RAM +
|
||||
|
@ -1102,25 +1203,25 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
|
|||
out_buf [WAV_CHUNK_SIZE] = amp_1;
|
||||
out_buf ++;
|
||||
|
||||
/* Feedback into echo buffer */
|
||||
int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
|
||||
int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
|
||||
if ( !(this->r.g.flags & 0x20) )
|
||||
{
|
||||
CLAMP16( e0, e0 );
|
||||
/* Feedback into echo buffer */
|
||||
int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
|
||||
int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
|
||||
e0 = CLAMP16( e0 );
|
||||
SET_LE16A( echo_ptr , e0 );
|
||||
CLAMP16( e1, e1 );
|
||||
e1 = CLAMP16( e1 );
|
||||
SET_LE16A( echo_ptr + 2, e1 );
|
||||
}
|
||||
#endif /* CPU_COLDFIRE */
|
||||
#else
|
||||
#endif /* CPU_* */
|
||||
#else /* SPCNOECHO == 1*/
|
||||
/* Generate output */
|
||||
int amp_0 = (chans_0 * global_vol_0) >> global_muting;
|
||||
int amp_1 = (chans_1 * global_vol_1) >> global_muting;
|
||||
out_buf [ 0] = amp_0;
|
||||
out_buf [WAV_CHUNK_SIZE] = amp_1;
|
||||
out_buf ++;
|
||||
#endif
|
||||
#endif /* SPCNOECHO */
|
||||
}
|
||||
while ( --count );
|
||||
#if 0
|
||||
|
@ -1155,10 +1256,13 @@ void DSP_reset( struct Spc_Dsp* this )
|
|||
this->wave_entry [i].start_addr = -1;
|
||||
#endif
|
||||
|
||||
#ifdef CPU_COLDFIRE
|
||||
this->fir_ptr = fir_buf;
|
||||
#if defined(CPU_COLDFIRE)
|
||||
this->fir_ptr = fir_buf;
|
||||
this->last_fir_ptr = &fir_buf [7];
|
||||
ci->memset( fir_buf, 0, sizeof fir_buf );
|
||||
#elif defined (CPU_ARM)
|
||||
this->fir_ptr = fir_buf;
|
||||
ci->memset( fir_buf, 0, sizeof fir_buf );
|
||||
#else
|
||||
this->fir_pos = 0;
|
||||
ci->memset( this->fir_buf, 0, sizeof this->fir_buf );
|
||||
|
|
Loading…
Reference in a new issue