rockbox/lib/rbcodec/dsp/tdspeed.c
Michael Sevakis 78a45b47de Cleanup and simplify latest DSP code incarnation.
Some things can just be a bit simpler in handling the list of stages
and some things, especially format change handling, can be simplified
for each stage implementation. Format changes are sent through the
configure() callback.

Hide some internal details and variables from processing stages and
let the core deal with it.

Do some miscellaneous cleanup and keep things a bit better factored.

Change-Id: I19dd8ce1d0b792ba914d426013088a49a52ecb7e
2013-05-04 13:43:33 -04:00

615 lines
18 KiB
C

/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 by Nicolas Pitre <nico@cam.org>
* Copyright (C) 2006-2007 by Stéphane Doyon <s.doyon@videotron.ca>
* Copyright (C) 2012 Michael Sevakis
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "platform.h"
#include "config.h"
#include "sound.h"
#include "core_alloc.h"
#include "dsp-util.h"
#include "dsp_proc_entry.h"
#include "tdspeed.h"
/* Assertions compile away to nothing unless an assert() is already defined */
#if !defined(assert)
#define assert(cond)
#endif

/* Setting id passed through dsp_configure() to deliver a new stretch factor */
#define TIMESTRETCH_SET_FACTOR (DSP_PROC_SETTING+DSP_PROC_TIMESTRETCH)

/* Samplerates outside [MIN_RATE, MAX_RATE] are rejected by tdspeed_update() */
#define MIN_RATE 8000
#define MAX_RATE 48000 /* double buffer for double rate */

/* Divisor turning the samplerate into the initial destination step */
#define MINFREQ 100

/* Most input samples handed to tdspeed_apply() in one call, so that the
   destination buffer doesn't overflow */
#define MAX_INPUTCOUNT 512

/* Buffer lengths, in int32_t samples per channel */
#define FIXED_BUFCOUNT 3072 /* 48KHz factor 3.0 */
#define FIXED_OUTBUFCOUNT 4096

/* Number of entries in buffers[] / buffer_sizes[] */
#define NBUFFERS 4
/* Tells tdspeed_apply() what to do with input left over after the last
   complete frame has been produced */
enum tdspeed_ops
{
    TDSOP_PROCESS = 0, /* keep the leftover in the overlap buffer */
    TDSOP_LAST    = 1, /* overlap buffer pass: remember frame shift only */
    TDSOP_PURGE   = 2, /* last call: copy the leftover to the output */
};
/* All state of the timestretch stage */
struct tdspeed_state_s
{
    struct dsp_proc_entry *this; /* owning stage entry; NULL when closed */
    int channels;                /* audio channel count */
    int32_t samplerate;          /* samplerate of the incoming data */
    int32_t factor;              /* stretch factor (perdecimille) */
    int32_t shift_max;           /* largest displacement within a frame */
    int32_t src_step;            /* pace of the source window */
    int32_t dst_step;            /* pace of the destination window */
    int32_t dst_order;           /* power of two matching dst_step */
    int32_t ovl_shift;           /* frame shift of the overlap buffer */
    int32_t ovl_size;            /* samples held in the overlap buffer */
    int32_t *ovl_buff[2];        /* overlap buffers (L+R) */
};

/* The single instance used by this file */
static struct tdspeed_state_s tdspeed_state;
/* Sample buffers of the stage. Slots 0/1 are the overlap (input) buffers
   and slots 2/3 the output buffers, left then right channel. They are handed
   to tdspeed_alloc_buffers()/tdspeed_free_buffers() together with
   buffer_sizes[] and updated by tdspeed_move(). */
static int32_t *buffers[NBUFFERS] =
{
    [0] = NULL,
    [1] = NULL,
    [2] = NULL,
    [3] = NULL
};

/* Size in bytes of each slot of buffers[] */
static const int buffer_sizes[NBUFFERS] =
{
    [0] = FIXED_BUFCOUNT * sizeof(int32_t),
    [1] = FIXED_BUFCOUNT * sizeof(int32_t),
    [2] = FIXED_OUTBUFCOUNT * sizeof(int32_t),
    [3] = FIXED_OUTBUFCOUNT * sizeof(int32_t)
};

/* Views onto buffers[]: the input pair, the output pair and the capacity
   (in samples) of one output buffer */
#define overlap_buffer (&buffers[0])
#define outbuf (&buffers[2])
#define out_size FIXED_OUTBUFCOUNT

/* Buffer descriptor for processed data, passed out to later stages */
static struct dsp_buffer dsp_outbuf;
/* Cross-fade one sample pair of two overlapping frames.
 *
 * curr, prev - sample of the frame fading in / of the frame fading out
 * i, j       - weights applied to curr and prev respectively
 * order      - scaling exponent: the generic version shifts the weighted
 *              sum right by it; the Coldfire version shifts the weights
 *              left by it (tdspeed_update() stores 30 - log2 for that case)
 *
 * Returns the blended sample.
 */
#if defined(CPU_COLDFIRE)
static inline int32_t blend_frame_samples(int32_t curr, int32_t prev,
                                          int i, int j, int order)
{
    int32_t a0, a1;

    asm (
        "mac.l %2, %3, %%acc0 \n" /* acc = curr*(i<<(30-order)) >> 23 */
        "mac.l %4, %5, %%acc0 \n" /* acc += prev*(j<<(30-order)) >> 23 */
        "moveq.l #1, %0 \n" /* Prepare mask */
        "move.l %%accext01, %1 \n" /* Get extension bits */
        "lsr.l #7, %1 \n" /* Get bit 7 of LSb extension ... */
        "and.l %0, %1 \n" /* ... into bit 0 */
        "movclr.l %%acc0, %0 \n" /* Get result >> 8 */
        "asl.l #1, %0 \n" /* Everything x2 */
        "or.l %1, %0 \n" /* Insert proper LSb from extension */
        : "=d"(a0), "=d"(a1)
        : "r"(curr), "r"(i << order),
          "r"(prev), "r"(j << order));

    return a0;
}
#else
/* Generic */
static inline int32_t blend_frame_samples(int32_t curr, int32_t prev,
                                          int i, int j, int order)
{
    /* Weigh each sample in 64 bits so the products cannot overflow */
    int64_t fade_in = (int64_t)curr * i;
    int64_t fade_out = (int64_t)prev * j;

    return (int32_t)((fade_in + fade_out) >> order);
}
#endif /* CPU_* */
/* Discard all data */
static void tdspeed_flush(void)
{
struct tdspeed_state_s *st = &tdspeed_state;
st->ovl_size = 0;
st->ovl_shift = 0;
dsp_outbuf.remcount = 0; /* Dump remaining output */
}
/* Recompute the stretch parameters for a samplerate / factor pair.
 *
 * The request is always recorded and buffered overlap input is always
 * dropped. Returns false, leaving the step parameters untouched, when the
 * factor is exactly PITCH_SPEED_100 or when either value is out of range;
 * returns true once the window parameters have been set up.
 */
static bool tdspeed_update(int32_t samplerate, int32_t factor)
{
    struct tdspeed_state_s *state = &tdspeed_state;

    /* Remember the request; it is needed again if the format changes */
    state->samplerate = samplerate;
    state->factor = factor;

    /* Input still waiting in the overlap buffer is simply discarded */
    state->ovl_shift = 0;
    state->ovl_size = 0;

    /* Validate the parameters */
    bool rate_ok = (samplerate >= MIN_RATE) && (samplerate <= MAX_RATE);
    bool factor_ok = (factor >= STRETCH_MIN) && (factor <= STRETCH_MAX);

    if (factor == PITCH_SPEED_100 || !rate_ok || !factor_ok)
        return false;

    /* Starting point for the destination step, shortened when speeding up */
    int32_t step = samplerate / MINFREQ;

    if (factor > PITCH_SPEED_100)
        step = step * PITCH_SPEED_100 / factor;

    /* Replace the step by the smallest power of two strictly above it */
    int32_t order = 1;

    for (step >>= 1; step != 0; step >>= 1)
        order++;

    state->dst_step = (1 << order);

#ifdef CPU_COLDFIRE
    /* blend_frame_samples works in s0.31 mode. Also must shift by
       one less bit before mac in order not to overflow. */
    state->dst_order = 30 - order;
#else
    state->dst_order = order;
#endif

    state->src_step = state->dst_step * factor / PITCH_SPEED_100;

    /* The larger of the two steps bounds the frame displacement */
    if (state->dst_step > state->src_step)
        state->shift_max = state->dst_step;
    else
        state->shift_max = state->src_step;

    state->ovl_buff[0] = overlap_buffer[0];
    state->ovl_buff[1] = overlap_buffer[1]; /* ignored if mono */

    return true;
}
/* Run the overlap-add time stretch over one chunk of input.
 *
 * buf_out  - per-channel destination pointers; output is written from the
 *            start of each
 * buf_in   - per-channel source pointers
 * data_len - number of input samples available per channel
 * op       - what to do with input left after the last complete frame:
 *            TDSOP_PROCESS saves it in the overlap buffer for the next call,
 *            TDSOP_LAST only records the frame shift (used for the internal
 *            pass over the overlap buffer), TDSOP_PURGE copies it unblended
 *            to the output
 * consumed - optional; when non-NULL it is updated with the count of input
 *            samples taken from buf_in
 *
 * Returns the number of samples written per channel.
 */
static int tdspeed_apply(int32_t *buf_out[2], int32_t *buf_in[2],
int data_len, enum tdspeed_ops op, int *consumed)
/* data_len in samples */
{
struct tdspeed_state_s *const st = &tdspeed_state;
/* input needed to produce one frame: the shift search range plus one
   destination step, plus the difference when the destination step is
   larger than the source step */
int32_t src_frame_sz = st->shift_max + st->dst_step;
if (st->dst_step > st->src_step)
src_frame_sz += st->dst_step - st->src_step;
/* per-channel write positions in buf_out */
int32_t *dest[2];
/* sample offsets into buf_in: where the search for the next frame starts
   and where the frame being faded out starts */
int32_t next_frame, prev_frame;
/* deal with overlap data first, if any */
if (st->ovl_size)
{
int32_t have = st->ovl_size;
/* with a positive shift the next frame begins ovl_shift samples into
   the saved data, so those samples don't count */
if (st->ovl_shift > 0)
have -= st->ovl_shift;
/* append just enough data to have all of the overlap buffer consumed */
int32_t steps = (have - 1) / st->src_step;
int32_t copy = steps * st->src_step + src_frame_sz - have;
if (copy < src_frame_sz - st->dst_step)
copy += st->src_step; /* one more step to allow for pregap data */
if (copy > data_len)
copy = data_len;
assert(st->ovl_size + copy <= FIXED_BUFCOUNT);
for (int ch = 0; ch < st->channels; ch++)
{
memcpy(st->ovl_buff[ch] + st->ovl_size, buf_in[ch],
copy * sizeof(int32_t));
}
/* so far only the appended samples have been taken from buf_in */
if (consumed)
*consumed = copy;
if (op == TDSOP_PROCESS && have + copy < src_frame_sz)
{
/* still not enough to process at least one frame */
st->ovl_size += copy;
return 0;
}
/* recursively call ourselves to process the overlap buffer */
have = st->ovl_size;
/* clearing ovl_size makes the nested call take the no-overlap path */
st->ovl_size = 0;
assert(have + copy <= FIXED_BUFCOUNT);
if (copy == data_len)
{
/* all of buf_in went into the overlap buffer: the nested call finishes
   the job using the caller's op */
return tdspeed_apply(buf_out, st->ovl_buff, have + copy,
op, NULL);
}
/* otherwise drain the overlap buffer first, then continue in buf_in
   right behind the i samples the nested call has written */
int i = tdspeed_apply(buf_out, st->ovl_buff, have + copy,
TDSOP_LAST, NULL);
dest[0] = buf_out[0] + i;
dest[1] = buf_out[1] + i;
/* readjust pointers to account for data already consumed */
next_frame = copy - src_frame_sz + st->src_step;
prev_frame = next_frame - st->ovl_shift;
}
else
{
dest[0] = buf_out[0];
dest[1] = buf_out[1];
next_frame = prev_frame = 0;
/* a saved shift moves one of the two frame offsets forward: the next
   frame when positive, the previous frame otherwise */
if (st->ovl_shift > 0)
next_frame = st->ovl_shift;
else
prev_frame = -st->ovl_shift;
}
st->ovl_shift = 0;
/* process all complete frames */
while (data_len - next_frame >= src_frame_sz)
{
/* find frame overlap by autocorrelation */
/* candidate shifts are tried every INC1 samples and each candidate is
   compared on every INC2-th sample only */
int const INC1 = 8;
int const INC2 = 32;
int64_t min_delta = INT64_MAX; /* most positive */
int shift = 0;
assert(next_frame + st->shift_max - 1 + st->dst_step <= data_len);
assert(prev_frame + st->dst_step <= data_len);
for (int i = 0; i < st->shift_max; i += INC1)
{
/* accumulated difference between the previous frame and the candidate
   at shift i, summed over all channels */
int64_t delta = 0;
for (int ch = 0; ch < st->channels; ch++)
{
int32_t *curr = buf_in[ch] + next_frame + i;
int32_t *prev = buf_in[ch] + prev_frame;
for (int j = 0; j < st->dst_step;
j += INC2, curr += INC2, prev += INC2)
{
/* ad_s32() is presumably the absolute difference of the two samples -
   confirm in dsp-util.h */
delta += ad_s32(*curr, *prev);
/* already no better than the best candidate: give up on this shift */
if (delta >= min_delta)
goto skip;
}
}
/* best match so far */
min_delta = delta;
shift = i;
skip:;
}
/* overlap fading-out previous frame with fading-in current frame */
for (int ch = 0; ch < st->channels; ch++)
{
int32_t *curr = buf_in[ch] + next_frame + shift;
int32_t *prev = buf_in[ch] + prev_frame;
int32_t *d = dest[ch];
assert(next_frame + shift + st->dst_step <= data_len);
assert(prev_frame + st->dst_step <= data_len);
assert(dest[ch] - buf_out[ch] + st->dst_step <= out_size);
/* i (weight of curr) counts up from 0 while j (weight of prev) counts
   down from dst_step; one output sample per iteration */
for (int i = 0, j = st->dst_step; j; i++, j--)
{
assert(d < buf_out[ch] + out_size);
*d++ = blend_frame_samples(*curr++, *prev++, i, j,
st->dst_order);
}
dest[ch] = d;
}
/* adjust pointers for next frame */
prev_frame = next_frame + shift + st->dst_step;
next_frame += st->src_step;
/* here next_frame - prev_frame = src_step - dst_step - shift */
assert(next_frame - prev_frame == st->src_step - st->dst_step - shift);
} /* while */
/* now deal with remaining partial frames */
switch (op)
{
case TDSOP_PROCESS:
{
/* preserve remaining data + needed overlap data for next call */
st->ovl_shift = next_frame - prev_frame;
/* keep everything from the earlier of the two frame offsets onwards */
int i = (st->ovl_shift < 0) ? next_frame : prev_frame;
st->ovl_size = data_len - i;
assert(st->ovl_size <= FIXED_BUFCOUNT);
for (int ch = 0; ch < st->channels; ch++)
{
/* memmove: buf_in is the overlap buffer itself in the nested call */
memmove(st->ovl_buff[ch], buf_in[ch] + i,
st->ovl_size * sizeof(int32_t));
}
/* everything has been either processed or saved */
if (consumed)
*consumed = data_len;
break;
} /* TDSOP_PROCESS: */
case TDSOP_LAST:
{
/* special overlap buffer processing: remember frame shift only */
st->ovl_shift = next_frame - prev_frame;
break;
} /* TDSOP_LAST: */
case TDSOP_PURGE:
{
/* last call: purge all remaining data to output buffer */
int i = data_len - prev_frame;
for (int ch = 0; ch < st->channels; ch++)
{
assert(dest[ch] + i <= buf_out[ch] + out_size);
memcpy(dest[ch], buf_in[ch] + prev_frame, i * sizeof(int32_t));
dest[ch] += i;
}
/* NOTE(review): this adds to *consumed whereas TDSOP_PROCESS assigns it;
   on the no-overlap path nothing here has initialised *consumed - confirm
   what callers using TDSOP_PURGE expect */
if (consumed)
*consumed += i;
break;
} /* TDSOP_PURGE: */
} /* switch */
/* samples written per channel (both channels advance in lockstep) */
return dest[0] - buf_out[0];
}
/** DSP interface **/
/* Turn the availability of timestretch on or off. Does nothing when the
   stage is already in the requested state. */
void dsp_timestretch_enable(bool enabled)
{
    /* The stage pointer is only set while the stage is open */
    bool is_enabled = (tdspeed_state.this != NULL);

    if (enabled == is_enabled)
        return; /* No change */

    dsp_proc_enable(dsp_get_config(CODEC_IDX_AUDIO), DSP_PROC_TIMESTRETCH,
                    enabled);
}
/* Set the timestretch ratio */
void dsp_set_timestretch(int32_t percent)
{
struct tdspeed_state_s *st = &tdspeed_state;
if (!st->this)
return; /* not enabled */
if (percent <= 0)
percent = PITCH_SPEED_100;
if (percent == st->factor)
return; /* no change */
struct dsp_config *dsp = dsp_get_config(CODEC_IDX_AUDIO);
dsp_configure(dsp, TIMESTRETCH_SET_FACTOR, percent);
}
/* Return the timestretch ratio */
int32_t dsp_get_timestretch(void)
{
return tdspeed_state.factor;
}
/* Tell whether timestretch is enabled and initialized, i.e. whether the
   stage pointer is currently set */
bool dsp_timestretch_available(void)
{
    return tdspeed_state.this != NULL;
}
/* Stage process callback: stretch input from *buf_p into our own output
 * buffer and make *buf_p point at that buffer.
 *
 * While output from an earlier call is still unread nothing new is
 * processed. Otherwise up to MAX_INPUTCOUNT source samples are run through
 * tdspeed_apply() and the source buffer is advanced by what was consumed.
 */
static void tdspeed_process(struct dsp_proc_entry *this,
                            struct dsp_buffer **buf_p)
{
    (void)this;

    struct dsp_buffer *in = *buf_p;
    struct dsp_buffer *out = &dsp_outbuf;

    *buf_p = out; /* later stages read from our buffer */

    int produced = out->remcount;

    if (produced > 0)
        return; /* output remains from an earlier call */

    /* Rewind the output pointers; mono uses the left buffer for both */
    out->p32[0] = outbuf[0];
    out->p32[1] = outbuf[in->format.num_channels - 1];

    if (in->remcount > 0)
    {
        /* bufcount doubles as the "samples consumed" result */
        out->bufcount = 0;

        produced = tdspeed_apply(out->p32, in->p32,
                                 MIN(in->remcount, MAX_INPUTCOUNT),
                                 TDSOP_PROCESS, &out->bufcount);

        /* advance the source by the samples consumed */
        if (out->bufcount > 0)
            dsp_advance_buffer32(in, out->bufcount);
    }
    /* else purged dsp_outbuf */

    out->remcount = produced;

    /* inherit in-place processed mask from source buffer */
    out->proc_mask = in->proc_mask;
}
/* Process format changes and settings changes.
 *
 * this   - stage entry (unused)
 * dsp    - DSP configuration the stage belongs to
 * format - the new sample format
 *
 * Returns PROC_NEW_FORMAT_TRANSITION while output produced earlier is still
 * waiting in dsp_outbuf, PROC_NEW_FORMAT_OK when the stage is active for the
 * new format and PROC_NEW_FORMAT_DEACTIVATED when it has nothing to do.
 */
static intptr_t tdspeed_new_format(struct dsp_proc_entry *this,
                                   struct dsp_config *dsp,
                                   struct sample_format *format)
{
    (void)this;

    struct dsp_buffer *dst = &dsp_outbuf;

    /* Old-format output must be read out before switching */
    if (dst->remcount > 0)
        return PROC_NEW_FORMAT_TRANSITION;

    DSP_PRINT_FORMAT(DSP_PROC_TIMESTRETCH, *format);

    bool active = dsp_proc_active(dsp, DSP_PROC_TIMESTRETCH);
    struct tdspeed_state_s *st = &tdspeed_state;
    int channels = format->num_channels;

    if (format->codec_frequency != st->samplerate)
    {
        /* relevant parameters are changing - all overlap will be discarded.
           A factor change also lands here because TIMESTRETCH_SET_FACTOR
           zeroes the stored samplerate. */
        st->channels = channels;

        DEBUGF(" DSP_PROC_TIMESTRETCH- new settings: "
               "ch:%u chz: %u, %d.%02d%%\n",
               channels,
               format->codec_frequency,
               st->factor / 100, st->factor % 100);

        active = tdspeed_update(format->codec_frequency, st->factor);
        dsp_proc_activate(dsp, DSP_PROC_TIMESTRETCH, active);
    }
    else if (active && channels != st->channels)
    {
        /* channel count transition - have to make old data in overlap
           buffer compatible with new format */
        DEBUGF(" DSP_PROC_TIMESTRETCH- new ch count: %u=>%u\n",
               st->channels, channels);

        st->channels = channels;

        if (channels > 1)
        {
            /* mono->stereo: Process the old mono as stereo now */
            memcpy(st->ovl_buff[1], st->ovl_buff[0],
                   st->ovl_size * sizeof (int32_t));
        }
        else
        {
            /* stereo->mono: Process the old stereo as mono now; each half
               is halved before summing so the sum cannot overflow */
            for (int i = 0; i < st->ovl_size; i++)
            {
                st->ovl_buff[0][i] = st->ovl_buff[0][i] / 2 +
                                     st->ovl_buff[1][i] / 2;
            }
        }
    }

    dst->format = *format;

    if (active)
        return PROC_NEW_FORMAT_OK;

    /* Nothing to do */
    DEBUGF(" DSP_PROC_TIMESTRETCH- deactivated\n");
    return PROC_NEW_FORMAT_DEACTIVATED;
}
/* One-time DSP init: start the audio DSP at PITCH_SPEED_100. Other DSP ids
   leave the state alone. */
static void INIT_ATTR tdspeed_dsp_init(struct tdspeed_state_s *st,
                                       enum dsp_ids dsp_id)
{
    if (dsp_id != CODEC_IDX_AUDIO)
        return;

    /* everything is at 100% until dsp_set_timestretch is called with
       some other value and timestretch is enabled at the time */
    st->factor = PITCH_SPEED_100;
}
/* DSP message hook: dispatches a configuration message to the stage.
 *
 * Returns -1 when DSP_PROC_INIT cannot allocate its buffers, the result of
 * tdspeed_new_format() for DSP_PROC_NEW_FORMAT, and 0 for everything else
 * (including settings this stage does not handle).
 */
static intptr_t tdspeed_configure(struct dsp_proc_entry *this,
                                  struct dsp_config *dsp,
                                  unsigned int setting,
                                  intptr_t value)
{
    struct tdspeed_state_s *state = &tdspeed_state;

    switch (setting)
    {
    case DSP_INIT:
        tdspeed_dsp_init(state, (enum dsp_ids)value);
        return 0;

    case DSP_FLUSH:
        tdspeed_flush();
        return 0;

    case DSP_PROC_INIT:
        if (!tdspeed_alloc_buffers(buffers, buffer_sizes, NBUFFERS))
            return -1; /* fail the init */

        state->this = this;
        dsp_proc_set_in_place(dsp, DSP_PROC_TIMESTRETCH, false);
        this->process = tdspeed_process;
        return 0;

    case DSP_PROC_CLOSE:
        /* Back to the closed state: no stage, unity factor, no output */
        state->this = NULL;
        state->factor = PITCH_SPEED_100;
        dsp_outbuf.remcount = 0;
        tdspeed_free_buffers(buffers, NBUFFERS);
        return 0;

    case DSP_PROC_NEW_FORMAT:
        return tdspeed_new_format(this, dsp, (struct sample_format *)value);

    case TIMESTRETCH_SET_FACTOR:
        /* Zeroing the samplerate makes the next format update see a
           mismatch and recompute the parameters with the new factor */
        state->samplerate = 0;
        state->factor = (int32_t)value;
        dsp_proc_want_format_update(dsp, DSP_PROC_TIMESTRETCH);
        return 0;

    default:
        return 0;
    }
}
/* Fix up every pointer into buffer slot i after that buffer has been
 * relocated.
 *
 * i       - index into buffers[]
 * current - old address of the buffer
 * new     - new address of the buffer
 */
void tdspeed_move(int i, void* current, void* new)
{
    int32_t *old_base = current;
    int32_t *new_base = new;
    ptrdiff_t delta = new_base - old_base;

    if (i == 0 || i == 1)
    {
        /* moving overlap (input) buffers */
        tdspeed_state.ovl_buff[i] = new_base;
    }
    else if (i == 2)
    {
        /* moving outbuf left channel and dsp_outbuf.p32[0]; in mono mode
           both output pointers refer to this buffer and move together */
        bool mono = (dsp_outbuf.p32[0] == dsp_outbuf.p32[1]);

        if (mono)
            dsp_outbuf.p32[1] += delta;

        dsp_outbuf.p32[0] += delta;
    }
    else if (i == 3)
    {
        /* moving outbuf right channel and dsp_outbuf.p32[1] */
        dsp_outbuf.p32[1] += delta;
    }

    buffers[i] = new_base;
}
/* Database entry: names tdspeed_configure as the configure hook of the
   TIMESTRETCH stage. NOTE(review): presumably DSP_PROC_DB_ENTRY expands to
   the record the DSP core looks stages up by - confirm in dsp_proc_entry.h */
DSP_PROC_DB_ENTRY(TIMESTRETCH,
tdspeed_configure);