rockbox/lib/rbcodec/dsp/tdspeed.c
Aidan MacDonald 6e794c9a2d rbcodec dsp: Refactor DSP init routines, restore INIT_ATTR
Refactor DSP init routines so there is a dedicated init function
for the stages that need it. Remove the DSP_INIT configure message.
This allows the init code to be safely marked INIT_ATTR, saving a
bit of code size, and allowing the linker to verify that there are
no unsafe references to the init routines.

Change-Id: I1702f0f579bbb300a6fe7d0e67b13aa2e9dd7f8a
2022-12-23 12:47:10 -05:00

613 lines
18 KiB
C

/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 by Nicolas Pitre <nico@cam.org>
* Copyright (C) 2006-2007 by Stéphane Doyon <s.doyon@videotron.ca>
* Copyright (C) 2012 Michael Sevakis
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "rbcodecconfig.h"
#include "platform.h"
#include "sound.h"
#include "core_alloc.h"
#include "dsp-util.h"
#include "dsp_proc_entry.h"
#include "tdspeed.h"
/* Fall back to a no-op assert() when none has been defined by the headers
   included above */
#ifndef assert
#define assert(cond)
#endif
/* Setting id handled by tdspeed_configure() to install a new stretch factor */
#define TIMESTRETCH_SET_FACTOR (DSP_PROC_SETTING+DSP_PROC_TIMESTRETCH)
/* tdspeed_update() refuses sample rates outside [MIN_RATE, MAX_RATE] */
#define MIN_RATE 8000
#define MAX_RATE 48000 /* double buffer for double rate */
/* tdspeed_update() starts from dst_step = samplerate / MINFREQ */
#define MINFREQ 100
#define MAX_INPUTCOUNT 512 /* Max input count so dst doesn't overflow */
/* Element counts (int32_t samples) used to size the overlap buffers
   (FIXED_BUFCOUNT) and the output buffers (FIXED_OUTBUFCOUNT) */
#define FIXED_BUFCOUNT 3072 /* 48KHz factor 3.0 */
#define FIXED_OUTBUFCOUNT 4096
/* Tells tdspeed_apply() what to do with the input that is left over once
   all complete frames have been processed */
enum tdspeed_ops
{
TDSOP_PROCESS, /* keep leftover input in the overlap buffer for the next call */
TDSOP_LAST, /* overlap buffer pass: only remember the frame shift */
TDSOP_PURGE, /* last call: copy all remaining data to the output */
};
/* The single, file-wide timestretch state.
   'this' is set on DSP_PROC_INIT and reset to NULL on DSP_PROC_CLOSE; the
   public functions below treat a NULL 'this' as "timestretch not enabled".
   The step/order/shift fields are derived by tdspeed_update(). */
static struct tdspeed_state_s
{
struct dsp_proc_entry *this; /* this stage */
int channels; /* number of audio channels */
int32_t samplerate; /* current samplerate of input data */
int32_t factor; /* stretch factor (perdecimille) */
int32_t shift_max; /* maximum displacement on a frame */
int32_t src_step; /* source window pace */
int32_t dst_step; /* destination window pace */
int32_t dst_order; /* power of two for dst_step */
int32_t ovl_shift; /* overlap buffer frame shift */
int32_t ovl_size; /* overlap buffer used size */
int32_t *ovl_buff[2]; /* overlap buffer (L+R) */
} tdspeed_state;
/* Sample buffers obtained through tdspeed_alloc_buffers() and released with
   tdspeed_free_buffers(); tdspeed_move() updates a slot when its buffer is
   relocated. Slots 0-1 hold overlap (input) data, slots 2-3 hold output. */
static int32_t *buffers[TDSPEED_NBUFFERS] = { NULL, NULL, NULL, NULL };
/* Size in bytes of each slot of buffers[] */
static const int buffer_sizes[TDSPEED_NBUFFERS] =
{
FIXED_BUFCOUNT * sizeof(int32_t),
FIXED_BUFCOUNT * sizeof(int32_t),
FIXED_OUTBUFCOUNT * sizeof(int32_t),
FIXED_OUTBUFCOUNT * sizeof(int32_t)
};
/* Convenience views of buffers[]: the overlap pair, the output pair, and the
   element count of one output buffer */
#define overlap_buffer (&buffers[0])
#define outbuf (&buffers[2])
#define out_size FIXED_OUTBUFCOUNT
/* Processed buffer passed out to later stages */
static struct dsp_buffer dsp_outbuf;
/* Blend overlapping frame samples according to position */
#if defined(CPU_COLDFIRE)
/* ColdFire version built on the acc0 multiply-accumulate register.
   'order' is used as a left shift for the weights i and j here; for
   CPU_COLDFIRE builds tdspeed_update() stores 30 - log2(dst_step) in
   dst_order for exactly this purpose. The extension-bit handling below
   recovers the lowest result bit after the final doubling. */
static inline int32_t blend_frame_samples(int32_t curr, int32_t prev,
int i, int j, int order)
{
int32_t a0, a1;
asm (
"mac.l %2, %3, %%acc0 \n" /* acc = curr*(i<<(30-order)) >> 23 */
"mac.l %4, %5, %%acc0 \n" /* acc += prev*(j<<(30-order)) >> 23 */
"moveq.l #1, %0 \n" /* Prepare mask */
"move.l %%accext01, %1 \n" /* Get extension bits */
"lsr.l #7, %1 \n" /* Get bit 7 of LSb extension ... */
"and.l %0, %1 \n" /* ... into bit 0 */
"movclr.l %%acc0, %0 \n" /* Get result >> 8 */
"asl.l #1, %0 \n" /* Everything x2 */
"or.l %1, %0 \n" /* Insert proper LSb from extension */
: "=d"(a0), "=d"(a1)
: "r"(curr), "r"(i << order),
"r"(prev), "r"(j << order));
return a0;
}
#else
/* Generic C version: weight 'curr' by i and 'prev' by j, then scale the sum
   back down by 2^order */
static inline int32_t blend_frame_samples(int32_t curr, int32_t prev,
                                          int i, int j, int order)
{
    /* 64-bit products so that weight * sample cannot overflow */
    int64_t const fade_in  = (int64_t)i * curr;
    int64_t const fade_out = (int64_t)j * prev;

    return (fade_in + fade_out) >> order;
}
#endif /* CPU_* */
/* Discard all data */
static void tdspeed_flush(void)
{
struct tdspeed_state_s *st = &tdspeed_state;
st->ovl_size = 0;
st->ovl_shift = 0;
dsp_outbuf.remcount = 0; /* Dump remaining output */
}
/* Derive the stretch parameters for the given sample rate and factor.
 *
 * The rate and factor are always recorded and buffered overlap input is
 * always dropped. The step sizes, order and overlap buffer pointers are
 * only touched when the parameters are accepted.
 *
 * Returns true if stretching should be active, false if the factor means
 * "no change" or a parameter is out of range.
 */
static bool tdspeed_update(int32_t samplerate, int32_t factor)
{
    struct tdspeed_state_s *const state = &tdspeed_state;

    /* Remember the request; needed later if the format changes */
    state->samplerate = samplerate;
    state->factor = factor;

    /* Pending input is simply dropped */
    state->ovl_shift = 0;
    state->ovl_size = 0;

    /* Validate the parameters */
    bool const rate_ok = samplerate >= MIN_RATE && samplerate <= MAX_RATE;
    bool const factor_ok = factor != PITCH_SPEED_100 &&
                           factor >= STRETCH_MIN && factor <= STRETCH_MAX;

    if (!rate_ok || !factor_ok)
        return false;

    /* Destination window length before it is made a power of two */
    int32_t step = samplerate / MINFREQ;

    if (factor > PITCH_SPEED_100)
        step = step * PITCH_SPEED_100 / factor;

    /* One more than the number of times 'step' can be halved */
    int32_t order = 1;

    for (step >>= 1; step != 0; step >>= 1)
        order++;

    state->dst_step = (1 << order);
#ifdef CPU_COLDFIRE
    /* blend_frame_samples works in s0.31 mode. Also must shift by
       one less bit before mac in order not to overflow. */
    state->dst_order = 30 - order;
#else
    state->dst_order = order;
#endif

    state->src_step = state->dst_step * factor / PITCH_SPEED_100;

    /* The larger of the two window paces */
    if (state->src_step < state->dst_step)
        state->shift_max = state->dst_step;
    else
        state->shift_max = state->src_step;

    state->ovl_buff[0] = overlap_buffer[0];
    state->ovl_buff[1] = overlap_buffer[1]; /* unused when mono */

    return true;
}
/* Core time-stretch routine: builds output frames by cross-fading pairs of
 * input frames and writes them to buf_out.
 *
 * buf_out  - per-channel destination pointers
 * buf_in   - per-channel source pointers
 * data_len - number of input samples available per channel
 * op       - what to do with input left after the last complete frame:
 *            TDSOP_PROCESS stores it in the overlap buffer for the next
 *            call, TDSOP_LAST only records the frame shift, TDSOP_PURGE
 *            copies it straight to the output
 * consumed - if not NULL, updated with the number of samples taken from
 *            buf_in (set for TDSOP_PROCESS, added to for TDSOP_PURGE)
 *
 * Returns the number of samples written per channel to buf_out.
 */
static int tdspeed_apply(int32_t *buf_out[2], int32_t *buf_in[2],
int data_len, enum tdspeed_ops op, int *consumed)
/* data_len in samples */
{
struct tdspeed_state_s *const st = &tdspeed_state;
/* number of input samples that must be available to process one frame */
int32_t src_frame_sz = st->shift_max + st->dst_step;
if (st->dst_step > st->src_step)
src_frame_sz += st->dst_step - st->src_step;
/* dest: current write position per channel in buf_out.
   next_frame / prev_frame: offsets into buf_in of the frame being faded
   in and of the frame being faded out */
int32_t *dest[2];
int32_t next_frame, prev_frame;
/* deal with overlap data first, if any */
if (st->ovl_size)
{
int32_t have = st->ovl_size;
if (st->ovl_shift > 0)
have -= st->ovl_shift;
/* append just enough data to have all of the overlap buffer consumed */
int32_t steps = (have - 1) / st->src_step;
int32_t copy = steps * st->src_step + src_frame_sz - have;
if (copy < src_frame_sz - st->dst_step)
copy += st->src_step; /* one more step to allow for pregap data */
if (copy > data_len)
copy = data_len;
assert(st->ovl_size + copy <= FIXED_BUFCOUNT);
for (int ch = 0; ch < st->channels; ch++)
{
memcpy(st->ovl_buff[ch] + st->ovl_size, buf_in[ch],
copy * sizeof(int32_t));
}
if (consumed)
*consumed = copy;
if (op == TDSOP_PROCESS && have + copy < src_frame_sz)
{
/* still not enough to process at least one frame */
st->ovl_size += copy;
return 0;
}
/* recursively call ourselves to process the overlap buffer */
have = st->ovl_size;
/* cleared so the nested call takes the no-overlap path */
st->ovl_size = 0;
assert(have + copy <= FIXED_BUFCOUNT);
if (copy == data_len)
{
/* all of the new input went into the overlap buffer, so processing
   that buffer with the caller's op finishes the job */
return tdspeed_apply(buf_out, st->ovl_buff, have + copy,
op, NULL);
}
/* more input remains in buf_in: finish the overlap buffer as TDSOP_LAST
   and continue below, writing after the i samples it produced */
int i = tdspeed_apply(buf_out, st->ovl_buff, have + copy,
TDSOP_LAST, NULL);
dest[0] = buf_out[0] + i;
dest[1] = buf_out[1] + i;
/* readjust pointers to account for data already consumed */
next_frame = copy - src_frame_sz + st->src_step;
prev_frame = next_frame - st->ovl_shift;
}
else
{
dest[0] = buf_out[0];
dest[1] = buf_out[1];
next_frame = prev_frame = 0;
/* a positive shift delays the next frame, a negative one delays the
   previous frame */
if (st->ovl_shift > 0)
next_frame = st->ovl_shift;
else
prev_frame = -st->ovl_shift;
}
st->ovl_shift = 0;
/* process all complete frames */
while (data_len - next_frame >= src_frame_sz)
{
/* find frame overlap by autocorrelation */
/* candidate shifts are tried every INC1 samples; each candidate is scored
   on every INC2-th sample of the frame */
int const INC1 = 8;
int const INC2 = 32;
int64_t min_delta = INT64_MAX; /* most positive */
int shift = 0;
assert(next_frame + st->shift_max - 1 + st->dst_step <= data_len);
assert(prev_frame + st->dst_step <= data_len);
for (int i = 0; i < st->shift_max; i += INC1)
{
/* accumulated ad_s32() score for this candidate over all channels
   (presumably an absolute difference - see dsp-util.h) */
int64_t delta = 0;
for (int ch = 0; ch < st->channels; ch++)
{
int32_t *curr = buf_in[ch] + next_frame + i;
int32_t *prev = buf_in[ch] + prev_frame;
for (int j = 0; j < st->dst_step;
j += INC2, curr += INC2, prev += INC2)
{
delta += ad_s32(*curr, *prev);
/* already no better than the best candidate: abandon this shift */
if (delta >= min_delta)
goto skip;
}
}
/* strictly better than every earlier candidate */
min_delta = delta;
shift = i;
skip:;
}
/* overlap fading-out previous frame with fading-in current frame */
for (int ch = 0; ch < st->channels; ch++)
{
int32_t *curr = buf_in[ch] + next_frame + shift;
int32_t *prev = buf_in[ch] + prev_frame;
int32_t *d = dest[ch];
assert(next_frame + shift + st->dst_step <= data_len);
assert(prev_frame + st->dst_step <= data_len);
assert(dest[ch] - buf_out[ch] + st->dst_step <= out_size);
/* i (weight of curr) counts up from 0 while j (weight of prev) counts
   down from dst_step */
for (int i = 0, j = st->dst_step; j; i++, j--)
{
assert(d < buf_out[ch] + out_size);
*d++ = blend_frame_samples(*curr++, *prev++, i, j,
st->dst_order);
}
dest[ch] = d;
}
/* adjust pointers for next frame */
prev_frame = next_frame + shift + st->dst_step;
next_frame += st->src_step;
/* here next_frame - prev_frame = src_step - dst_step - shift */
assert(next_frame - prev_frame == st->src_step - st->dst_step - shift);
} /* while */
/* now deal with remaining partial frames */
switch (op)
{
case TDSOP_PROCESS:
{
/* preserve remaining data + needed overlap data for next call */
st->ovl_shift = next_frame - prev_frame;
/* keep everything from the earlier of the two frame offsets onwards */
int i = (st->ovl_shift < 0) ? next_frame : prev_frame;
st->ovl_size = data_len - i;
assert(st->ovl_size <= FIXED_BUFCOUNT);
for (int ch = 0; ch < st->channels; ch++)
{
/* memmove: buf_in is the overlap buffer itself on the recursive path */
memmove(st->ovl_buff[ch], buf_in[ch] + i,
st->ovl_size * sizeof(int32_t));
}
if (consumed)
*consumed = data_len;
break;
} /* TDSOP_PROCESS: */
case TDSOP_LAST:
{
/* special overlap buffer processing: remember frame shift only */
st->ovl_shift = next_frame - prev_frame;
break;
} /* TDSOP_LAST: */
case TDSOP_PURGE:
{
/* last call: purge all remaining data to output buffer */
int i = data_len - prev_frame;
for (int ch = 0; ch < st->channels; ch++)
{
assert(dest[ch] + i <= buf_out[ch] + out_size);
memcpy(dest[ch], buf_in[ch] + prev_frame, i * sizeof(int32_t));
dest[ch] += i;
}
if (consumed)
*consumed += i;
break;
} /* TDSOP_PURGE: */
} /* switch */
/* samples written per channel */
return dest[0] - buf_out[0];
}
/** DSP interface **/
/* Turn the timestretch stage of the audio DSP on or off */
void dsp_timestretch_enable(bool enabled)
{
    bool const is_enabled = (tdspeed_state.this != NULL);

    if (is_enabled == enabled)
        return; /* already in the requested state */

    dsp_proc_enable(dsp_get_config(CODEC_IDX_AUDIO), DSP_PROC_TIMESTRETCH,
                    enabled);
}
/* Set the timestretch ratio */
void dsp_set_timestretch(int32_t percent)
{
struct tdspeed_state_s *st = &tdspeed_state;
if (!st->this)
return; /* not enabled */
if (percent <= 0)
percent = PITCH_SPEED_100;
if (percent == st->factor)
return; /* no change */
struct dsp_config *dsp = dsp_get_config(CODEC_IDX_AUDIO);
dsp_configure(dsp, TIMESTRETCH_SET_FACTOR, percent);
}
/* Return the timestretch ratio */
int32_t dsp_get_timestretch(void)
{
return tdspeed_state.factor;
}
/* True once the stage has been initialized (DSP_PROC_INIT succeeded) and
   has not been closed since */
bool dsp_timestretch_available(void)
{
    return tdspeed_state.this != NULL;
}
/* Stage process callback: redirect the caller to our output buffer and, if
   that buffer has been drained, refill it by stretching up to
   MAX_INPUTCOUNT samples of the incoming buffer */
static void tdspeed_process(struct dsp_proc_entry *this,
                            struct dsp_buffer **buf_p)
{
    (void)this;

    struct dsp_buffer *const in = *buf_p;
    struct dsp_buffer *const out = &dsp_outbuf;

    /* later stages read from our buffer from now on */
    *buf_p = out;

    int produced = out->remcount;

    if (produced > 0)
        return; /* earlier output has not been fully read yet */

    /* with a single channel both pointers refer to outbuf[0] */
    out->p32[0] = outbuf[0];
    out->p32[1] = outbuf[in->format.num_channels - 1];

    if (in->remcount > 0)
    {
        int const feed = MIN(in->remcount, MAX_INPUTCOUNT);

        /* bufcount doubles as the "samples consumed" result */
        out->bufcount = 0;
        produced = tdspeed_apply(out->p32, in->p32, feed,
                                 TDSOP_PROCESS, &out->bufcount);

        /* step the source past what was consumed */
        if (out->bufcount > 0)
            dsp_advance_buffer32(in, out->bufcount);
    }
    /* otherwise dsp_outbuf was purged and stays as it is */

    out->remcount = produced;

    /* carry over the source buffer's in-place processed mask */
    out->proc_mask = in->proc_mask;
}
/* Process format changes and settings changes.
 *
 * this   - the stage entry (unused)
 * dsp    - DSP instance the stage belongs to
 * format - the new input sample format
 *
 * Returns PROC_NEW_FORMAT_TRANSITION while output produced under the old
 * format is still waiting to be read, PROC_NEW_FORMAT_OK when the stage is
 * active for the new format, and PROC_NEW_FORMAT_DEACTIVATED otherwise.
 */
static intptr_t tdspeed_new_format(struct dsp_proc_entry *this,
                                   struct dsp_config *dsp,
                                   struct sample_format *format)
{
    (void)this;

    struct dsp_buffer *dst = &dsp_outbuf;

    if (dst->remcount > 0)
        return PROC_NEW_FORMAT_TRANSITION;

    DSP_PRINT_FORMAT(DSP_PROC_TIMESTRETCH, *format);

    bool active = dsp_proc_active(dsp, DSP_PROC_TIMESTRETCH);
    struct tdspeed_state_s *st = &tdspeed_state;
    int channels = format->num_channels;

    /* TIMESTRETCH_SET_FACTOR zeroes st->samplerate, so a factor change also
       ends up in this branch */
    if (format->codec_frequency != st->samplerate)
    {
        /* relevant parameters are changing - all overlap will be discarded */
        st->channels = channels;

        DEBUGF(" DSP_PROC_TIMESTRETCH- new settings: "
               "ch:%u chz: %u, %d.%02d%%\n",
               channels,
               format->codec_frequency,
               st->factor / 100, st->factor % 100);

        active = tdspeed_update(format->codec_frequency, st->factor);
        dsp_proc_activate(dsp, DSP_PROC_TIMESTRETCH, active);
    }
    else if (active && channels != st->channels)
    {
        /* channel count transition - have to make old data in overlap
           buffer compatible with new format */
        DEBUGF(" DSP_PROC_TIMESTRETCH- new ch count: %u=>%u\n",
               st->channels, channels);

        st->channels = channels;

        if (channels > 1)
        {
            /* mono->stereo: Process the old mono as stereo now */
            memcpy(st->ovl_buff[1], st->ovl_buff[0],
                   st->ovl_size * sizeof (int32_t));
        }
        else
        {
            /* stereo->mono: Process the old stereo as mono now.
               Each channel is halved before adding so the sum cannot
               overflow. */
            for (int i = 0; i < st->ovl_size; i++)
            {
                st->ovl_buff[0][i] = st->ovl_buff[0][i] / 2 +
                                     st->ovl_buff[1][i] / 2;
            }
        }
    }

    dst->format = *format;

    if (active)
        return PROC_NEW_FORMAT_OK;

    /* Nothing to do */
    DEBUGF(" DSP_PROC_TIMESTRETCH- deactivated\n");
    return PROC_NEW_FORMAT_DEACTIVATED;
}
void dsp_timestretch_init(struct dsp_config *dsp, unsigned int dsp_id)
{
(void)dsp;
struct tdspeed_state_s *st = &tdspeed_state;
/* everything is at 100% until dsp_set_timestretch is called with
some other value and timestretch is enabled at the time */
if (dsp_id == CODEC_IDX_AUDIO)
st->factor = PITCH_SPEED_100;
}
/* DSP message hook: handles init/close of the stage, flushes, format
   changes and new stretch factors. Returns -1 when initialization fails,
   the result of tdspeed_new_format() for DSP_PROC_NEW_FORMAT, and 0 for
   everything else (including settings it does not handle). */
static intptr_t tdspeed_configure(struct dsp_proc_entry *this,
                                  struct dsp_config *dsp,
                                  unsigned int setting,
                                  intptr_t value)
{
    struct tdspeed_state_s *const state = &tdspeed_state;

    switch (setting)
    {
    case DSP_PROC_INIT:
        if (!tdspeed_alloc_buffers(buffers, buffer_sizes, TDSPEED_NBUFFERS))
            return -1; /* fail the init */

        state->this = this;
        /* output goes to our own buffer, not back into the input */
        dsp_proc_set_in_place(dsp, DSP_PROC_TIMESTRETCH, false);
        this->process = tdspeed_process;
        return 0;

    case DSP_PROC_CLOSE:
        state->this = NULL;
        state->factor = PITCH_SPEED_100;
        dsp_outbuf.remcount = 0;
        tdspeed_free_buffers(buffers, TDSPEED_NBUFFERS);
        return 0;

    case DSP_FLUSH:
        tdspeed_flush();
        return 0;

    case DSP_PROC_NEW_FORMAT:
        return tdspeed_new_format(this, dsp, (struct sample_format *)value);

    case TIMESTRETCH_SET_FACTOR:
        /* a zero samplerate makes the next format update recompute
           the stretch parameters */
        state->samplerate = 0;
        state->factor = (int32_t)value;
        dsp_proc_want_format_update(dsp, DSP_PROC_TIMESTRETCH);
        return 0;
    }

    return 0;
}
/* Called when buffer slot 'i' is relocated from 'current' to 'new': fixes
   up every pointer that refers into the moved buffer and records the new
   address in buffers[]. */
void tdspeed_move(int i, void* current, void* new)
{
    int32_t *const old_base = current;
    int32_t *const new_base = new;
    ptrdiff_t const delta = new_base - old_base;
    int32_t **const out = dsp_outbuf.p32;

    if (i == 0 || i == 1)
    {
        /* an overlap (input) buffer moved */
        tdspeed_state.ovl_buff[i] = new_base;
    }
    else if (i == 2)
    {
        /* left output buffer moved; in mono mode p32[1] refers to the same
           buffer and has to follow */
        bool const mono = (out[0] == out[1]);

        out[0] += delta;

        if (mono)
            out[1] += delta;
    }
    else if (i == 3)
    {
        /* right output buffer moved */
        out[1] += delta;
    }

    buffers[i] = new_base;
}
/* Database entry: declares the TIMESTRETCH stage and names
   tdspeed_configure as its handler (presumably the configure/message hook
   expected by DSP_PROC_DB_ENTRY - see dsp_proc_entry.h) */
DSP_PROC_DB_ENTRY(TIMESTRETCH,
tdspeed_configure);