/*************************************************************************** * __________ __ ___. * Open \______ \ ____ ____ | | _\_ |__ _______ ___ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ * \/ \/ \/ \/ \/ * $Id$ * * Copyright (C) 2005 Miika Pekkarinen * * All files in this archive are subject to the GNU General Public License. * See the file COPYING in the source tree root for full license agreement. * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * ****************************************************************************/ #include #include #include "dsp.h" #include "kernel.h" #include "playback.h" #include "system.h" #include "settings.h" #include "replaygain.h" #include "debug.h" /* The "dither" code to convert the 24-bit samples produced by libmad was * taken from the coolplayer project - coolplayer.sourceforge.net */ /* 16-bit samples are scaled based on these constants. The shift should be * no more than 15. */ #define WORD_SHIFT 12 #define WORD_FRACBITS 27 #define NATIVE_DEPTH 16 #define SAMPLE_BUF_SIZE 256 #define RESAMPLE_BUF_SIZE (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/ #define DEFAULT_REPLAYGAIN 0x01000000 #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) #define INIT() asm volatile ("move.l #0xb0, %macsr") /* frac, round, clip */ /* Multiply two S.31 fractional integers and return the sign bit and the * 31 most significant bits of the result. */ #define FRACMUL(x, y) \ ({ \ long t; \ asm volatile ("mac.l %[a], %[b], %%acc0\n\t" \ "movclr.l %%acc0, %[t]\n\t" \ : [t] "=r" (t) : [a] "r" (x), [b] "r" (y)); \ t; \ }) /* Multiply one S.31-bit and one S8.23 fractional integer and return the * sign bit and the 31 most significant bits of the result. */ #define FRACMUL_8(x, y) \ ({ \ long t; \ long u; \ asm volatile ("mac.l %[a], %[b], %%acc0\n\t" \ "move.l %%accext01, %[u]\n\t" \ "movclr.l %%acc0, %[t]\n\t" \ : [t] "=r" (t), [u] "=r" (u) : [a] "r" (x), [b] "r" (y)); \ (t << 8) | (u & 0xff); \ }) /* Multiply one S.31-bit and one S8.23 fractional integer and return the * sign bit and the 31 most significant bits of the result. Load next value * to multiply with into x from s (and increase s); x must contain the * initial value. */ #define FRACMUL_8_LOOP(x, y, s) \ ({ \ long t; \ long u; \ asm volatile ("mac.l %[a], %[b], (%[c])+, %[a], %%acc0\n\t" \ "move.l %%accext01, %[u]\n\t" \ "movclr.l %%acc0, %[t]\n\t" \ : [a] "+r" (x), [c] "+a" (s), [t] "=r" (t), [u] "=r" (u) \ : [b] "r" (y)); \ (t << 8) | (u & 0xff); \ }) #else #define INIT() #define FRACMUL(x, y) (long) (((((long long) (x)) * ((long long) (y))) >> 31)) #define FRACMUL_8(x, y) (long) (((((long long) (x)) * ((long long) (y))) >> 23)) #define FRACMUL_8_LOOP(x, y, s) \ ({ \ long t = x; \ x = *(s)++; \ (long) (((((long long) (t)) * ((long long) (y))) >> 23)); \ }) #endif struct dsp_config { long frequency; long clip_min; long clip_max; long track_gain; long album_gain; long track_peak; long album_peak; long replaygain; /* Note that this is in S8.23 format. */ int sample_depth; int sample_bytes; int stereo_mode; int frac_bits; bool dither_enabled; bool new_gain; }; struct resample_data { long last_sample; long phase; long delta; }; struct dither_data { long error[3]; long random; }; static struct dsp_config dsp_conf[2] IDATA_ATTR; static struct dither_data dither_data[2] IDATA_ATTR; static struct resample_data resample_data[2][2] IDATA_ATTR; extern int current_codec; struct dsp_config *dsp; /* The internal format is 32-bit samples, non-interleaved, stereo. This * format is similar to the raw output from several codecs, so the amount * of copying needed is minimized for that case. */ static long sample_buf[SAMPLE_BUF_SIZE] IDATA_ATTR; static long resample_buf[RESAMPLE_BUF_SIZE] IDATA_ATTR; /* Convert at most count samples to the internal format, if needed. Returns * number of samples ready for further processing. Updates src to point * past the samples "consumed" and dst is set to point to the samples to * consume. Note that for mono, dst[0] equals dst[1], as there is no point * in processing the same data twice. */ static int convert_to_internal(char* src[], int count, long* dst[]) { count = MIN(SAMPLE_BUF_SIZE / 2, count); if ((dsp->sample_depth <= NATIVE_DEPTH) || (dsp->stereo_mode == STEREO_INTERLEAVED)) { dst[0] = &sample_buf[0]; dst[1] = (dsp->stereo_mode == STEREO_MONO) ? dst[0] : &sample_buf[SAMPLE_BUF_SIZE / 2]; } else { dst[0] = (long*) src[0]; dst[1] = (long*) ((dsp->stereo_mode == STEREO_MONO) ? src[0] : src[1]); } if (dsp->sample_depth <= NATIVE_DEPTH) { short* s0 = (short*) src[0]; long* d0 = dst[0]; long* d1 = dst[1]; int scale = WORD_SHIFT; int i; if (dsp->stereo_mode == STEREO_INTERLEAVED) { for (i = 0; i < count; i++) { *d0++ = *s0++ << scale; *d1++ = *s0++ << scale; } } else if (dsp->stereo_mode == STEREO_NONINTERLEAVED) { short* s1 = (short*) src[1]; for (i = 0; i < count; i++) { *d0++ = *s0++ << scale; *d1++ = *s1++ << scale; } } else { for (i = 0; i < count; i++) { *d0++ = *s0++ << scale; } } } else if (dsp->stereo_mode == STEREO_INTERLEAVED) { long* s0 = (long*) src[0]; long* d0 = dst[0]; long* d1 = dst[1]; int i; for (i = 0; i < count; i++) { *d0++ = *s0++; *d1++ = *s0++; } } if (dsp->stereo_mode == STEREO_NONINTERLEAVED) { src[0] += count * dsp->sample_bytes; src[1] += count * dsp->sample_bytes; } else if (dsp->stereo_mode == STEREO_INTERLEAVED) { src[0] += count * dsp->sample_bytes * 2; } else { src[0] += count * dsp->sample_bytes; } return count; } /* Linear resampling that introduces a one sample delay, because of our * inability to look into the future at the end of a frame. */ static long downsample(long *dst, long *src, int count, struct resample_data *r) { long phase = r->phase; long delta = r->delta; long last_sample = r->last_sample; int pos = phase >> 16; int i = 1; /* Do we need last sample of previous frame for interpolation? */ if (pos > 0) { last_sample = src[pos - 1]; } *dst++ = last_sample + FRACMUL((phase & 0xffff) << 15, src[pos] - last_sample); phase += delta; while ((pos = phase >> 16) < count) { *dst++ = src[pos - 1] + FRACMUL((phase & 0xffff) << 15, src[pos] - src[pos - 1]); phase += delta; i++; } /* Wrap phase accumulator back to start of next frame. */ r->phase = phase - (count << 16); r->delta = delta; r->last_sample = src[count - 1]; return i; } static long upsample(long *dst, long *src, int count, struct resample_data *r) { long phase = r->phase; long delta = r->delta; long last_sample = r->last_sample; int i = 0; int pos; while ((pos = phase >> 16) == 0) { *dst++ = last_sample + FRACMUL((phase & 0xffff) << 15, src[pos] - last_sample); phase += delta; i++; } while ((pos = phase >> 16) < count) { *dst++ = src[pos - 1] + FRACMUL((phase & 0xffff) << 15, src[pos] - src[pos - 1]); phase += delta; i++; } /* Wrap phase accumulator back to start of next frame. */ r->phase = phase - (count << 16); r->delta = delta; r->last_sample = src[count - 1]; return i; } /* Resample count stereo samples. Updates the src array, if resampling is * done, to refer to the resampled data. Returns number of stereo samples * for further processing. */ static inline int resample(long* src[], int count) { long new_count; if (dsp->frequency != NATIVE_FREQUENCY) { long* d0 = &resample_buf[0]; /* Only process the second channel if needed. */ long* d1 = (src[0] == src[1]) ? d0 : &resample_buf[RESAMPLE_BUF_SIZE / 2]; if (dsp->frequency < NATIVE_FREQUENCY) { new_count = upsample(d0, src[0], count, &resample_data[current_codec][0]); if (d0 != d1) { upsample(d1, src[1], count, &resample_data[current_codec][1]); } } else { new_count = downsample(d0, src[0], count, &resample_data[current_codec][0]); if (d0 != d1) { downsample(d1, src[1], count, &resample_data[current_codec][1]); } } src[0] = d0; src[1] = d1; } else { new_count = count; } return new_count; } static inline long clip_sample(long sample, long min, long max) { if (sample > max) { sample = max; } else if (sample < min) { sample = min; } return sample; } /* The "dither" code to convert the 24-bit samples produced by libmad was * taken from the coolplayer project - coolplayer.sourceforge.net */ static long dither_sample(long sample, long bias, long mask, struct dither_data* dither) { long output; long random; long min; long max; /* Noise shape and bias */ sample += dither->error[0] - dither->error[1] + dither->error[2]; dither->error[2] = dither->error[1]; dither->error[1] = dither->error[0] / 2; output = sample + bias; /* Dither */ random = dither->random * 0x0019660dL + 0x3c6ef35fL; sample += (random & mask) - (dither->random & mask); dither->random = random; /* Clip and quantize */ min = dsp->clip_min; max = dsp->clip_max; sample = clip_sample(sample, min, max); output = clip_sample(output, min, max) & ~mask; /* Error feedback */ dither->error[0] = sample - output; return output; } /* Apply a constant gain to the samples (e.g., for ReplayGain). May update * the src array if gain was applied. * Note that this must be called before the resampler. */ static void apply_gain(long* src[], int count) { if (dsp->replaygain) { long* s0 = src[0]; long* s1 = src[1]; long* d0 = &sample_buf[0]; long* d1 = (s0 == s1) ? d0 : &sample_buf[SAMPLE_BUF_SIZE / 2]; long gain = dsp->replaygain; long s; long i; src[0] = d0; src[1] = d1; s = *s0++; for (i = 0; i < count; i++) { *d0++ = FRACMUL_8_LOOP(s, gain, s0); } if (src [0] != src [1]) { s = *s1++; for (i = 0; i < count; i++) { *d1++ = FRACMUL_8_LOOP(s, gain, s1); } } } } static void write_samples(short* dst, long* src[], int count) { long* s0 = src[0]; long* s1 = src[1]; int scale = dsp->frac_bits + 1 - NATIVE_DEPTH; if (dsp->dither_enabled) { long bias = (1L << (dsp->frac_bits - NATIVE_DEPTH)); long mask = (1L << scale) - 1; while (count-- > 0) { *dst++ = (short) (dither_sample(*s0++, bias, mask, &dither_data[0]) >> scale); *dst++ = (short) (dither_sample(*s1++, bias, mask, &dither_data[1]) >> scale); } } else { long min = dsp->clip_min; long max = dsp->clip_max; while (count-- > 0) { *dst++ = (short) (clip_sample(*s0++, min, max) >> scale); *dst++ = (short) (clip_sample(*s1++, min, max) >> scale); } } } /* Process and convert src audio to dst based on the DSP configuration, * reading size bytes of audio data. dst is assumed to be large enough; use * dst_get_dest_size() to get the required size. src is an array of * pointers; for mono and interleaved stereo, it contains one pointer to the * start of the audio data; for non-interleaved stereo, it contains two * pointers, one for each audio channel. Returns number of bytes written to * dest. */ long dsp_process(char* dst, char* src[], long size) { long* tmp[2]; long written = 0; long factor; int samples; dsp = &dsp_conf[current_codec]; factor = (dsp->stereo_mode != STEREO_MONO) ? 2 : 1; size /= dsp->sample_bytes * factor; INIT(); dsp_set_replaygain(false); while (size > 0) { samples = convert_to_internal(src, size, tmp); size -= samples; apply_gain(tmp, samples); samples = resample(tmp, samples); write_samples((short*) dst, tmp, samples); written += samples; dst += samples * sizeof(short) * 2; yield(); } return written * sizeof(short) * 2; } /* Given size bytes of input data, calculate the maximum number of bytes of * output data that would be generated (the calculation is not entirely * exact and rounds upwards to be on the safe side; during resampling, * the number of samples generated depends on the current state of the * resampler). */ /* dsp_input_size MUST be called afterwards */ long dsp_output_size(long size) { dsp = &dsp_conf[current_codec]; if (dsp->sample_depth > NATIVE_DEPTH) { size /= 2; } if (dsp->frequency != NATIVE_FREQUENCY) { size = (long) ((((unsigned long) size * NATIVE_FREQUENCY) + (dsp->frequency - 1)) / dsp->frequency); } /* round to the next multiple of 2 (these are shorts) */ size = (size + 1) & ~1; if (dsp->stereo_mode == STEREO_MONO) { size *= 2; } /* now we have the size in bytes for two resampled channels, * and the size in (short) must not exceed RESAMPLE_BUF_SIZE to * avoid resample buffer overflow. One must call dsp_input_size() * to get the correct input buffer size. */ if (size > RESAMPLE_BUF_SIZE*2) size = RESAMPLE_BUF_SIZE*2; return size; } /* Given size bytes of output buffer, calculate number of bytes of input * data that would be consumed in order to fill the output buffer. */ long dsp_input_size(long size) { dsp = &dsp_conf[current_codec]; /* convert to number of output stereo samples. */ size /= 2; /* Mono means we need half input samples to fill the output buffer */ if (dsp->stereo_mode == STEREO_MONO) size /= 2; /* size is now the number of resampled input samples. Convert to original input samples. */ if (dsp->frequency != NATIVE_FREQUENCY) { /* Use the real resampling delta = * (unsigned long) dsp->frequency * 65536 / NATIVE_FREQUENCY, and * round towards zero to avoid buffer overflows. */ size = ((unsigned long)size * resample_data[current_codec][0].delta) >> 16; } /* Convert back to bytes. */ if (dsp->sample_depth > NATIVE_DEPTH) size *= 4; else size *= 2; return size; } int dsp_stereo_mode(void) { dsp = &dsp_conf[current_codec]; return dsp->stereo_mode; } bool dsp_configure(int setting, void *value) { dsp = &dsp_conf[current_codec]; switch (setting) { case DSP_SET_FREQUENCY: memset(&resample_data[current_codec][0], 0, sizeof(struct resample_data) * 2); /* Fall through!!! */ case DSP_SWITCH_FREQUENCY: dsp->frequency = ((int) value == 0) ? NATIVE_FREQUENCY : (int) value; resample_data[current_codec][0].delta = resample_data[current_codec][1].delta = (unsigned long) dsp->frequency * 65536 / NATIVE_FREQUENCY; break; case DSP_SET_CLIP_MIN: dsp->clip_min = (long) value; break; case DSP_SET_CLIP_MAX: dsp->clip_max = (long) value; break; case DSP_SET_SAMPLE_DEPTH: dsp->sample_depth = (long) value; if (dsp->sample_depth <= NATIVE_DEPTH) { dsp->frac_bits = WORD_FRACBITS; dsp->sample_bytes = sizeof(short); dsp->clip_max = ((1 << WORD_FRACBITS) - 1); dsp->clip_min = -((1 << WORD_FRACBITS)); } else { dsp->frac_bits = (long) value; dsp->sample_bytes = sizeof(long); } break; case DSP_SET_STEREO_MODE: dsp->stereo_mode = (long) value; break; case DSP_RESET: dsp->dither_enabled = false; dsp->stereo_mode = STEREO_NONINTERLEAVED; dsp->clip_max = ((1 << WORD_FRACBITS) - 1); dsp->clip_min = -((1 << WORD_FRACBITS)); dsp->track_gain = 0; dsp->album_gain = 0; dsp->track_peak = 0; dsp->album_peak = 0; dsp->frequency = NATIVE_FREQUENCY; dsp->sample_depth = NATIVE_DEPTH; dsp->frac_bits = WORD_FRACBITS; dsp->new_gain = true; break; case DSP_DITHER: memset(dither_data, 0, sizeof(dither_data)); dsp->dither_enabled = (bool) value; break; case DSP_SET_TRACK_GAIN: dsp->track_gain = (long) value; dsp->new_gain = true; break; case DSP_SET_ALBUM_GAIN: dsp->album_gain = (long) value; dsp->new_gain = true; break; case DSP_SET_TRACK_PEAK: dsp->track_peak = (long) value; dsp->new_gain = true; break; case DSP_SET_ALBUM_PEAK: dsp->album_peak = (long) value; dsp->new_gain = true; break; default: return 0; } return 1; } void dsp_set_replaygain(bool always) { dsp = &dsp_conf[current_codec]; if (always || dsp->new_gain) { long gain = 0; dsp->new_gain = false; if (global_settings.replaygain || global_settings.replaygain_noclip) { long peak; if (global_settings.replaygain) { gain = (global_settings.replaygain_track || !dsp->album_gain) ? dsp->track_gain : dsp->album_gain; if (global_settings.replaygain_preamp) { long preamp = get_replaygain_int( global_settings.replaygain_preamp * 10); gain = (long) ((((int64_t) gain * preamp)) >> 24); } } peak = (global_settings.replaygain_track || !dsp->album_peak) ? dsp->track_peak : dsp->album_peak; if (gain == 0) { /* So that noclip can work even with no gain information. */ gain = DEFAULT_REPLAYGAIN; } if (global_settings.replaygain_noclip && (peak != 0) && ((((int64_t) gain * peak) >> 24) >= DEFAULT_REPLAYGAIN)) { gain = (((int64_t) DEFAULT_REPLAYGAIN << 24) / peak); } if (gain == DEFAULT_REPLAYGAIN) { /* Nothing to do, disable processing. */ gain = 0; } } /* Store in S8.23 format to simplify calculations. */ dsp->replaygain = gain >> 1; } }