rockbox/rbutil/rbutilqt/base/ttscarbon.cpp
Dominik Riebeling 4d2ce949b3 Use cutelogger for Rockbox Utility internal trace.
Change tracing from qDebug() to use cutelogger, which is available under the
LGPL2.1. This allows to automatically add filename and line number to the log,
and also provides multiple log levels.

Change-Id: I5dbdaf902ba54ea99f07ae10a07467c52fdac910
2013-11-04 23:32:52 +01:00

443 lines
14 KiB
C++

/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2010 by Dominik Riebeling
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <QtCore>
#include "ttsbase.h"
#include "ttscarbon.h"
#include "encttssettings.h"
#include "rbsettings.h"
#include <CoreFoundation/CoreFoundation.h>
#include <ApplicationServices/ApplicationServices.h>
#include <Carbon/Carbon.h>
#include <unistd.h>
#include <sys/stat.h>
#include <inttypes.h>
#include "Logger.h"
/** @brief Create the Carbon (Speech Synthesis Manager) TTS backend.
 *  @param parent parent object, handed on to the TTSBase constructor.
 */
TTSCarbon::TTSCarbon(QObject* parent) : TTSBase(parent)
{
    // No speech channel exists until start() has opened one. Give the members
    // defined values so that nothing operates on an indeterminate handle or
    // encoding when start() was not called or did not succeed.
    m_channel = NULL;
    m_voiceScript = (CFStringBuiltInEncodings)kCFStringEncodingMacRoman;
}
/** @brief Report the features this TTS backend offers.
 *  @return TTSBase::CanSpeak, the only capability announced by this backend.
 */
TTSBase::Capabilities TTSCarbon::capabilities()
{
    const TTSBase::Capabilities supported = TTSBase::CanSpeak;
    return supported;
}
/** @brief Tell whether the backend configuration is usable.
 *  @return always true; no setting is validated here.
 */
bool TTSCarbon::configOk()
{
    const bool usable = true;
    return usable;
}
bool TTSCarbon::start(QString *errStr)
{
(void)errStr;
VoiceSpec vspec;
VoiceSpec* vspecref = NULL;
VoiceDescription vdesc;
OSErr error;
QString selectedVoice
= RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
SInt16 numVoices;
SInt16 voiceIndex;
error = CountVoices(&numVoices);
for(voiceIndex = 1; voiceIndex < numVoices; ++voiceIndex) {
error = GetIndVoice(voiceIndex, &vspec);
error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
// name is pascal string, i.e. the first byte is the length.
QString name = QString::fromLocal8Bit((const char*)&vdesc.name[1],
vdesc.name[0]);
if(name == selectedVoice) {
vspecref = &vspec;
if(vdesc.script != -1)
m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
else
m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
break;
}
}
if(voiceIndex == numVoices) {
// voice not found. Add user notification here and proceed with
// system default voice.
LOG_WARNING() << "Selected voice not found, using system default!";
GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
if(vdesc.script != -1)
m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
else
m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
}
error = NewSpeechChannel(vspecref, &m_channel);
//SetSpeechInfo(channel, soSpeechDoneCallBack, speechDone);
Fixed rate = (Fixed)(0x10000 * RbSettings::subValue("carbon",
RbSettings::TtsSpeed).toInt());
if(rate != 0)
SetSpeechRate(m_channel, rate);
Fixed pitch = (Fixed)(0x10000 * RbSettings::subValue("carbon",
RbSettings::TtsPitch).toInt());
if(pitch != 0)
SetSpeechPitch(m_channel, pitch);
return (error == 0) ? true : false;
}
bool TTSCarbon::stop(void)
{
DisposeSpeechChannel(m_channel);
return true;
}
void TTSCarbon::generateSettings(void)
{
QStringList voiceNames;
QString systemVoice;
SInt16 numVoices;
OSErr error;
VoiceSpec vspec;
VoiceDescription vdesc;
// get system voice
error = GetVoiceDescription(NULL, &vdesc, sizeof(vdesc));
systemVoice
= QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
// get list of all voices
CountVoices(&numVoices);
for(SInt16 i = 1; i < numVoices; ++i) {
error = GetIndVoice(i, &vspec);
error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
// name is pascal string, i.e. the first byte is the length.
QString name
= QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
voiceNames.append(name.trimmed());
}
// voice
EncTtsSetting* setting;
QString voice
= RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
if(voice.isEmpty())
voice = systemVoice;
setting = new EncTtsSetting(this, EncTtsSetting::eSTRINGLIST,
tr("Voice:"), voice, voiceNames, EncTtsSetting::eNOBTN);
insertSetting(ConfigVoice, setting);
// speed
int speed = RbSettings::subValue("carbon", RbSettings::TtsSpeed).toInt();
setting = new EncTtsSetting(this, EncTtsSetting::eINT,
tr("Speed (words/min):"), speed, 80, 500,
EncTtsSetting::eNOBTN);
insertSetting(ConfigSpeed, setting);
// pitch
int pitch = RbSettings::subValue("carbon", RbSettings::TtsPitch).toInt();
setting = new EncTtsSetting(this, EncTtsSetting::eINT,
tr("Pitch (0 for default):"), pitch, 0, 65,
EncTtsSetting::eNOBTN);
insertSetting(ConfigPitch, setting);
}
void TTSCarbon::saveSettings(void)
{
// save settings in user config
RbSettings::setSubValue("carbon", RbSettings::TtsVoice,
getSetting(ConfigVoice)->current().toString());
RbSettings::setSubValue("carbon", RbSettings::TtsSpeed,
getSetting(ConfigSpeed)->current().toInt());
RbSettings::setSubValue("carbon", RbSettings::TtsPitch,
getSetting(ConfigPitch)->current().toInt());
RbSettings::sync();
}
/** @brief create wav file from text using the selected TTS voice.
 *
 * If @p wavfile is not empty the speech is written to "<wavfile>.aiff",
 * converted to wav and the intermediate aiff file removed again. The call
 * blocks (processing Qt events) until the synthesizer is no longer busy.
 *
 * @param text    text to speak.
 * @param wavfile destination wav file. If empty the speech output
 *                destination is left unchanged.
 * @param errStr  receives an error description on failure; may be NULL.
 * @return NoError on success, FatalError if speaking or converting failed.
 */
TTSStatus TTSCarbon::voice(QString text, QString wavfile, QString* errStr)
{
    TTSStatus status = NoError;
    const bool toFile = !wavfile.isEmpty();
    // The 8 bit file name is kept in a local QByteArray: the buffer returned
    // by constData() is only valid as long as the byte array itself lives,
    // and it is needed again below for converting and removing the file.
    QByteArray aiffname;
    CFURLRef urlref = NULL;
    if(toFile) {
        aiffname = (wavfile + ".aiff").toLocal8Bit();
        CFStringRef tmpfileref = CFStringCreateWithCString(kCFAllocatorDefault,
                aiffname.constData(), kCFStringEncodingUTF8);
        if(tmpfileref != NULL) {
            urlref = CFURLCreateWithFileSystemPath(kCFAllocatorDefault,
                    tmpfileref, kCFURLPOSIXPathStyle, false);
            CFRelease(tmpfileref);
        }
        if(urlref == NULL) {
            // without a destination there will be no file to convert.
            if(errStr != NULL)
                *errStr = tr("Could not voice string");
            return FatalError;
        }
        SetSpeechInfo(m_channel, soOutputToFileWithCFURL, urlref);
    }
    // speak it.
    // Convert the string to the encoding requested by the voice. Do this
    // via CFString, as this allows to directly use the destination encoding
    // as CFString uses the same values as the voice.
    const QByteArray utf8 = text.toUtf8();
    CFStringRef cfstring = CFStringCreateWithBytes(kCFAllocatorDefault,
            (const UInt8*)utf8.constData(), utf8.size(),
            kCFStringEncodingUTF8, (Boolean)false);
    bool spoken = false;
    if(cfstring != NULL) {
        const CFRange range = CFRangeMake(0, CFStringGetLength(cfstring));
        // First pass without a buffer: ask how many bytes the text needs in
        // the destination encoding instead of guessing a size.
        CFIndex needed = 0;
        CFStringGetBytes(cfstring, range, m_voiceScript, ' ',
                false, NULL, 0, &needed);
        QByteArray textbuf((int)needed + 1, '\0');
        CFIndex usedBuf = 0;
        // FIXME: check if converting between encodings was lossless.
        CFStringGetBytes(cfstring, range, m_voiceScript, ' ',
                false, (UInt8*)textbuf.data(), needed, &usedBuf);
        const OSErr error = SpeakText(m_channel, textbuf.constData(),
                (unsigned long)usedBuf);
        // textbuf stays alive until the synthesizer is done with it.
        while(SpeechBusy()) {
            // FIXME: add small delay here to make calls less frequent
            QCoreApplication::processEvents();
        }
        spoken = (error == noErr);
        CFRelease(cfstring);
    }
    if(!spoken) {
        if(errStr != NULL)
            *errStr = tr("Could not voice string");
        status = FatalError;
    }
    if(toFile) {
        // convert the temporary aiff file to wav
        if(status == NoError
            && convertAiffToWav(aiffname.constData(),
                    wavfile.toLocal8Bit().constData()) != 0) {
            if(errStr != NULL)
                *errStr = tr("Could not convert intermediate file");
            status = FatalError;
        }
        // remove temporary aiff file
        unlink(aiffname.constData());
        // the URL was created here, so it has to be released here as well.
        // This is done only after speaking has finished.
        CFRelease(urlref);
    }
    return status;
}
/** @brief Read a big endian 32 bit value.
 *  @param buf pointer to at least 4 readable bytes, most significant first.
 *  @return the value, always in the range 0 .. 0xffffffff.
 */
unsigned long TTSCarbon::be2u32(unsigned char* buf)
{
    // Shift in unsigned 32 bit arithmetic. Shifting the promoted (signed) int
    // by 24 turns a set top bit into a negative number, which is then sign
    // extended if unsigned long is wider than 32 bits.
    const uint32_t value = ((uint32_t)buf[0] << 24)
                         | ((uint32_t)buf[1] << 16)
                         | ((uint32_t)buf[2] << 8)
                         |  (uint32_t)buf[3];
    return value;
}
/** @brief Read a big endian 16 bit value.
 *  @param buf pointer to at least 2 readable bytes, most significant first.
 *  @return the value (0 .. 0xffff).
 */
unsigned long TTSCarbon::be2u16(unsigned char* buf)
{
    const int high = (buf[0] & 0xff) << 8;
    const int low = buf[1] & 0xff;
    return low | high;
}
/** @brief Store a 32 bit value in little endian byte order.
 *  @param buf destination, at least 4 bytes.
 *  @param val value to store.
 *  @return buf, so the call can be used directly as fwrite() argument.
 */
unsigned char* TTSCarbon::u32tobuf(unsigned char* buf, uint32_t val)
{
    // least significant byte goes first.
    for(int i = 0; i < 4; ++i) {
        buf[i] = (val >> (8 * i)) & 0xff;
    }
    return buf;
}
/** @brief Store a 16 bit value in little endian byte order.
 *  @param buf destination, at least 2 bytes.
 *  @param val value to store.
 *  @return buf, so the call can be used directly as fwrite() argument.
 */
unsigned char* TTSCarbon::u16tobuf(unsigned char* buf, uint16_t val)
{
    const unsigned char low = val & 0xff;
    const unsigned char high = (val >> 8) & 0xff;
    buf[0] = low;
    buf[1] = high;
    return buf;
}
/** @brief convert 80 bit extended ("long double") to int.
 * This is simplified to handle the usual audio sample rates. Everything else
 * might break. If the value isn't supported it will return 0.
 * Conversion taken from Rockbox aiff codec.
 *
 * @param buf pointer to the big endian 80 bit value; only the first 6 bytes
 *            (sign / exponent and the following 4 bytes) are read.
 * @return the integer value, or 0 for negative or out of range input.
 */
unsigned int TTSCarbon::extended2int(unsigned char* buf)
{
    /* value negative? */
    if(buf[0] & 0x80)
        return 0;
    /* check exponent. Int can handle up to 2^31. */
    const int exponent = (buf[0] << 8) | buf[1];
    if(exponent < 0x4000 || exponent > (0x4000 + 30))
        return 0;
    /* Assemble the 4 bytes following the exponent in unsigned arithmetic:
     * shifting a byte with its top bit set by 24 does not fit a signed int. */
    unsigned int result = (((unsigned int)buf[2] << 24)
                         | ((unsigned int)buf[3] << 16)
                         | ((unsigned int)buf[4] << 8)
                         |  (unsigned int)buf[5]) + 1;
    /* The range check above guarantees buf[1] <= 30, so the shift count
     * stays between 0 and 30. */
    result >>= (16 + 14 - buf[1]);
    return result;
}
/** @brief Convert aiff file to wav. Returns 0 on success.
 *
 * Reads the whole aiff file into memory, expects the COMM chunk directly
 * after the FORM header, searches the SSND chunk behind it and writes a
 * PCM wav file with a 16 byte fmt chunk. 16 bit samples are byte swapped;
 * every other sample size is copied unchanged.
 *
 * @param aiff path of the input file.
 * @param wav  path of the output file; created or truncated.
 * @return 0 on success, -1 if the input could not be read or parsed or the
 *         output could not be written.
 */
int TTSCarbon::convertAiffToWav(const char* aiff, const char* wav)
{
    struct commchunk {
        unsigned long chunksize;
        unsigned short channels;
        unsigned long frames;
        unsigned short size;
        int rate;
    };
    struct ssndchunk {
        unsigned long chunksize;
        unsigned long offset;
        unsigned long blocksize;
    };
    unsigned char obuf[4];
    /* minimum file size for a valid aiff file is 46 bytes:
     * - FORM chunk: 12 bytes
     * - COMM chunk: 18 bytes
     * - SSND chunk: 16 bytes (with no actual data)
     */
    struct stat filestat;
    if(stat(aiff, &filestat) != 0)
        return -1;
    if(filestat.st_size < 46)
        return -1;
    const size_t filesize = (size_t)filestat.st_size;
    /* read input file into memory */
    unsigned char* buf = (unsigned char*)malloc(filesize);
    if(!buf) /* error out if malloc() failed */
        return -1;
    FILE* in = fopen(aiff, "rb");
    if(!in) {
        free(buf);
        return -1;
    }
    const size_t bytesread = fread(buf, 1, filesize, in);
    fclose(in);
    if(bytesread < filesize) {
        printf("could not read file: not enought bytes read\n");
        free(buf);
        return -1;
    }
    /* check input file format */
    if(memcmp(buf, "FORM", 4) != 0 || memcmp(&buf[8], "AIFF", 4) != 0) {
        printf("No valid AIFF header found.\n");
        free(buf);
        return -1;
    }
    /* read COMM chunk. The minimum size check above guarantees that all
     * fields read here are inside the buffer. */
    unsigned char* commstart = &buf[12];
    struct commchunk comm;
    if(memcmp(commstart, "COMM", 4) != 0) {
        printf("COMM chunk not at beginning.\n");
        free(buf);
        return -1;
    }
    /* be2u32() returns unsigned long, which can be wider than the 32 bit
     * fields of the file. Mask so only those 32 bits are ever used. */
    comm.chunksize = be2u32(&commstart[4]) & 0xffffffffUL;
    comm.channels = be2u16(&commstart[8]);
    comm.frames = be2u32(&commstart[10]) & 0xffffffffUL;
    comm.size = be2u16(&commstart[14]);
    comm.rate = extended2int(&commstart[16]);
    /* find SSND as next chunk. Work with 64 bit offsets so sizes taken from
     * the file can neither overflow nor move a pointer outside the buffer.
     * Chunks with an odd size are followed by a pad byte. */
    uint64_t ssndpos = 12 + 8 + (uint64_t)comm.chunksize + (comm.chunksize & 1);
    bool found = false;
    /* a usable SSND chunk needs its full 16 byte header inside the file. */
    while(ssndpos + 16 <= (uint64_t)filesize) {
        if(memcmp(buf + ssndpos, "SSND", 4) == 0) {
            found = true;
            break;
        }
        printf("Skipping chunk.\n");
        const unsigned long skipped = be2u32(buf + ssndpos + 4) & 0xffffffffUL;
        ssndpos += 8 + (uint64_t)skipped + (skipped & 1);
    }
    if(!found) {
        free(buf);
        return -1;
    }
    unsigned char* ssndstart = buf + ssndpos;
    struct ssndchunk ssnd;
    ssnd.chunksize = be2u32(&ssndstart[4]) & 0xffffffffUL;
    ssnd.offset = be2u32(&ssndstart[8]) & 0xffffffffUL;
    ssnd.blocksize = be2u32(&ssndstart[12]) & 0xffffffffUL;
    /* The samples start behind the 16 byte SSND header (id, size, offset,
     * blocksize) plus the offset stored in the chunk, and end with the
     * chunk or the file, whichever comes first. */
    const uint64_t datapos = ssndpos + 16 + (uint64_t)ssnd.offset;
    uint64_t chunkend = ssndpos + 8 + (uint64_t)ssnd.chunksize;
    if(chunkend > (uint64_t)filesize)
        chunkend = filesize;
    const uint64_t available = (datapos < chunkend) ? (chunkend - datapos) : 0;
    /* The length of the sample data is frames * channels * bytes/sample,
     * limited to what the file really contains. */
    const unsigned short blocksize = comm.channels * (comm.size >> 3);
    uint64_t wanted = (uint64_t)blocksize * comm.frames;
    if(wanted > available)
        wanted = available;
    /* 16 bit samples are handled in pairs of bytes. */
    if(comm.size == 16)
        wanted &= ~(uint64_t)1;
    /* wav files store all lengths as 32 bit values. */
    if(wanted > 0xffffffffULL - 37) {
        free(buf);
        return -1;
    }
    const size_t datalen = (size_t)wanted;
    unsigned char* data = buf + datapos;
    /* byteswap if samples are 16 bit. Done in place as the buffer is a
     * private copy of the file. */
    if(comm.size == 16) {
        for(size_t i = 0; i + 1 < datalen; i += 2) {
            const unsigned char tmp = data[i];
            data[i] = data[i + 1];
            data[i + 1] = tmp;
        }
    }
    /* 8 bit samples are copied without conversion.
     * Everything that is not 16 bit is considered 8.
     * NOTE(review): AIFF stores 8 bit samples signed while wav expects them
     * unsigned -- confirm whether 8 bit input needs converting. */
    FILE* out = fopen(wav, "wb+");
    if(!out) {
        free(buf);
        return -1;
    }
    /* Calculate the total length of the resulting RIFF chunk: the sample
     * data (plus a pad byte if its length is odd) and
     * -  4 bytes: wave chunk identifier
     * - 24 bytes: fmt chunk (8 bytes header, 16 bytes content)
     * -  8 bytes: data chunk header
     */
    const unsigned int padding = (unsigned int)(datalen & 1);
    const uint32_t rifflen = (uint32_t)(datalen + padding + 36);
    /* write the wav header */
    fwrite("RIFF", 1, 4, out);
    fwrite(u32tobuf(obuf, rifflen), 1, 4, out);
    fwrite("WAVE", 1, 4, out);
    /* write fmt chunk header:
     * header, size (always 0x10), format code (always 0x0001)
     */
    fwrite("fmt \x10\x00\x00\x00\x01\x00", 1, 10, out);
    /* number of channels (2 bytes) */
    fwrite(u16tobuf(obuf, comm.channels), 1, 2, out);
    /* sampling rate (4 bytes) */
    fwrite(u32tobuf(obuf, comm.rate), 1, 4, out);
    /* data rate, i.e. bytes/sec */
    fwrite(u32tobuf(obuf, comm.rate * blocksize), 1, 4, out);
    /* data block size */
    fwrite(u16tobuf(obuf, blocksize), 1, 2, out);
    /* bits per sample */
    fwrite(u16tobuf(obuf, comm.size), 1, 2, out);
    /* write the data chunk: id, size (4 bytes) and the samples */
    fwrite("data", 1, 4, out);
    fwrite(u32tobuf(obuf, (uint32_t)datalen), 1, 4, out);
    fwrite(data, 1, datalen, out);
    /* number of bytes has to be even, even if chunksize is not. */
    if(padding)
        fputc(0, out);
    /* report write errors instead of silently leaving a broken file. */
    int result = ferror(out) ? -1 : 0;
    if(fclose(out) != 0)
        result = -1;
    free(buf);
    return result;
}