rockbox/rbutil/rbutilqt/base/ttscarbon.cpp

444 lines
14 KiB
C++
Raw Normal View History

/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2010 by Dominik Riebeling
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <QtCore>
#include "ttsbase.h"
#include "ttscarbon.h"
#include "encttssettings.h"
#include "rbsettings.h"
#include <CoreFoundation/CoreFoundation.h>
#include <ApplicationServices/ApplicationServices.h>
#include <Carbon/Carbon.h>
#include <unistd.h>
#include <sys/stat.h>
#include <inttypes.h>
#include "Logger.h"
/** @brief Construct the Carbon TTS object.
 * @param parent QObject parent, handed on to the TTSBase constructor.
 */
TTSCarbon::TTSCarbon(QObject* parent)
    : TTSBase(parent)
{
    // nothing else to initialize here.
}
/** @brief Report the capabilities of this TTS.
 * @return TTSBase::CanSpeak.
 */
TTSBase::Capabilities TTSCarbon::capabilities()
{
    TTSBase::Capabilities supported(TTSBase::CanSpeak);
    return supported;
}
/** @brief Check if the configuration is usable.
 * @return always true, no checks are performed.
 */
bool TTSCarbon::configOk()
{
    const bool usable = true;
    return usable;
}
/** @brief Create the speech channel for the configured voice.
 *
 * Looks up the voice stored in the "carbon" settings by name. If it cannot be
 * found the system default voice is used. The text encoding requested by the
 * voice is stored in m_voiceScript, and the configured rate and pitch are
 * applied to the new channel when they are non-zero.
 *
 * @param errStr unused.
 * @return true if the speech channel could be created, false otherwise.
 */
bool TTSCarbon::start(QString *errStr)
{
    (void)errStr;
    VoiceSpec vspec;
    VoiceSpec* vspecref = NULL;
    VoiceDescription vdesc;
    // The settings dialog lists trimmed names, so compare trimmed names.
    const QString selectedVoice
        = RbSettings::subValue("carbon", RbSettings::TtsVoice)
            .toString().trimmed();

    SInt16 numVoices = 0;
    if(CountVoices(&numVoices) != noErr)
        numVoices = 0;

    // Voice indices are 1-based and run up to (and including) numVoices.
    for(SInt16 voiceIndex = 1; voiceIndex <= numVoices; ++voiceIndex) {
        if(GetIndVoice(voiceIndex, &vspec) != noErr)
            continue;
        if(GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc)) != noErr)
            continue;
        // name is pascal string, i.e. the first byte is the length.
        const QString name = QString::fromLocal8Bit(
                reinterpret_cast<const char*>(&vdesc.name[1]), vdesc.name[0]);
        if(name.trimmed() == selectedVoice) {
            vspecref = &vspec;
            break;
        }
    }

    // When a voice was found vdesc still holds its description.
    bool haveDescription = (vspecref != NULL);
    if(!haveDescription) {
        // voice not found. Add user notification here and proceed with
        // system default voice.
        LOG_WARNING() << "Selected voice not found, using system default!";
        // NULL requests the description of the system default voice, which
        // is also the voice NewSpeechChannel() uses for a NULL VoiceSpec.
        haveDescription
            = (GetVoiceDescription(NULL, &vdesc, sizeof(vdesc)) == noErr);
    }
    if(haveDescription) {
        if(vdesc.script != -1)
            m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
        else
            m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
    }
    else {
        // no description available at all, don't use uninitialized data.
        m_voiceScript = kCFStringEncodingMacRoman;
    }

    if(NewSpeechChannel(vspecref, &m_channel) != noErr)
        return false;

    //SetSpeechInfo(channel, soSpeechDoneCallBack, speechDone);
    // Rate and pitch are passed as 16.16 fixed point values. A value of 0
    // leaves the default of the voice untouched.
    const Fixed rate = (Fixed)(0x10000 * RbSettings::subValue("carbon",
                RbSettings::TtsSpeed).toInt());
    if(rate != 0)
        SetSpeechRate(m_channel, rate);
    const Fixed pitch = (Fixed)(0x10000 * RbSettings::subValue("carbon",
                RbSettings::TtsPitch).toInt());
    if(pitch != 0)
        SetSpeechPitch(m_channel, pitch);
    return true;
}
/** @brief Dispose the speech channel held in m_channel.
 * @return always true; the result of disposing the channel is not checked.
 */
bool TTSCarbon::stop(void)
{
    const bool stopped = true;
    DisposeSpeechChannel(m_channel);
    return stopped;
}
void TTSCarbon::generateSettings(void)
{
QStringList voiceNames;
QString systemVoice;
SInt16 numVoices;
OSErr error;
VoiceSpec vspec;
VoiceDescription vdesc;
// get system voice
error = GetVoiceDescription(NULL, &vdesc, sizeof(vdesc));
systemVoice
= QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
// get list of all voices
CountVoices(&numVoices);
for(SInt16 i = 1; i < numVoices; ++i) {
error = GetIndVoice(i, &vspec);
error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
// name is pascal string, i.e. the first byte is the length.
QString name
= QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
voiceNames.append(name.trimmed());
}
// voice
EncTtsSetting* setting;
QString voice
= RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
if(voice.isEmpty())
voice = systemVoice;
setting = new EncTtsSetting(this, EncTtsSetting::eSTRINGLIST,
tr("Voice:"), voice, voiceNames, EncTtsSetting::eNOBTN);
insertSetting(ConfigVoice, setting);
// speed
int speed = RbSettings::subValue("carbon", RbSettings::TtsSpeed).toInt();
setting = new EncTtsSetting(this, EncTtsSetting::eINT,
tr("Speed (words/min):"), speed, 80, 500,
EncTtsSetting::eNOBTN);
insertSetting(ConfigSpeed, setting);
// pitch
int pitch = RbSettings::subValue("carbon", RbSettings::TtsPitch).toInt();
setting = new EncTtsSetting(this, EncTtsSetting::eINT,
tr("Pitch (0 for default):"), pitch, 0, 65,
EncTtsSetting::eNOBTN);
insertSetting(ConfigPitch, setting);
}
void TTSCarbon::saveSettings(void)
{
// save settings in user config
RbSettings::setSubValue("carbon", RbSettings::TtsVoice,
getSetting(ConfigVoice)->current().toString());
RbSettings::setSubValue("carbon", RbSettings::TtsSpeed,
getSetting(ConfigSpeed)->current().toInt());
RbSettings::setSubValue("carbon", RbSettings::TtsPitch,
getSetting(ConfigPitch)->current().toInt());
RbSettings::sync();
}
/** @brief create wav file from text using the selected TTS voice.
 *
 * If wavfile is not empty the speech channel output is directed to a
 * temporary file "<wavfile>.aiff" which is converted to wav and removed
 * afterwards. The text is converted to the encoding requested by the voice
 * (m_voiceScript) before it is passed to the speech channel.
 *
 * @param text text to speak.
 * @param wavfile output wav file. Empty to not set up output to a file.
 * @param errStr receives an error description on failure. May be NULL.
 * @return NoError on success, FatalError otherwise.
 */
TTSStatus TTSCarbon::voice(QString text, QString wavfile, QString* errStr)
{
    TTSStatus status = NoError;
    // Keep the encoded path in an object living for the whole function so
    // the pointers handed to the C APIs below stay valid.
    QByteArray aiffPath;
    CFURLRef urlref = NULL;

    if(!wavfile.isEmpty()) {
        const QString aifffile = wavfile + ".aiff";
        aiffPath = aifffile.toLocal8Bit();
        // The CFString is created as UTF-8, so hand it UTF-8 data.
        const QByteArray aiffUtf8 = aifffile.toUtf8();
        CFStringRef pathref = CFStringCreateWithCString(kCFAllocatorDefault,
                aiffUtf8.constData(), kCFStringEncodingUTF8);
        if(pathref != NULL) {
            urlref = CFURLCreateWithFileSystemPath(kCFAllocatorDefault,
                    pathref, kCFURLPOSIXPathStyle, false);
            CFRelease(pathref);
        }
        if(urlref == NULL) {
            if(errStr)
                *errStr = tr("Could not voice string");
            return FatalError;
        }
        SetSpeechInfo(m_channel, soOutputToFileWithCFURL, urlref);
    }

    // speak it.
    // Convert the string to the encoding requested by the voice. Do this
    // via CFString, as this allows to directly use the destination encoding
    // as CFString uses the same values as the voice.
    const QByteArray utf8 = text.toUtf8();
    CFStringRef cfstring = CFStringCreateWithBytes(kCFAllocatorDefault,
            reinterpret_cast<const UInt8*>(utf8.constData()), utf8.size(),
            kCFStringEncodingUTF8, (Boolean)false);
    // Holds the converted text. Declared at function scope as it has to stay
    // around until speaking has finished.
    QByteArray textbuf;
    if(cfstring == NULL) {
        if(errStr)
            *errStr = tr("Could not voice string");
        status = FatalError;
    }
    else {
        // convert all characters of the string.
        const CFRange range = CFRangeMake(0, CFStringGetLength(cfstring));
        // First pass with a NULL buffer: only figure the number of bytes
        // needed in the destination encoding.
        CFIndex needed = 0;
        CFStringGetBytes(cfstring, range, m_voiceScript, ' ',
                false, NULL, 0, &needed);
        textbuf.resize(static_cast<int>(needed));
        // FIXME: check if converting between encodings was lossless.
        CFIndex usedBuf = 0;
        CFStringGetBytes(cfstring, range, m_voiceScript, ' ',
                false, reinterpret_cast<UInt8*>(textbuf.data()),
                textbuf.size(), &usedBuf);
        CFRelease(cfstring);

        const OSErr error = SpeakText(m_channel, textbuf.constData(),
                (unsigned long)usedBuf);
        while(SpeechBusy()) {
            // FIXME: add small delay here to make calls less frequent
            QCoreApplication::processEvents();
        }
        if(error != 0) {
            if(errStr)
                *errStr = tr("Could not voice string");
            status = FatalError;
        }
    }

    if(urlref != NULL)
        CFRelease(urlref);

    if(!wavfile.isEmpty()) {
        // convert the temporary aiff file to wav
        if(status == NoError
            && convertAiffToWav(aiffPath.constData(),
                    wavfile.toLocal8Bit().constData()) != 0) {
            if(errStr)
                *errStr = tr("Could not convert intermediate file");
            status = FatalError;
        }
        // remove temporary aiff file
        unlink(aiffPath.constData());
    }
    return status;
}
/** @brief Read a big endian 32 bit value.
 * @param buf pointer to (at least) 4 bytes holding the value.
 * @return the value, always in the range 0 ... 0xffffffff.
 */
unsigned long TTSCarbon::be2u32(unsigned char* buf)
{
    // Assemble the value unsigned: shifting as int would make values with
    // the top bit set negative, which then get sign extended if unsigned
    // long is wider than 32 bit.
    const uint32_t value = (static_cast<uint32_t>(buf[0]) << 24)
                         | (static_cast<uint32_t>(buf[1]) << 16)
                         | (static_cast<uint32_t>(buf[2]) << 8)
                         |  static_cast<uint32_t>(buf[3]);
    return value;
}
/** @brief Read a big endian 16 bit value.
 * @param buf pointer to (at least) 2 bytes holding the value.
 * @return the value.
 */
unsigned long TTSCarbon::be2u16(unsigned char* buf)
{
    const unsigned long high = buf[0] & 0xff;
    const unsigned long low = buf[1] & 0xff;
    return (high << 8) | low;
}
/** @brief Store a 32 bit value as little endian.
 * @param buf destination, (at least) 4 bytes.
 * @param val value to store.
 * @return buf.
 */
unsigned char* TTSCarbon::u32tobuf(unsigned char* buf, uint32_t val)
{
    // least significant byte goes first.
    for(int i = 0; i < 4; ++i) {
        buf[i] = (val >> (8 * i)) & 0xff;
    }
    return buf;
}
/** @brief Store a 16 bit value as little endian.
 * @param buf destination, (at least) 2 bytes.
 * @param val value to store.
 * @return buf.
 */
unsigned char* TTSCarbon::u16tobuf(unsigned char* buf, uint16_t val)
{
    const unsigned char low = val & 0xff;
    const unsigned char high = (val >> 8) & 0xff;
    buf[0] = low;
    buf[1] = high;
    return buf;
}
/** @brief convert 80 bit extended ("long double") to int.
 * This is simplified to handle the usual audio sample rates. Everything else
 * might break. If the value isn't supported it will return 0.
 * Conversion taken from Rockbox aiff codec.
 *
 * @param buf pointer to the value. Only the first 6 bytes (sign / exponent
 *            and the upper 32 bit of the mantissa) are read.
 * @return converted value, 0 if negative or exponent out of range.
 */
unsigned int TTSCarbon::extended2int(unsigned char* buf)
{
    /* value negative? */
    if(buf[0] & 0x80)
        return 0;
    /* check exponent. Int can handle up to 2^31. */
    const int exponent = buf[0] << 8 | buf[1];
    if(exponent < 0x4000 || exponent > (0x4000 + 30))
        return 0;
    /* Assemble the upper mantissa bits unsigned. The top bit is set for
     * normalized values, so shifting as int would shift into the sign bit. */
    uint32_t mantissa = (static_cast<uint32_t>(buf[2]) << 24)
                      | (static_cast<uint32_t>(buf[3]) << 16)
                      | (static_cast<uint32_t>(buf[4]) << 8)
                      |  static_cast<uint32_t>(buf[5]);
    /* the increment is kept from the original conversion. */
    mantissa += 1;
    /* The exponent check above guarantees buf[0] == 0x40 and buf[1] <= 30,
     * so the shift is in the range 0 ... 30. */
    mantissa >>= (16 + 14 - buf[1]);
    return mantissa;
}
/** @brief Convert aiff file to wav. Returns 0 on success.
 *
 * The complete input file is read into memory. The COMM chunk is expected to
 * directly follow the FORM header, all chunks after it are skipped until the
 * SSND chunk is found. The output gets a 44 byte PCM wav header followed by
 * the sample data. 16 bit samples are byte swapped, everything else is
 * copied unchanged.
 *
 * @param aiff path of the AIFF input file.
 * @param wav path of the wav file to create.
 * @return 0 on success, -1 on error.
 */
int TTSCarbon::convertAiffToWav(const char* aiff, const char* wav)
{
    struct commchunk {
        unsigned long chunksize;
        unsigned short channels;
        unsigned long frames;
        unsigned short size;
        int rate;
    };
    struct ssndchunk {
        unsigned long chunksize;
        unsigned long offset;
        unsigned long blocksize;
    };
    unsigned char obuf[4];

    /* minimum file size for a valid aiff file is 46 bytes:
     * - FORM chunk: 12 bytes
     * - COMM chunk: 18 bytes
     * - SSND chunk: 16 bytes (with no actual data)
     */
    struct stat filestat;
    if(stat(aiff, &filestat) != 0 || filestat.st_size < 46)
        return -1;
    const size_t filesize = static_cast<size_t>(filestat.st_size);

    /* read input file into memory */
    unsigned char* buf = static_cast<unsigned char*>(malloc(filesize));
    if(!buf) /* error out if malloc() failed */
        return -1;
    FILE* in = fopen(aiff, "rb");
    if(!in) {
        free(buf);
        return -1;
    }
    const size_t bytesread = fread(buf, 1, filesize, in);
    fclose(in);
    if(bytesread < filesize) {
        printf("could not read file: not enought bytes read\n");
        free(buf);
        return -1;
    }

    /* check input file format */
    if(memcmp(buf, "FORM", 4) != 0 || memcmp(&buf[8], "AIFF", 4) != 0) {
        printf("No valid AIFF header found.\n");
        free(buf);
        return -1;
    }

    /* read COMM chunk. The minimum file size check above guarantees that
     * all its fields are inside the buffer. */
    unsigned char* commstart = &buf[12];
    struct commchunk comm;
    if(memcmp(commstart, "COMM", 4) != 0) {
        printf("COMM chunk not at beginning.\n");
        free(buf);
        return -1;
    }
    comm.chunksize = be2u32(&commstart[4]);
    comm.channels = be2u16(&commstart[8]);
    comm.frames = be2u32(&commstart[10]);
    comm.size = be2u16(&commstart[14]);
    comm.rate = extended2int(&commstart[16]);

    /* find SSND chunk, starting with the chunk following COMM.
     * All checks are done on offsets before touching the buffer so a broken
     * chunk size can't make us read outside of it. */
    size_t chunkpos = 12;
    size_t chunklen = comm.chunksize;
    for(;;) {
        const size_t remaining = filesize - chunkpos;
        if(remaining < 8 || chunklen > remaining - 8) {
            free(buf);
            return -1;
        }
        /* skip chunk header and data. Chunk data of odd length is followed
         * by a pad byte that is not included in the chunk size. */
        chunkpos += 8 + chunklen + (chunklen & 1);
        /* we need the chunk header plus the offset and blocksize fields. */
        if(chunkpos > filesize || filesize - chunkpos < 16) {
            free(buf);
            return -1;
        }
        if(memcmp(buf + chunkpos, "SSND", 4) == 0)
            break;
        printf("Skipping chunk.\n");
        chunklen = be2u32(buf + chunkpos + 4);
    }
    unsigned char* ssndstart = buf + chunkpos;
    struct ssndchunk ssnd;
    ssnd.chunksize = be2u32(&ssndstart[4]);
    ssnd.offset = be2u32(&ssndstart[8]);
    ssnd.blocksize = be2u32(&ssndstart[12]);

    /* Locate the sample data. The SSND chunk size includes the offset and
     * blocksize fields (8 bytes), and the samples start "offset" bytes
     * after those fields. Limit to what's actually in the file. */
    size_t sounddata = filesize - chunkpos - 16;
    const size_t chunkdata = (ssnd.chunksize >= 8) ? ssnd.chunksize - 8 : 0;
    if(chunkdata < sounddata)
        sounddata = chunkdata;
    if(ssnd.offset > sounddata) {
        free(buf);
        return -1;
    }
    unsigned char* data = ssndstart + 16 + ssnd.offset;
    sounddata -= ssnd.offset;

    /* Size of the resulting data chunk is frames * channels * bytes/sample
     * but never more than the input provides. */
    unsigned short blocksize = comm.channels * (comm.size >> 3);
    unsigned long cs = blocksize * comm.frames;
    if(cs > sounddata)
        cs = sounddata;
    if(comm.size == 16)
        cs &= ~1UL; /* only complete 16 bit samples */
    /* chunks are padded to an even number of bytes. */
    const unsigned long padding = cs & 1;

    FILE* out = fopen(wav, "wb+");
    if(!out) {
        free(buf);
        return -1;
    }

    /* write the wav header */
    /* The length of the RIFF chunk is the data length (plus padding) plus
     * -  4 bytes: wave chunk identifier
     * - 24 bytes: fmt chunk (8 bytes header, 16 bytes content)
     * -  8 bytes: data chunk header
     */
    unsigned long rifflen = cs + padding + 36;
    fwrite("RIFF", 1, 4, out);
    fwrite(u32tobuf(obuf, rifflen), 1, 4, out);
    fwrite("WAVE", 1, 4, out);
    /* write fmt chunk header:
     * header, size (always 0x10), format code (always 0x0001)
     */
    fwrite("fmt \x10\x00\x00\x00\x01\x00", 1, 10, out);
    /* number of channels (2 bytes) */
    fwrite(u16tobuf(obuf, comm.channels), 1, 2, out);
    /* sampling rate (4 bytes) */
    fwrite(u32tobuf(obuf, comm.rate), 1, 4, out);
    /* data rate, i.e. bytes/sec */
    fwrite(u32tobuf(obuf, comm.rate * blocksize), 1, 4, out);
    /* data block size */
    fwrite(u16tobuf(obuf, blocksize), 1, 2, out);
    /* bits per sample */
    fwrite(u16tobuf(obuf, comm.size), 1, 2, out);

    /* write the data chunk */
    /* chunk id */
    fwrite("data", 1, 4, out);
    /* chunk size: 4 bytes. */
    fwrite(u32tobuf(obuf, cs), 1, 4, out);
    /* byteswap if samples are 16 bit. This is done in place, the buffer
     * isn't needed for anything else anymore. */
    if(comm.size == 16) {
        for(unsigned long i = 0; i + 1 < cs; i += 2) {
            const unsigned char swap = data[i];
            data[i] = data[i + 1];
            data[i + 1] = swap;
        }
    }
    /* 8 bit samples need no conversion so we can bulk copy.
     * Everything that is not 16 bit is considered 8. */
    bool failed = (fwrite(data, 1, cs, out) != cs);
    /* number of bytes has to be even, even if chunksize is not. */
    if(padding) {
        obuf[0] = 0;
        fwrite(obuf, 1, 1, out);
    }
    if(ferror(out))
        failed = true;
    if(fclose(out) != 0)
        failed = true;
    free(buf);
    return failed ? -1 : 0;
}