8317226ff8
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30961 a1c6a512-1295-4272-9138-f99709370657
442 lines
14 KiB
C++
442 lines
14 KiB
C++
/***************************************************************************
|
|
* __________ __ ___.
|
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
* \/ \/ \/ \/ \/
|
|
*
|
|
* Copyright (C) 2010 by Dominik Riebeling
|
|
*
|
|
* All files in this archive are subject to the GNU General Public License.
|
|
* See the file COPYING in the source tree root for full license agreement.
|
|
*
|
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
* KIND, either express or implied.
|
|
*
|
|
****************************************************************************/
|
|
|
|
#include <QtCore>
|
|
#include "ttsbase.h"
|
|
#include "ttscarbon.h"
|
|
#include "encttssettings.h"
|
|
#include "rbsettings.h"
|
|
|
|
#include <CoreFoundation/CoreFoundation.h>
|
|
#include <ApplicationServices/ApplicationServices.h>
|
|
#include <Carbon/Carbon.h>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
#include <inttypes.h>
|
|
|
|
TTSCarbon::TTSCarbon(QObject* parent) : TTSBase(parent)
|
|
{
|
|
}
|
|
|
|
TTSBase::Capabilities TTSCarbon::capabilities()
|
|
{
|
|
return TTSBase::CanSpeak;
|
|
}
|
|
|
|
bool TTSCarbon::configOk()
|
|
{
|
|
return true;
|
|
}
|
|
|
|
|
|
bool TTSCarbon::start(QString *errStr)
|
|
{
|
|
(void)errStr;
|
|
VoiceSpec vspec;
|
|
VoiceSpec* vspecref = NULL;
|
|
VoiceDescription vdesc;
|
|
OSErr error;
|
|
QString selectedVoice
|
|
= RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
|
|
SInt16 numVoices;
|
|
SInt16 voiceIndex;
|
|
error = CountVoices(&numVoices);
|
|
for(voiceIndex = 1; voiceIndex < numVoices; ++voiceIndex) {
|
|
error = GetIndVoice(voiceIndex, &vspec);
|
|
error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
|
|
// name is pascal string, i.e. the first byte is the length.
|
|
QString name = QString::fromLocal8Bit((const char*)&vdesc.name[1],
|
|
vdesc.name[0]);
|
|
if(name == selectedVoice) {
|
|
vspecref = &vspec;
|
|
if(vdesc.script != -1)
|
|
m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
|
|
else
|
|
m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
|
|
break;
|
|
}
|
|
}
|
|
if(voiceIndex == numVoices) {
|
|
// voice not found. Add user notification here and proceed with
|
|
// system default voice.
|
|
qDebug() << "[TTSCarbon] Selected voice not found, using system default!";
|
|
GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
|
|
if(vdesc.script != -1)
|
|
m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
|
|
else
|
|
m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
|
|
}
|
|
|
|
error = NewSpeechChannel(vspecref, &m_channel);
|
|
//SetSpeechInfo(channel, soSpeechDoneCallBack, speechDone);
|
|
Fixed rate = (Fixed)(0x10000 * RbSettings::subValue("carbon",
|
|
RbSettings::TtsSpeed).toInt());
|
|
if(rate != 0)
|
|
SetSpeechRate(m_channel, rate);
|
|
|
|
Fixed pitch = (Fixed)(0x10000 * RbSettings::subValue("carbon",
|
|
RbSettings::TtsPitch).toInt());
|
|
if(pitch != 0)
|
|
SetSpeechPitch(m_channel, pitch);
|
|
|
|
return (error == 0) ? true : false;
|
|
}
|
|
|
|
|
|
bool TTSCarbon::stop(void)
|
|
{
|
|
DisposeSpeechChannel(m_channel);
|
|
return true;
|
|
}
|
|
|
|
|
|
void TTSCarbon::generateSettings(void)
|
|
{
|
|
QStringList voiceNames;
|
|
QString systemVoice;
|
|
SInt16 numVoices;
|
|
OSErr error;
|
|
VoiceSpec vspec;
|
|
VoiceDescription vdesc;
|
|
|
|
// get system voice
|
|
error = GetVoiceDescription(NULL, &vdesc, sizeof(vdesc));
|
|
systemVoice
|
|
= QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
|
|
// get list of all voices
|
|
CountVoices(&numVoices);
|
|
for(SInt16 i = 1; i < numVoices; ++i) {
|
|
error = GetIndVoice(i, &vspec);
|
|
error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
|
|
// name is pascal string, i.e. the first byte is the length.
|
|
QString name
|
|
= QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
|
|
voiceNames.append(name.trimmed());
|
|
}
|
|
// voice
|
|
EncTtsSetting* setting;
|
|
QString voice
|
|
= RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
|
|
if(voice.isEmpty())
|
|
voice = systemVoice;
|
|
setting = new EncTtsSetting(this, EncTtsSetting::eSTRINGLIST,
|
|
tr("Voice:"), voice, voiceNames, EncTtsSetting::eNOBTN);
|
|
insertSetting(ConfigVoice, setting);
|
|
|
|
// speed
|
|
int speed = RbSettings::subValue("carbon", RbSettings::TtsSpeed).toInt();
|
|
setting = new EncTtsSetting(this, EncTtsSetting::eINT,
|
|
tr("Speed (words/min):"), speed, 80, 500,
|
|
EncTtsSetting::eNOBTN);
|
|
insertSetting(ConfigSpeed, setting);
|
|
|
|
// pitch
|
|
int pitch = RbSettings::subValue("carbon", RbSettings::TtsPitch).toInt();
|
|
setting = new EncTtsSetting(this, EncTtsSetting::eINT,
|
|
tr("Pitch (0 for default):"), pitch, 0, 65,
|
|
EncTtsSetting::eNOBTN);
|
|
insertSetting(ConfigPitch, setting);
|
|
|
|
}
|
|
|
|
|
|
void TTSCarbon::saveSettings(void)
|
|
{
|
|
// save settings in user config
|
|
RbSettings::setSubValue("carbon", RbSettings::TtsVoice,
|
|
getSetting(ConfigVoice)->current().toString());
|
|
RbSettings::setSubValue("carbon", RbSettings::TtsSpeed,
|
|
getSetting(ConfigSpeed)->current().toInt());
|
|
RbSettings::setSubValue("carbon", RbSettings::TtsPitch,
|
|
getSetting(ConfigPitch)->current().toInt());
|
|
RbSettings::sync();
|
|
}
|
|
|
|
|
|
/** @brief create wav file from text using the selected TTS voice.
|
|
*/
|
|
TTSStatus TTSCarbon::voice(QString text, QString wavfile, QString* errStr)
|
|
{
|
|
TTSStatus status = NoError;
|
|
OSErr error;
|
|
|
|
char* tmpfile = NULL;
|
|
if(!wavfile.isEmpty()) {
|
|
QString aifffile = wavfile + ".aiff";
|
|
// FIXME: find out why we need to do this.
|
|
// Create a local copy of the temporary file filename.
|
|
// Not doing so causes weird issues (path contains trailing spaces)
|
|
unsigned int len = aifffile.size() + 1;
|
|
tmpfile = (char*)malloc(len * sizeof(char));
|
|
strncpy(tmpfile, aifffile.toLocal8Bit().constData(), len);
|
|
CFStringRef tmpfileref = CFStringCreateWithCString(kCFAllocatorDefault,
|
|
tmpfile, kCFStringEncodingUTF8);
|
|
CFURLRef urlref = CFURLCreateWithFileSystemPath(kCFAllocatorDefault,
|
|
tmpfileref, kCFURLPOSIXPathStyle, false);
|
|
SetSpeechInfo(m_channel, soOutputToFileWithCFURL, urlref);
|
|
}
|
|
|
|
// speak it.
|
|
// Convert the string to the encoding requested by the voice. Do this
|
|
// via CFString, as this allows to directly use the destination encoding
|
|
// as CFString uses the same values as the voice.
|
|
|
|
// allocate enough space to allow storing the string in a 2 byte encoding
|
|
unsigned int textlen = 2 * text.length() + 1;
|
|
char* textbuf = (char*)calloc(textlen, sizeof(char));
|
|
char* utf8data = (char*)text.toUtf8().constData();
|
|
int utf8bytes = text.toUtf8().size();
|
|
CFStringRef cfstring = CFStringCreateWithBytes(kCFAllocatorDefault,
|
|
(UInt8*)utf8data, utf8bytes,
|
|
kCFStringEncodingUTF8, (Boolean)false);
|
|
CFIndex usedBuf = 0;
|
|
CFRange range;
|
|
range.location = 0; // character in string to start.
|
|
range.length = text.length(); // number of _characters_ in string
|
|
// FIXME: check if converting between encodings was lossless.
|
|
CFStringGetBytes(cfstring, range, m_voiceScript, ' ',
|
|
false, (UInt8*)textbuf, textlen, &usedBuf);
|
|
|
|
error = SpeakText(m_channel, textbuf, (unsigned long)usedBuf);
|
|
while(SpeechBusy()) {
|
|
// FIXME: add small delay here to make calls less frequent
|
|
QCoreApplication::processEvents();
|
|
}
|
|
if(error != 0) {
|
|
*errStr = tr("Could not voice string");
|
|
status = FatalError;
|
|
}
|
|
free(textbuf);
|
|
CFRelease(cfstring);
|
|
|
|
if(!wavfile.isEmpty()) {
|
|
// convert the temporary aiff file to wav
|
|
if(status == NoError
|
|
&& convertAiffToWav(tmpfile, wavfile.toLocal8Bit().constData()) != 0) {
|
|
*errStr = tr("Could not convert intermediate file");
|
|
status = FatalError;
|
|
}
|
|
// remove temporary aiff file
|
|
unlink(tmpfile);
|
|
free(tmpfile);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
unsigned long TTSCarbon::be2u32(unsigned char* buf)
|
|
{
|
|
return (buf[0]&0xff)<<24 | (buf[1]&0xff)<<16 | (buf[2]&0xff)<<8 | (buf[3]&0xff);
|
|
}
|
|
|
|
|
|
unsigned long TTSCarbon::be2u16(unsigned char* buf)
|
|
{
|
|
return buf[1]&0xff | (buf[0]&0xff)<<8;
|
|
}
|
|
|
|
|
|
unsigned char* TTSCarbon::u32tobuf(unsigned char* buf, uint32_t val)
|
|
{
|
|
buf[0] = val & 0xff;
|
|
buf[1] = (val>> 8) & 0xff;
|
|
buf[2] = (val>>16) & 0xff;
|
|
buf[3] = (val>>24) & 0xff;
|
|
return buf;
|
|
}
|
|
|
|
|
|
unsigned char* TTSCarbon::u16tobuf(unsigned char* buf, uint16_t val)
|
|
{
|
|
buf[0] = val & 0xff;
|
|
buf[1] = (val>> 8) & 0xff;
|
|
return buf;
|
|
}
|
|
|
|
|
|
/** @brief convert 80 bit extended ("long double") to int.
|
|
* This is simplified to handle the usual audio sample rates. Everything else
|
|
* might break. If the value isn't supported it will return 0.
|
|
* Conversion taken from Rockbox aiff codec.
|
|
*/
|
|
unsigned int TTSCarbon::extended2int(unsigned char* buf)
|
|
{
|
|
unsigned int result = 0;
|
|
/* value negative? */
|
|
if(buf[0] & 0x80)
|
|
return 0;
|
|
/* check exponent. Int can handle up to 2^31. */
|
|
int exponent = buf[0] << 8 | buf[1];
|
|
if(exponent < 0x4000 || exponent > (0x4000 + 30))
|
|
return 0;
|
|
result = ((buf[2]<<24) | (buf[3]<<16) | (buf[4]<<8) | buf[5]) + 1;
|
|
result >>= (16 + 14 - buf[1]);
|
|
return result;
|
|
}
|
|
|
|
|
|
/** @brief Convert aiff file to wav. Returns 0 on success.
|
|
*/
|
|
int TTSCarbon::convertAiffToWav(const char* aiff, const char* wav)
|
|
{
|
|
struct commchunk {
|
|
unsigned long chunksize;
|
|
unsigned short channels;
|
|
unsigned long frames;
|
|
unsigned short size;
|
|
int rate;
|
|
};
|
|
|
|
struct ssndchunk {
|
|
unsigned long chunksize;
|
|
unsigned long offset;
|
|
unsigned long blocksize;
|
|
};
|
|
|
|
FILE* in;
|
|
FILE* out;
|
|
unsigned char obuf[4];
|
|
unsigned char* buf;
|
|
/* minimum file size for a valid aiff file is 46 bytes:
|
|
* - FORM chunk: 12 bytes
|
|
* - COMM chunk: 18 bytes
|
|
* - SSND chunk: 16 bytes (with no actual data)
|
|
*/
|
|
struct stat filestat;
|
|
stat(aiff, &filestat);
|
|
if(filestat.st_size < 46)
|
|
return -1;
|
|
/* read input file into memory */
|
|
buf = (unsigned char*)malloc(filestat.st_size * sizeof(unsigned char));
|
|
if(!buf) /* error out if malloc() failed */
|
|
return -1;
|
|
in = fopen(aiff, "rb");
|
|
if(fread(buf, 1, filestat.st_size, in) < filestat.st_size) {
|
|
printf("could not read file: not enought bytes read\n");
|
|
fclose(in);
|
|
free(buf);
|
|
return -1;
|
|
}
|
|
fclose(in);
|
|
|
|
/* check input file format */
|
|
if(memcmp(buf, "FORM", 4) | memcmp(&buf[8], "AIFF", 4)) {
|
|
printf("No valid AIFF header found.\n");
|
|
free(buf);
|
|
return -1;
|
|
}
|
|
/* read COMM chunk */
|
|
unsigned char* commstart = &buf[12];
|
|
struct commchunk comm;
|
|
if(memcmp(commstart, "COMM", 4)) {
|
|
printf("COMM chunk not at beginning.\n");
|
|
free(buf);
|
|
return -1;
|
|
}
|
|
comm.chunksize = be2u32(&commstart[4]);
|
|
comm.channels = be2u16(&commstart[8]);
|
|
comm.frames = be2u32(&commstart[10]);
|
|
comm.size = be2u16(&commstart[14]);
|
|
comm.rate = extended2int(&commstart[16]);
|
|
|
|
/* find SSND as next chunk */
|
|
unsigned char* ssndstart = commstart + 8 + comm.chunksize;
|
|
while(memcmp(ssndstart, "SSND", 4) && ssndstart < (buf + filestat.st_size)) {
|
|
printf("Skipping chunk.\n");
|
|
ssndstart += be2u32(&ssndstart[4]) + 8;
|
|
}
|
|
if(ssndstart > (buf + filestat.st_size)) {
|
|
free(buf);
|
|
return -1;
|
|
}
|
|
|
|
struct ssndchunk ssnd;
|
|
ssnd.chunksize = be2u32(&ssndstart[4]);
|
|
ssnd.offset = be2u32(&ssndstart[8]);
|
|
ssnd.blocksize = be2u32(&ssndstart[12]);
|
|
|
|
/* Calculate the total length of the resulting RIFF chunk.
|
|
* The length is given by frames * samples * bytes/sample.
|
|
* We need to add:
|
|
* - 16 bytes: fmt chunk header
|
|
* - 8 bytes: data chunk header
|
|
* - 4 bytes: wave chunk identifier
|
|
*/
|
|
out = fopen(wav, "wb+");
|
|
|
|
/* write the wav header */
|
|
unsigned short blocksize = comm.channels * (comm.size >> 3);
|
|
unsigned long rifflen = blocksize * comm.frames + 28;
|
|
fwrite("RIFF", 1, 4, out);
|
|
fwrite(u32tobuf(obuf, rifflen), 1, 4, out);
|
|
fwrite("WAVE", 1, 4, out);
|
|
|
|
/* write the fmt chunk and chunk size (always 16) */
|
|
/* write fmt chunk header:
|
|
* header, size (always 0x10, format code (always 0x0001)
|
|
*/
|
|
fwrite("fmt \x10\x00\x00\x00\x01\x00", 1, 10, out);
|
|
/* number of channels (2 bytes) */
|
|
fwrite(u16tobuf(obuf, comm.channels), 1, 2, out);
|
|
/* sampling rate (4 bytes) */
|
|
fwrite(u32tobuf(obuf, comm.rate), 1, 4, out);
|
|
|
|
/* data rate, i.e. bytes/sec */
|
|
fwrite(u32tobuf(obuf, comm.rate * blocksize), 1, 4, out);
|
|
|
|
/* data block size */
|
|
fwrite(u16tobuf(obuf, blocksize), 1, 2, out);
|
|
|
|
/* bits per sample */
|
|
fwrite(u16tobuf(obuf, comm.size), 1, 2, out);
|
|
|
|
/* write the data chunk */
|
|
/* chunk id */
|
|
fwrite("data", 1, 4, out);
|
|
/* chunk size: 4 bytes. */
|
|
unsigned long cs = blocksize * comm.frames;
|
|
fwrite(u32tobuf(obuf, cs), 1, 4, out);
|
|
|
|
/* write data */
|
|
unsigned char* data = ssndstart;
|
|
unsigned long pos = ssnd.chunksize;
|
|
/* byteswap if samples are 16 bit */
|
|
if(comm.size == 16) {
|
|
while(pos) {
|
|
obuf[1] = *data++ & 0xff;
|
|
obuf[0] = *data++ & 0xff;
|
|
fwrite(obuf, 1, 2, out);
|
|
pos -= 2;
|
|
}
|
|
}
|
|
/* 8 bit samples have need no conversion so we can bulk copy.
|
|
* Everything that is not 16 bit is considered 8. */
|
|
else {
|
|
fwrite(data, 1, pos, out);
|
|
}
|
|
/* number of bytes has to be even, even if chunksize is not. */
|
|
if(cs % 2) {
|
|
fwrite(obuf, 1, 1, out);
|
|
}
|
|
|
|
fclose(out);
|
|
free(buf);
|
|
return 0;
|
|
}
|
|
|