rockbox/rbutil/rbutilqt/base/ttscarbon.cpp

/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 *   Copyright (C) 2010 by Dominik Riebeling
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

#include <QtCore>
#include "ttsbase.h"
#include "ttscarbon.h"
#include "encttssettings.h"
#include "rbsettings.h"

#include <CoreFoundation/CoreFoundation.h>
#include <ApplicationServices/ApplicationServices.h>
#include <Carbon/Carbon.h>
#include <unistd.h>
#include <sys/stat.h>
#include <inttypes.h>
#include "Logger.h"

TTSCarbon::TTSCarbon(QObject* parent) : TTSBase(parent)
{
}

TTSBase::Capabilities TTSCarbon::capabilities()
{
    return TTSBase::CanSpeak;
}

bool TTSCarbon::configOk()
{
    return true;
}


bool TTSCarbon::start(QString *errStr)
{
    (void)errStr;
    VoiceSpec vspec;
    VoiceSpec* vspecref = NULL;
    VoiceDescription vdesc;
    OSErr error;
    QString selectedVoice
            = RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
    SInt16 numVoices;
    SInt16 voiceIndex;
    error = CountVoices(&numVoices);
    for(voiceIndex = 1; voiceIndex < numVoices; ++voiceIndex) {
        error = GetIndVoice(voiceIndex, &vspec);
        error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
        // name is pascal string, i.e. the first byte is the length.
        QString name = QString::fromLocal8Bit((const char*)&vdesc.name[1],
                                              vdesc.name[0]);
        if(name == selectedVoice) {
            vspecref = &vspec;
            if(vdesc.script != -1)
                m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
            else
                m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
            break;
        }
    }
    if(voiceIndex == numVoices) {
        // voice not found. Add user notification here and proceed with
        // system default voice.
        LOG_WARNING() << "Selected voice not found, using system default!";
        GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
        if(vdesc.script != -1)
            m_voiceScript = (CFStringBuiltInEncodings)vdesc.script;
        else
            m_voiceScript = (CFStringBuiltInEncodings)vdesc.reserved[0];
    }

    error = NewSpeechChannel(vspecref, &m_channel);
    //SetSpeechInfo(channel, soSpeechDoneCallBack, speechDone);
    Fixed rate = (Fixed)(0x10000 * RbSettings::subValue("carbon",
                                    RbSettings::TtsSpeed).toInt());
    if(rate != 0)
        SetSpeechRate(m_channel, rate);

    Fixed pitch = (Fixed)(0x10000 * RbSettings::subValue("carbon",
                                    RbSettings::TtsPitch).toInt());
    if(pitch != 0)
        SetSpeechPitch(m_channel, pitch);

    return (error == 0) ? true : false;
}


bool TTSCarbon::stop(void)
{
    DisposeSpeechChannel(m_channel);
    return true;
}


void TTSCarbon::generateSettings(void)
{
    QStringList voiceNames;
    QString systemVoice;
    SInt16 numVoices;
    OSErr error;
    VoiceSpec vspec;
    VoiceDescription vdesc;

    // get system voice
    error = GetVoiceDescription(NULL, &vdesc, sizeof(vdesc));
    systemVoice
            = QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
    // get list of all voices
    CountVoices(&numVoices);
    for(SInt16 i = 1; i < numVoices; ++i) {
        error = GetIndVoice(i, &vspec);
        error = GetVoiceDescription(&vspec, &vdesc, sizeof(vdesc));
        // name is pascal string, i.e. the first byte is the length.
        QString name
            = QString::fromLocal8Bit((const char*)&vdesc.name[1], vdesc.name[0]);
        voiceNames.append(name.trimmed());
    }
    // voice
    EncTtsSetting* setting;
    QString voice
        = RbSettings::subValue("carbon", RbSettings::TtsVoice).toString();
    if(voice.isEmpty())
        voice = systemVoice;
    setting = new EncTtsSetting(this, EncTtsSetting::eSTRINGLIST,
        tr("Voice:"), voice, voiceNames, EncTtsSetting::eNOBTN);
    insertSetting(ConfigVoice, setting);

    // speed
    int speed = RbSettings::subValue("carbon", RbSettings::TtsSpeed).toInt();
    setting = new EncTtsSetting(this, EncTtsSetting::eINT,
                                tr("Speed (words/min):"), speed, 80, 500,
                                EncTtsSetting::eNOBTN);
    insertSetting(ConfigSpeed, setting);

    // pitch
    int pitch = RbSettings::subValue("carbon", RbSettings::TtsPitch).toInt();
    setting = new EncTtsSetting(this, EncTtsSetting::eINT,
                                tr("Pitch (0 for default):"), pitch, 0, 65,
                                EncTtsSetting::eNOBTN);
    insertSetting(ConfigPitch, setting);

}


void TTSCarbon::saveSettings(void)
{
    // save settings in user config
    RbSettings::setSubValue("carbon", RbSettings::TtsVoice,
                            getSetting(ConfigVoice)->current().toString());
    RbSettings::setSubValue("carbon", RbSettings::TtsSpeed,
                            getSetting(ConfigSpeed)->current().toInt());
    RbSettings::setSubValue("carbon", RbSettings::TtsPitch,
                            getSetting(ConfigPitch)->current().toInt());
    RbSettings::sync();
}


/** @brief create wav file from text using the selected TTS voice.
  */
TTSStatus TTSCarbon::voice(QString text, QString wavfile, QString* errStr)
{
    TTSStatus status = NoError;
    OSErr error;

    char* tmpfile = NULL;
    if(!wavfile.isEmpty()) {
        QString aifffile = wavfile + ".aiff";
        // FIXME: find out why we need to do this.
        // Create a local copy of the temporary file filename.
        // Not doing so causes weird issues (path contains trailing spaces)
        unsigned int len = aifffile.size() + 1;
        tmpfile = (char*)malloc(len * sizeof(char));
        strncpy(tmpfile, aifffile.toLocal8Bit().constData(), len);
        CFStringRef tmpfileref = CFStringCreateWithCString(kCFAllocatorDefault,
                                                           tmpfile, kCFStringEncodingUTF8);
        CFURLRef urlref = CFURLCreateWithFileSystemPath(kCFAllocatorDefault,
                                                        tmpfileref, kCFURLPOSIXPathStyle, false);
        SetSpeechInfo(m_channel, soOutputToFileWithCFURL, urlref);
    }

    // speak it.
    // Convert the string to the encoding requested by the voice. Do this
    // via CFString, as this allows to directly use the destination encoding
    // as CFString uses the same values as the voice.

    // allocate enough space to allow storing the string in a 2 byte encoding
    unsigned int textlen = 2 * text.length() + 1;
    char* textbuf = (char*)calloc(textlen, sizeof(char));
    char* utf8data = (char*)text.toUtf8().constData();
    int utf8bytes = text.toUtf8().size();
    CFStringRef cfstring = CFStringCreateWithBytes(kCFAllocatorDefault,
                            (UInt8*)utf8data, utf8bytes,
                            kCFStringEncodingUTF8, (Boolean)false);
    CFIndex usedBuf = 0;
    CFRange range;
    range.location = 0; // character in string to start.
    range.length = text.length(); // number of _characters_ in string
    // FIXME: check if converting between encodings was lossless.
    CFStringGetBytes(cfstring, range, m_voiceScript, ' ',
                     false, (UInt8*)textbuf, textlen, &usedBuf);

    error = SpeakText(m_channel, textbuf, (unsigned long)usedBuf);
    while(SpeechBusy()) {
        // FIXME: add small delay here to make calls less frequent
        QCoreApplication::processEvents();
    }
    if(error != 0) {
        *errStr = tr("Could not voice string");
        status = FatalError;
    }
    free(textbuf);
    CFRelease(cfstring);

    if(!wavfile.isEmpty()) {
        // convert the temporary aiff file to wav
        if(status == NoError
            && convertAiffToWav(tmpfile, wavfile.toLocal8Bit().constData()) != 0) {
            *errStr = tr("Could not convert intermediate file");
            status = FatalError;
        }
        // remove temporary aiff file
        unlink(tmpfile);
        free(tmpfile);
    }

    return status;
}


unsigned long TTSCarbon::be2u32(unsigned char* buf)
{
    return (buf[0]&0xff)<<24 | (buf[1]&0xff)<<16 | (buf[2]&0xff)<<8 | (buf[3]&0xff);
}


unsigned long TTSCarbon::be2u16(unsigned char* buf)
{
    return buf[1]&0xff | (buf[0]&0xff)<<8;
}


unsigned char* TTSCarbon::u32tobuf(unsigned char* buf, uint32_t val)
{
    buf[0] =  val      & 0xff;
    buf[1] = (val>> 8) & 0xff;
    buf[2] = (val>>16) & 0xff;
    buf[3] = (val>>24) & 0xff;
    return buf;
}


unsigned char* TTSCarbon::u16tobuf(unsigned char* buf, uint16_t val)
{
    buf[0] =  val      & 0xff;
    buf[1] = (val>> 8) & 0xff;
    return buf;
}


/** @brief convert 80 bit extended ("long double") to int.
 *  This is simplified to handle the usual audio sample rates. Everything else
 *  might break. If the value isn't supported it will return 0.
 *  Conversion taken from Rockbox aiff codec.
 */
unsigned int TTSCarbon::extended2int(unsigned char* buf)
{
    unsigned int result = 0;
    /* value negative? */
    if(buf[0] & 0x80)
        return 0;
    /* check exponent. Int can handle up to 2^31. */
    int exponent = buf[0] << 8 | buf[1];
    if(exponent < 0x4000 || exponent > (0x4000 + 30))
        return 0;
    result = ((buf[2]<<24) | (buf[3]<<16) | (buf[4]<<8) | buf[5]) + 1;
    result >>= (16 + 14 - buf[1]);
    return result;
}


/** @brief Convert aiff file to wav. Returns 0 on success.
  */
int TTSCarbon::convertAiffToWav(const char* aiff, const char* wav)
{
    struct commchunk {
        unsigned long chunksize;
        unsigned short channels;
        unsigned long frames;
        unsigned short size;
        int rate;
    };

    struct ssndchunk {
        unsigned long chunksize;
        unsigned long offset;
        unsigned long blocksize;
    };

    FILE* in;
    FILE* out;
    unsigned char obuf[4];
    unsigned char* buf;
    /* minimum file size for a valid aiff file is 46 bytes:
     * - FORM chunk: 12 bytes
     * - COMM chunk: 18 bytes
     * - SSND chunk: 16 bytes (with no actual data)
     */
    struct stat filestat;
    stat(aiff, &filestat);
    if(filestat.st_size < 46)
        return -1;
    /* read input file into memory */
    buf = (unsigned char*)malloc(filestat.st_size * sizeof(unsigned char));
    if(!buf) /* error out if malloc() failed */
        return -1;
    in = fopen(aiff, "rb");
    if(fread(buf, 1, filestat.st_size, in) < filestat.st_size) {
        printf("could not read file: not enought bytes read\n");
        fclose(in);
        free(buf);
        return -1;
    }
    fclose(in);

    /* check input file format */
    if(memcmp(buf, "FORM", 4) | memcmp(&buf[8], "AIFF", 4)) {
        printf("No valid AIFF header found.\n");
        free(buf);
        return -1;
    }
    /* read COMM chunk */
    unsigned char* commstart = &buf[12];
    struct commchunk comm;
    if(memcmp(commstart, "COMM", 4)) {
        printf("COMM chunk not at beginning.\n");
        free(buf);
        return -1;
    }
    comm.chunksize = be2u32(&commstart[4]);
    comm.channels  = be2u16(&commstart[8]);
    comm.frames    = be2u32(&commstart[10]);
    comm.size      = be2u16(&commstart[14]);
    comm.rate      = extended2int(&commstart[16]);

    /* find SSND as next chunk */
    unsigned char* ssndstart = commstart + 8 + comm.chunksize;
    while(memcmp(ssndstart, "SSND", 4) && ssndstart < (buf + filestat.st_size)) {
        printf("Skipping chunk.\n");
        ssndstart += be2u32(&ssndstart[4]) + 8;
    }
    if(ssndstart > (buf + filestat.st_size)) {
        free(buf);
        return -1;
    }

    struct ssndchunk ssnd;
    ssnd.chunksize = be2u32(&ssndstart[4]);
        ssnd.offset    = be2u32(&ssndstart[8]);
        ssnd.blocksize = be2u32(&ssndstart[12]);

    /* Calculate the total length of the resulting RIFF chunk.
     * The length is given by frames * samples * bytes/sample.
     * We need to add:
     * - 16 bytes: fmt chunk header
     * -  8 bytes: data chunk header
     * -  4 bytes: wave chunk identifier
     */
    out = fopen(wav, "wb+");

    /* write the wav header */
    unsigned short blocksize = comm.channels * (comm.size >> 3);
    unsigned long rifflen = blocksize * comm.frames + 28;
    fwrite("RIFF", 1, 4, out);
    fwrite(u32tobuf(obuf, rifflen), 1, 4, out);
    fwrite("WAVE", 1, 4, out);

    /* write the fmt chunk and chunk size (always 16) */
    /* write fmt chunk header:
     * header, size (always 0x10, format code (always 0x0001)
     */
    fwrite("fmt \x10\x00\x00\x00\x01\x00", 1, 10, out);
    /* number of channels (2 bytes) */
    fwrite(u16tobuf(obuf, comm.channels), 1, 2, out);
    /* sampling rate (4 bytes) */
    fwrite(u32tobuf(obuf, comm.rate), 1, 4, out);

    /* data rate, i.e. bytes/sec */
    fwrite(u32tobuf(obuf, comm.rate * blocksize), 1, 4, out);

    /* data block size */
    fwrite(u16tobuf(obuf, blocksize), 1, 2, out);

    /* bits per sample */
    fwrite(u16tobuf(obuf, comm.size), 1, 2, out);

    /* write the data chunk */
    /* chunk id */
    fwrite("data", 1, 4, out);
    /* chunk size: 4 bytes. */
    unsigned long cs = blocksize * comm.frames;
    fwrite(u32tobuf(obuf, cs), 1, 4, out);

    /* write data */
    unsigned char* data = ssndstart;
    unsigned long pos = ssnd.chunksize;
    /* byteswap if samples are 16 bit */
    if(comm.size == 16) {
        while(pos) {
            obuf[1] = *data++ & 0xff;
            obuf[0] = *data++ & 0xff;
            fwrite(obuf, 1, 2, out);
            pos -= 2;
        }
    }
    /* 8 bit samples have need no conversion so we can bulk copy.
     * Everything that is not 16 bit is considered 8. */
    else {
        fwrite(data, 1, pos, out);
    }
    /* number of bytes has to be even, even if chunksize is not. */
    if(cs % 2) {
        fwrite(obuf, 1, 1, out);
    }

    fclose(out);
    free(buf);
    return 0;
}