SAPI voice file generation: * Added language matching and voice selection (use TTS option /voice:<name> to select; voices with spaces in the name need the option quoted). * Added SAPI4 support (use TTS option /sapi4; voice file generation takes hours...)

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14587 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2007-09-02 22:32:34 +00:00
parent 09d66ac456
commit 5dbea469a6
4 changed files with 300 additions and 113 deletions

18
tools/configure vendored
View file

@ -378,20 +378,20 @@ voiceconfig () {
fi
# Allow SAPI if Windows is in use
if [ -f "`which winver`" ]; then
SAPI5="(S)API5 "
SAPI5_OPTS=""
DEFAULT_TTS="sapi5"
DEFAULT_TTS_OPTS=$SAPI5_OPTS
SAPI="(S)API "
SAPI_OPTS=""
DEFAULT_TTS="sapi"
DEFAULT_TTS_OPTS=$SAPI_OPTS
DEFAULT_NOISEFLOOR="500"
DEFAULT_CHOICE="S"
fi
if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI5" ] && [ "$SAPI5" = "$SWIFT" ]; then
echo "You need Festival, eSpeak or Flite in your path, or SAPI5 available to build voice files"
if [ "$FESTIVAL" = "$FLITE" ] && [ "$FLITE" = "$ESPEAK" ] && [ "$ESPEAK" = "$SAPI" ] && [ "$SAPI" = "$SWIFT" ]; then
echo "You need Festival, eSpeak or Flite in your path, or SAPI available to build voice files"
exit
fi
echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${SAPI5}${SWIFT}(${DEFAULT_CHOICE})?"
echo "TTS engine to use: ${FLITE}${FESTIVAL}${ESPEAK}${SAPI}${SWIFT}(${DEFAULT_CHOICE})?"
option=`input`
case "$option" in
[Ll])
@ -410,9 +410,9 @@ voiceconfig () {
TTS_OPTS=$FESTIVAL_OPTS
;;
[Ss])
TTS_ENGINE="sapi5"
TTS_ENGINE="sapi"
NOISEFLOOR="500"
TTS_OPTS=$SAPI5_OPTS
TTS_OPTS=$SAPI_OPTS
;;
[Ww])
TTS_ENGINE="swift"

View file

@ -1,94 +0,0 @@
'***************************************************************************
' __________ __ ___.
' Open \______ \ ____ ____ | | _\_ |__ _______ ___
' Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
' Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
' Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
' \/ \/ \/ \/ \/
' $Id: sapi5_voice.vbs$
'
' Copyright (C) 2007 Steve Bavin, Jens Arnold, Mesar Hameed
'
' All files in this archive are subject to the GNU General Public License.
' See the file COPYING in the source tree root for full license agreement.
'
' This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
' KIND, either express or implied.
'
'***************************************************************************
' Purpose: Make a voice clip file for the given text on stdin
'To be done:
' - Allow user to override voice, speed and/or format (currently uses Control Panel defaults for voice/speed)
Option Explicit
Const SSFMCreateForWrite = 3
' Audio formats for SAPI5 filestream object
Const SPSF_8kHz16BitMono = 6
Const SPSF_11kHz16BitMono = 10
Const SPSF_12kHz16BitMono = 14
Const SPSF_16kHz16BitMono = 18
Const SPSF_22kHz16BitMono = 22
Const SPSF_24kHz16BitMono = 26
Const SPSF_32kHz16BitMono = 30
Const SPSF_44kHz16BitMono = 34
Const SPSF_48kHz16BitMono = 38
Dim oShell, oEnv
Dim oSpVoice, oSpFS ' SAPI5 voice and filestream
Dim aLine, aData ' used in command reading
Dim nAudioFormat
Dim bVerbose
On Error Resume Next
nAudioFormat = SPSF_22kHz16BitMono 'Audio format to use, recommended settings:
'- for AT&T natural voices, use SPSF_32kHz16BitMono
'- for MS voices, use SPSF_22kHz16BitMono
Set oShell = CreateObject("WScript.Shell")
Set oEnv = oShell.Environment("Process")
bVerbose = (oEnv("V") <> "")
Set oSpVoice = CreateObject("SAPI.SpVoice")
If Err.Number <> 0 Then
WScript.StdErr.WriteLine "Error - could not get SpVoice object. " & _
"SAPI 5 not installed?"
Err.Clear
WScript.Quit 1
End If
Set oSpFS = CreateObject("SAPI.SpFileStream")
oSpFS.Format.Type = nAudioFormat
On Error Goto 0
Do
aLine = Split(WScript.StdIn.ReadLine, vbTab, 2)
If Err.Number <> 0 Then
WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description
WScript.Quit 1
End If
Select Case aLine(0) ' command
Case "SPEAK"
aData = Split(aLine(1), vbTab, 2)
If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _
& " in " & aData(0)
oSpFS.Open aData(0), SSFMCreateForWrite, false
Set oSpVoice.AudioOutputStream = oSpFS
oSpVoice.Speak aData(1)
oSpFS.Close
Case "EXEC"
If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1)
oShell.Run aLine(1), 0, true
Case "SYNC"
If bVerbose Then WScript.StdErr.WriteLine "Syncing"
WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed
Case "QUIT"
If bVerbose Then WScript.StdErr.WriteLine "Quitting"
WScript.Quit 0
End Select
Loop

281
tools/sapi_voice.vbs Executable file
View file

@ -0,0 +1,281 @@
'***************************************************************************
' __________ __ ___.
' Open \______ \ ____ ____ | | _\_ |__ _______ ___
' Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
' Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
' Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
' \/ \/ \/ \/ \/
' $Id: sapi5_voice.vbs$
'
' Copyright (C) 2007 Steve Bavin, Jens Arnold, Mesar Hameed
'
' All files in this archive are subject to the GNU General Public License.
' See the file COPYING in the source tree root for full license agreement.
'
' This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
' KIND, either express or implied.
'
'***************************************************************************
' Script setup: constants, global declarations and command line parsing.
' Named arguments read below: /sapi4 (flag), /language:<name>, /voice:<name>.
Option Explicit
' File mode passed to SpFileStream.Open when a clip is written
Const SSFMCreateForWrite = 3
' Audio formats for SAPI5 filestream object
' NOTE(review): values presumably mirror the SAPI5 SpeechAudioFormatType
' enumeration - confirm against the SAPI documentation.
Const SPSF_8kHz16BitMono = 6
Const SPSF_11kHz16BitMono = 10
Const SPSF_12kHz16BitMono = 14
Const SPSF_16kHz16BitMono = 18
Const SPSF_22kHz16BitMono = 22
Const SPSF_24kHz16BitMono = 26
Const SPSF_32kHz16BitMono = 30
Const SPSF_44kHz16BitMono = 34
Const SPSF_48kHz16BitMono = 38
Dim oShell, oArgs, oEnv
Dim bVerbose, bSAPI4
Dim sLanguage, sVoice
Dim oSpVoice, oSpFS ' SAPI5 voice and filestream
Dim oTTS, nMode ' SAPI4 TTS object, mode selector
Dim aLangIDs, sLangID, sSelectString
Dim aLine, aData ' used in command reading
' Errors are not trapped automatically from here on; the script inspects
' Err.Number by hand at the points it cares about. This setting is never
' switched back off, so it also applies to the command loop further down.
On Error Resume Next
Set oShell = CreateObject("WScript.Shell")
Set oEnv = oShell.Environment("Process")
' Verbose diagnostics are enabled by a non-empty environment variable V
bVerbose = (oEnv("V") <> "")
Set oArgs = WScript.Arguments.Named
' /sapi4 selects the SAPI4 code path; without it SAPI5 is used
bSAPI4 = oArgs.Exists("sapi4")
' Rockbox language name, mapped to Windows language IDs by LangIDs()
sLanguage = oArgs.Item("language")
' Optional voice name; an empty value means "any voice for the language"
sVoice = oArgs.Item("voice")
' Create the speech engine object (SAPI4 or SAPI5) and select a voice that
' matches the requested language and, if given, the requested voice name.
' Exits with status 1 when the engine or a matching voice is not available.
If bSAPI4 Then
' Create SAPI4 ActiveVoice object
' Try the version-independent ProgID first, then fall back to the ".1" one.
' NOTE(review): the second argument is the WSH event handler prefix; no
' TTS_* handlers are visible in this file - confirm whether it is needed.
Set oTTS = WScript.CreateObject("ActiveVoice.ActiveVoice", "TTS_")
If Err.Number <> 0 Then
Err.Clear
Set oTTS = WScript.CreateObject("ActiveVoice.ActiveVoice.1", "TTS_")
If Err.Number <> 0 Then
WScript.StdErr.WriteLine "Error - could not get ActiveVoice" _
& " object. SAPI 4 not installed?"
WScript.Quit 1
End If
End If
oTTS.Initialized = 1
' Select matching voice
' Candidate language IDs are tried in the order LangIDs() returns them;
' the first one that yields a matching mode wins.
aLangIDs = LangIDs(sLanguage)
For Each sLangID in aLangIDs
sLangID = HexToDec(sLangID) ' SAPI4 wants it decimal
sSelectString = "LanguageID=" & sLangID
' The requested voice name is offered both as speaker and as mode name
If sVoice <> "" Then
sSelectString = sSelectString & ";Speaker=" & sVoice _
& ";ModeName=" & sVoice
End If
nMode = oTTS.Find(sSelectString)
' The returned mode is re-checked against the request before it is accepted.
' NOTE(review): presumably Find can return a non-exact match - confirm.
If oTTS.LanguageID(nMode) = sLangID And (sVoice = "" Or _
oTTS.Speaker(nMode) = sVoice Or oTTS.ModeName(nMode) = sVoice) Then
If bVerbose Then WScript.StdErr.WriteLine "Using " & sSelectString
Exit For
Else
' Empty select string marks "nothing found so far"
sSelectString = ""
End If
Next
If sSelectString = "" Then
WScript.StdErr.WriteLine "Error - found no matching voice for " _
& sLanguage & ", " & sVoice
WScript.Quit 1
End If
oTTS.Select nMode
Else ' SAPI5
' Create SAPI5 object
Set oSpVoice = CreateObject("SAPI.SpVoice")
If Err.Number <> 0 Then
WScript.StdErr.WriteLine "Error - could not get SpVoice object." _
& " SAPI 5 not installed?"
WScript.Quit 1
End If
' Select matching voice
' Candidate language IDs are tried in the order LangIDs() returns them.
aLangIDs = LangIDs(sLanguage)
For Each sLangID in aLangIDs
sSelectString = "Language=" & sLangID
If sVoice <> "" Then
sSelectString = sSelectString & ";Name=" & sVoice
End If
' Take the first voice of the filtered list; a failure of this statement
' is detected through Err.Number and treated as "no voice for this ID".
Set oSpVoice.Voice = oSpVoice.GetVoices(sSelectString).Item(0)
If Err.Number = 0 Then
If bVerbose Then WScript.StdErr.WriteLine "Using " & sSelectString
Exit For
Else
' Empty select string marks "nothing found so far"; reset the error
' so the next candidate starts from a clean state
sSelectString = ""
Err.Clear
End If
Next
If sSelectString = "" Then
WScript.StdErr.WriteLine "Error - found no matching voice for " _
& sLanguage & ", " & sVoice
WScript.Quit 1
End If
' Filestream object for output
Set oSpFS = CreateObject("SAPI.SpFileStream")
' The wave format depends on the vendor attribute of the selected voice
oSpFS.Format.Type = AudioFormat(oSpVoice.Voice.GetAttribute("Vendor"))
End If
' Command loop: reads one command per line from stdin until QUIT.
' Line format is COMMAND<TAB>argument. Commands handled:
'   SPEAK  argument is <output file><TAB><text>; speaks the text into the file
'   EXEC   argument is a command line; it is run and waited for
'   SYNC   argument is echoed back on stdout
'   QUIT   leaves the script with exit status 0
' Lines with any other command are ignored.
Do
aLine = Split(WScript.StdIn.ReadLine, vbTab, 2)
' On Error Resume Next (set near the top of the script) is still active, so
' an error raised by ReadLine or left over from the previous command shows up
' here and ends the script with exit status 1.
If Err.Number <> 0 Then
WScript.StdErr.WriteLine "Error " & Err.Number & ": " & Err.Description
WScript.Quit 1
End If
Select Case aLine(0) ' command
Case "SPEAK"
' aData(0) = output file name, aData(1) = text to speak
aData = Split(aLine(1), vbTab, 2)
If bVerbose Then WScript.StdErr.WriteLine "Saying " & aData(1) _
& " in " & aData(0)
If bSAPI4 Then
' SAPI4: direct output to the file, start speaking, then poll every
' 100 ms until the engine reports it has finished
oTTS.FileName = aData(0)
oTTS.Speak aData(1)
While oTTS.Speaking
WScript.Sleep 100
Wend
' NOTE(review): presumably clearing FileName closes the output file - confirm
oTTS.FileName = ""
Else
' SAPI5: open the file stream, route the voice output to it, speak, close
oSpFS.Open aData(0), SSFMCreateForWrite, false
Set oSpVoice.AudioOutputStream = oSpFS
oSpVoice.Speak aData(1)
oSpFS.Close
End If
Case "EXEC"
If bVerbose Then WScript.StdErr.WriteLine "> " & aLine(1)
' Window style 0, wait-on-return true: the loop continues only after
' the external command has finished
oShell.Run aLine(1), 0, true
Case "SYNC"
If bVerbose Then WScript.StdErr.WriteLine "Syncing"
WScript.StdOut.WriteLine aLine(1) ' Just echo what was passed
Case "QUIT"
If bVerbose Then WScript.StdErr.WriteLine "Quitting"
WScript.Quit 0
End Select
Loop
' Subroutines
' -----------
' SAPI5 output format selection based on engine
Function AudioFormat(sVendor)
    ' Picks the SAPI5 wave format constant for a voice vendor string.
    '   sVendor - vendor attribute of the selected voice
    ' Returns SPSF_32kHz16BitMono for "AT&T Labs" and SPSF_22kHz16BitMono for
    ' everything else; vendors other than the two known ones additionally
    ' produce a warning on stderr.
    If sVendor = "AT&T Labs" Then
        AudioFormat = SPSF_32kHz16BitMono
    ElseIf sVendor = "Microsoft" Then
        AudioFormat = SPSF_22kHz16BitMono
    Else
        ' Unknown engine: fall back to the 22 kHz default and say so
        AudioFormat = SPSF_22kHz16BitMono
        WScript.StdErr.WriteLine "Warning - unknown vendor """ & sVendor _
            & """ - using default wave format"
    End If
End Function
Function HexToDec(sHex)
    ' Converts a string of hexadecimal digits (no prefix, upper or lower
    ' case) to its numeric value. An empty string yields 0.
    Const HEX_DIGITS = "0123456789abcdef"
    Dim nPos, nValue, sLower
    sLower = LCase(sHex)
    nValue = 0
    HexToDec = nValue
    For nPos = 1 To Len(sHex)
        ' The position of the digit within HEX_DIGITS, minus one, is its value
        nValue = 16 * nValue + (InStr(HEX_DIGITS, Mid(sLower, nPos, 1)) - 1)
        HexToDec = nValue
    Next
End Function
' Language mapping rockbox->windows (hex strings as needed by SAPI)
Function LangIDs(sLanguage)
    ' Maps a rockbox language name to the Windows language IDs to try.
    '   sLanguage - rockbox language name, e.g. "english" or "deutsch"
    ' Returns an array of language IDs as hex strings without prefix, in
    ' order of preference. An unknown (or missing) language yields an empty
    ' array, so a caller's For Each simply finds no candidate instead of
    ' failing on a non-array value.
    ' "esperanto" is special: it writes an error to stderr and ends the
    ' script with exit status 1.
    Select Case sLanguage
        Case "afrikaans"
            LangIDs = Array("436")
        Case "bulgarian"
            LangIDs = Array("402")
        Case "catala"
            LangIDs = Array("403")
        Case "chinese-simp"
            LangIDs = Array("804") ' PRC
        Case "chinese-trad"
            LangIDs = Array("404") ' Taiwan. Perhaps also Hong Kong, Singapore, Macau?
        Case "czech"
            LangIDs = Array("405")
        Case "dansk"
            LangIDs = Array("406")
        Case "deutsch"
            LangIDs = Array("407", "c07", "1007", "1407")
            ' Standard, Austrian, Luxembourg, Liechtenstein (Swiss -> wallisertitsch)
        Case "eesti"
            LangIDs = Array("425")
        Case "english"
            LangIDs = Array("809", "409", "c09", "1009", "1409", "1809", _
                            "1c09", "2009", "2409", "2809", "2c09", "3009", _
                            "3409")
            ' British, American, Australian, Canadian, New Zealand, Ireland,
            ' South Africa, Jamaica, Caribbean, Belize, Trinidad, Zimbabwe,
            ' Philippines
        Case "espanol"
            LangIDs = Array("40a", "c0a", "80a", "100a", "140a", "180a", _
                            "1c0a", "200a", "240a", "280a", "2c0a", "300a", _
                            "340a", "380a", "3c0a", "400a", "440a", "480a", _
                            "4c0a", "500a")
            ' trad. sort., modern sort., Mexican, Guatemala, Costa Rica,
            ' Panama, Dominican Republic, Venezuela, Colombia, Peru, Argentina,
            ' Ecuador, Chile, Uruguay, Paraguay, Bolivia, El Salvador,
            ' Honduras, Nicaragua, Puerto Rico
        Case "esperanto"
            WScript.StdErr.WriteLine "Error: no esperanto support in Windows"
            WScript.Quit 1
        Case "finnish"
            LangIDs = Array("40b")
        Case "francais"
            LangIDs = Array("40c", "80c", "c0c", "100c", "140c", "180c")
            ' Standard, Belgian, Canadian, Swiss, Luxembourg, Monaco
        Case "galego"
            LangIDs = Array("456")
        Case "greek"
            LangIDs = Array("408")
        Case "hebrew"
            LangIDs = Array("40d")
        Case "islenska"
            LangIDs = Array("40f")
        Case "italiano"
            LangIDs = Array("410", "810") ' Standard, Swiss
        Case "japanese"
            LangIDs = Array("411")
        Case "korean"
            LangIDs = Array("412")
        Case "magyar"
            LangIDs = Array("40e")
        Case "nederlands"
            LangIDs = Array("413", "813") ' Standard, Belgian
        Case "norsk"
            LangIDs = Array("414") ' Bokmal
        Case "norsk-nynorsk"
            LangIDs = Array("814")
        Case "polski"
            LangIDs = Array("415")
        Case "portugues"
            LangIDs = Array("816", "416") ' Standard, Brazilian
        Case "romaneste"
            LangIDs = Array("418")
        Case "russian"
            LangIDs = Array("419")
        Case "slovenscina"
            LangIDs = Array("424")
        Case "svenska"
            LangIDs = Array("41d", "81d") ' Standard, Finland
        Case "turkce"
            LangIDs = Array("41f")
        Case "wallisertitsch"
            LangIDs = Array("807") ' Swiss German
        Case Else
            ' Unknown or missing language name: no candidates. The callers
            ' then report "found no matching voice" and quit.
            LangIDs = Array()
    End Select
End Function

View file

@ -80,12 +80,12 @@ sub init_tts {
$SIG{KILL} = sub { kill TERM => $pid; print("boo"); panic_cleanup(); };
$ret{"pid"} = $pid;
}
case "sapi5" {
case "sapi" {
my $toolsdir = dirname($0);
my $path = `cygpath $toolsdir -a -w`;
chomp($path);
$path = $path . '\\';
my $cmd = $path . "sapi5_voice_new.vbs $language $tts_engine_opts";
my $cmd = $path . "sapi_voice.vbs /language:$language $tts_engine_opts";
$cmd =~ s/\\/\\\\/g;
print("> cscript //nologo $cmd\n") if $verbose;
my $pid = open2(*CMD_OUT, *CMD_IN, "cscript //nologo $cmd");
@ -105,7 +105,7 @@ sub shutdown_tts {
# Send SIGTERM to festival server
kill TERM => $$tts_object{"pid"};
}
case "sapi5" {
case "sapi" {
print({$$tts_object{"stdin"}} "QUIT\r\n");
close($$tts_object{"stdin"});
}
@ -181,8 +181,8 @@ sub voicestring {
print ESPEAK $string . "\n";
close(ESPEAK);
}
case "sapi5" {
print({$$tts_object{"stdin"}} sprintf("SPEAK\t%s\t%s\r\n", $output, $string));
case "sapi" {
print({$$tts_object{"stdin"}} "SPEAK\t$output\t$string\r\n");
}
case "swift" {
$cmd = "swift $tts_engine_opts -o $output \"$string\"";
@ -197,9 +197,9 @@ sub wavtrim {
our $verbose;
my ($file, $threshold, $tts_object) = @_;
printf("Trim \"%s\"\n", $file) if $verbose;
if ($$tts_object{"name"} eq "sapi5") {
if ($$tts_object{"name"} eq "sapi") {
my $cmd = $$tts_object{"toolspath"}."wavtrim $file $threshold";
print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
}
else {
my $cmd = dirname($0) . "/wavtrim $file $threshold";
@ -225,8 +225,8 @@ sub encodewav {
$cmd = "speexenc $encoder_opts \"$input\" \"$output\"";
}
}
if ($$tts_object{"name"} eq "sapi5") {
print({$$tts_object{"stdin"}} sprintf("EXEC\t%s\r\n", $cmd));
if ($$tts_object{"name"} eq "sapi") {
print({$$tts_object{"stdin"}} "EXEC\t$cmd\r\n");
}
else {
print("> $cmd\n") if $verbose;
@ -237,7 +237,7 @@ sub encodewav {
# synchronize the clip generation / processing if it's running in another process
sub synchronize {
my ($tts_object) = @_;
if ($$tts_object{"name"} eq "sapi5") {
if ($$tts_object{"name"} eq "sapi") {
print({$$tts_object{"stdin"}} "SYNC\t42\r\n");
my $wait = readline($$tts_object{"stdout"});
#ignore what's actually returned