FS#11913: Move the TTS correction expressions into a separate file.
voice.pl now reads the TTS correction expressions from the file tools/voice-corrections.txt, which contains the regular expressions used to adjust the strings. This makes the corrections easier to adjust and allows integrating them into tools like Rockbox Utility. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29500 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
1f77d091a5
commit
7ad78222c4
2 changed files with 129 additions and 99 deletions
92
tools/voice-corrections.txt
Normal file
92
tools/voice-corrections.txt
Normal file
|
@ -0,0 +1,92 @@
|
|||
__________ __ ___.
|
||||
Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
\/ \/ \/ \/ \/
|
||||
$Id$
|
||||
|
||||
|
||||
Voice string corrections for voice.pl to support TTS engines.
|
||||
The list items are separated by the separator that is defined by the first
|
||||
character on the line. If the first character is whitespace, the line is
|
||||
treated as a comment.
|
||||
|
||||
Format:
|
||||
/language/engine/vendor/string/replacement/modifiers
|
||||
|
||||
Where / is the separator, and all strings are Perl regexes.
|
||||
Empty lines and lines starting with a whitespace are ignored, for all other
|
||||
lines the first character will become the separator.
|
||||
|
||||
General for all engines and languages
|
||||
|
||||
/.*/.*/.*/USB/U S B/g
|
||||
/.*/.*/.*/ID3/I D 3/g
|
||||
English
|
||||
/english/(sapi|festival)/.*/plugin(s?)/plug-in$1/ig
|
||||
/english/festival/.*/\ba\b/ay/ig
|
||||
/english/festival/.*/$/./
|
||||
|
||||
German (deutsch)
|
||||
|
||||
/deutsch/.*/.*/alkaline/alkalein/ig
|
||||
/deutsch/.*/.*/byte(s?)/beit$1/ig
|
||||
/deutsch/.*/.*/clip(s?)/klipp$1/ig
|
||||
/deutsch/.*/.*/\bcover/kawwer/ig
|
||||
/deutsch/.*/.*/cuesheet/kjuschiet/ig
|
||||
/deutsch/.*/.*/dither/didder/ig
|
||||
/deutsch/.*/.*/equalizer/iquileiser/ig
|
||||
/deutsch/.*/.*/\bflash\b/fläsh/ig
|
||||
/deutsch/.*/.*/\bfirmware(s?)\b/firmwer$1/ig
|
||||
/deutsch/.*/.*/\bI D 3 tag\b/I D 3 täg/ig
|
||||
/deutsch/.*/.*/\bloudness\b/laudness/ig
|
||||
/deutsch/.*/.*/\bunicode\b/unikod/ig
|
||||
/deutsch/sapi/AT&T Labs/alphabet/alfabet/ig
|
||||
/deutsch/sapi/AT&T Labs/ampere/amper/ig
|
||||
/deutsch/sapi/AT&T Labs/\bdezibel\b/de-zibell/ig
|
||||
/deutsch/sapi/AT&T Labs/diddering/didde-ring/ig
|
||||
/deutsch/sapi/AT&T Labs/energie\b/ener-gie/ig
|
||||
/deutsch/sapi/AT&T Labs/\Blauf\b/-lauf/ig
|
||||
/deutsch/sapi/AT&T Labs/\bnumerisch\b/numehrisch/ig
|
||||
|
||||
Swedish (svenska)
|
||||
for all Swedish engines (e.g. for English words)
|
||||
|
||||
/svenska/.*/.*/kilobyte/kilobajt/ig
|
||||
/svenska/.*/.*/megabyte/megabajt/ig
|
||||
/svenska/.*/.*/gigabyte/gigabajt/ig
|
||||
/svenska/.*/.*/\bloudness\b/laudness/ig
|
||||
/svenska/espeak/.*/ampere/ampär/ig
|
||||
/svenska/espeak/.*/bokmärken/bok-märken/ig
|
||||
/svenska/espeak/.*/generella/schenerella/ig
|
||||
/svenska/espeak/.*/dithering/diddering/ig
|
||||
/svenska/espeak/.*/\bunicode\b/jynikod/ig
|
||||
/svenska/espeak/.*/uttoning/utoning/ig
|
||||
/svenska/espeak/.*/procent/pro-cent/ig
|
||||
/svenska/espeak/.*/spellistor/spelistor/ig
|
||||
/svenska/espeak/.*/cuesheet/qjyschiit/ig
|
||||
|
||||
Italian (italiano)
|
||||
for all Italian engines (e.g. for English words)
|
||||
|
||||
/italiano/.*/.*/Replaygain/Ripleyghein/ig
|
||||
/italiano/.*/.*/Crossfade/Crossfeid/ig
|
||||
/italiano/.*/.*/beep/Bip/ig
|
||||
/italiano/.*/.*/cuesheet/chiushit/ig
|
||||
/italiano/.*/.*/fade/feid/ig
|
||||
/italiano/.*/.*/Crossfeed/crossfid/ig
|
||||
/italiano/.*/.*/Cache/chash/ig
|
||||
/italiano/.*/.*/\bfirmware(s?)\b/firmuer$1/ig
|
||||
/italiano/.*/.*/\bFile(s?)\b/fail$1/ig
|
||||
/italiano/.*/.*/\bloudness\b/laudness/ig
|
||||
/italiano/.*/.*/\bunicode\b/unikod/ig
|
||||
/italiano/.*/.*/Playlist/pleylist/ig
|
||||
/italiano/.*/.*/WavPack/wave pak/ig
|
||||
/italiano/.*/.*/BITRATE/bit reit/ig
|
||||
/italiano/.*/.*/Codepage/cod page/ig
|
||||
/italiano/.*/.*/PCM Wave/pcm Ue'iv/ig
|
||||
/italiano/sapi/Loquendo/Inizializza/inizializa/ig
|
||||
/italiano/sapi/ScanSoft, Inc/V/v/ig
|
||||
/italiano/sapi/ScanSoft, Inc/X/x/ig
|
||||
/italiano/sapi/ScanSoft, Inc/stop/stohp/ig
|
136
tools/voice.pl
136
tools/voice.pl
|
@ -128,106 +128,12 @@ sub correct_string {
|
|||
our $verbose;
|
||||
my ($string, $language, $tts_object) = @_;
|
||||
my $orig = $string;
|
||||
switch($language) {
|
||||
# General for all engines and languages
|
||||
$string =~ s/USB/U S B/g;
|
||||
$string =~ s/ID3/I D 3/g;
|
||||
my $corrections = $tts_object->{"corrections"};
|
||||
|
||||
case "english" {
|
||||
switch($$tts_object{"name"}) {
|
||||
case ["sapi","festival"] {
|
||||
$string =~ s/plugin(s?)/plug-in$1/ig; next
|
||||
}
|
||||
case "festival" {
|
||||
$string =~ s/\ba\b/ay/ig;
|
||||
$string =~ s/$/./;
|
||||
}
|
||||
}
|
||||
}
|
||||
case "deutsch" {
|
||||
# for all german engines (e.g. for english words)
|
||||
$string =~ s/alkaline/alkalein/ig;
|
||||
$string =~ s/byte(s?)/beit$1/ig;
|
||||
$string =~ s/clip(s?)/klipp$1/ig;
|
||||
$string =~ s/\bcover/kawwer/ig;
|
||||
$string =~ s/cuesheet/kjuschiet/ig;
|
||||
$string =~ s/dither/didder/ig;
|
||||
$string =~ s/equalizer/iquileiser/ig;
|
||||
$string =~ s/\bflash\b/fläsh/ig;
|
||||
$string =~ s/\bfirmware(s?)\b/firmwer$1/ig;
|
||||
$string =~ s/\bI D 3 tag\b/I D 3 täg/ig; # can't just use "tag" here
|
||||
$string =~ s/\bloudness\b/laudness/ig;
|
||||
$string =~ s/\bunicode\b/unikod/ig;
|
||||
switch($$tts_object{"name"}) {
|
||||
case "sapi" { # just for SAPI
|
||||
switch($$tts_object{"vendor"}) {
|
||||
case "AT&T Labs" {
|
||||
$string =~ s/alphabet/alfabet/ig;
|
||||
$string =~ s/ampere/amper/ig;
|
||||
$string =~ s/\bdezibel\b/de-zibell/ig;
|
||||
$string =~ s/diddering/didde-ring/ig;
|
||||
$string =~ s/energie\b/ener-gie/ig;
|
||||
$string =~ s/\Blauf\b/-lauf/ig;
|
||||
$string =~ s/\bnumerisch\b/numehrisch/ig;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case "svenska" {
|
||||
# for all swedish engines (e.g. for english words)
|
||||
$string =~ s/kilobyte/kilobajt/ig;
|
||||
$string =~ s/megabyte/megabajt/ig;
|
||||
$string =~ s/gigabyte/gigabajt/ig;
|
||||
$string =~ s/\bloudness\b/laudness/ig;
|
||||
|
||||
switch($$tts_object{"name"}) {
|
||||
case "espeak" { # just for eSpeak
|
||||
$string =~ s/ampere/ampär/ig;
|
||||
$string =~ s/bokmärken/bok-märken/ig;
|
||||
$string =~ s/generella/schenerella/ig;
|
||||
$string =~ s/dithering/diddering/ig;
|
||||
$string =~ s/\bunicode\b/jynikod/ig;
|
||||
$string =~ s/uttoning/utoning/ig;
|
||||
$string =~ s/procent/pro-cent/ig;
|
||||
$string =~ s/spellistor/spelistor/ig;
|
||||
$string =~ s/cuesheet/qjyschiit/ig;
|
||||
}
|
||||
}
|
||||
}
|
||||
case "italiano" {
|
||||
# for all italian engines (e.g. for english words)
|
||||
$string =~ s/Replaygain/Ripleyghein/ig;
|
||||
$string =~ s/Crossfade/Crossfeid/ig;
|
||||
$string =~ s/beep/Bip/ig;
|
||||
$string =~ s/cuesheet/chiushit/ig;
|
||||
$string =~ s/fade/feid/ig;
|
||||
$string =~ s/Crossfeed/crossfid/ig;
|
||||
$string =~ s/Cache/chash/ig;
|
||||
$string =~ s/\bfirmware(s?)\b/firmuer$1/ig;
|
||||
$string =~ s/\bFile(s?)\b/fail$1/ig;
|
||||
$string =~ s/\bloudness\b/laudness/ig;
|
||||
$string =~ s/\bunicode\b/unikod/ig;
|
||||
$string =~ s/Playlist/pleylist/ig;
|
||||
$string =~ s/WavPack/wave pak/ig;
|
||||
$string =~ s/BITRATE/bit reit/ig;
|
||||
$string =~ s/Codepage/cod page/ig;
|
||||
$string =~ s/PCM Wave/pcm Ue'iv/ig;
|
||||
switch($$tts_object{"name"}) {
|
||||
case "sapi" { # just for SAPI
|
||||
switch($$tts_object{"vendor"}) {
|
||||
case "Loquendo" {
|
||||
$string =~ s/Inizializza/inizializa/ig;
|
||||
}
|
||||
case "ScanSoft, Inc" {
|
||||
$string =~ s/V/v/ig;
|
||||
$string =~ s/X/x/ig;
|
||||
$string =~ s/stop/stohp/ig;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
foreach (@$corrections) {
|
||||
my $r = "s" . $_->{separator} . $_->{search} . $_->{separator}
|
||||
. $_->{replace} . $_->{separator} . $_->{modifier};
|
||||
eval ('$string =~' . "$r;");
|
||||
}
|
||||
if ($orig ne $string) {
|
||||
printf("%s -> %s\n", $orig, $string) if $verbose;
|
||||
|
@ -331,6 +237,7 @@ sub generateclips {
|
|||
my ($language, $target, $encoder, $encoder_opts, $tts_engine, $tts_engine_opts) = @_;
|
||||
my $english = dirname($0) . '/../apps/lang/english.lang';
|
||||
my $langfile = dirname($0) . '/../apps/lang/' . $language . '.lang';
|
||||
my $correctionsfile = dirname($0) . '/voice-corrections.txt';
|
||||
my $id = '';
|
||||
my $voice = '';
|
||||
my $cmd = "genlang -o -t=$target -e=$english $langfile 2>/dev/null";
|
||||
|
@ -340,6 +247,37 @@ sub generateclips {
|
|||
local $| = 1; # make progress indicator work reliably
|
||||
|
||||
my $tts_object = init_tts($tts_engine, $tts_engine_opts, $language);
|
||||
# add string corrections to tts_object.
|
||||
my @corrects = ();
|
||||
open(VOICEREGEXP, "<$correctionsfile") or die "Can't open corrections file!\n";
|
||||
while(<VOICEREGEXP>) {
|
||||
# get first character of line
|
||||
my $line = $_;
|
||||
my $separator = substr($_, 0, 1);
|
||||
if($separator =~ m/\s+/) {
|
||||
next;
|
||||
}
|
||||
chomp($line);
|
||||
$line =~ s/^.//g; # remove separator at beginning
|
||||
my ($lang, $engine, $vendor, $search, $replace, $modifier) = split(/$separator/, $line);
|
||||
|
||||
# does language match?
|
||||
if($language !~ m/$lang/) {
|
||||
next;
|
||||
}
|
||||
if($$tts_object{"name"} !~ m/$engine/) {
|
||||
next;
|
||||
}
|
||||
my $v = $$tts_object{"vendor"} || ""; # vendor might be empty in $tts_object
|
||||
if($v !~ m/$vendor/) {
|
||||
next;
|
||||
}
|
||||
push @corrects, {separator => $separator, search => $search, replace => $replace, modifier => $modifier};
|
||||
|
||||
}
|
||||
close(VOICEREGEXP);
|
||||
$tts_object->{corrections} = [@corrects];
|
||||
|
||||
print("Generating voice clips");
|
||||
print("\n") if $verbose;
|
||||
for (`$cmd`) {
|
||||
|
|
Loading…
Reference in a new issue