#!/usr/bin/perl -s
#             __________               __   ___.
#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
#                     \/            \/     \/    \/            \/
# $Id$
#
# Copyright (C) 2006 - 2008 by Daniel Stenberg
#

# See apps/language.c (TODO: Use common include for both)
# Cookie and binary version for the binary lang file
my $LANGUAGE_COOKIE   = 0x1a;
my $VOICE_COOKIE      = 0x9a;
my $LANGUAGE_VERSION  = 0x06;
my $LANGUAGE_FLAG_RTL = 0x01;

# Sizes, in bytes, of the file header and of each per-user sub-header as they
# are written with printf("%c...") by the binary output code.
my $HEADER_SIZE       = 4;
my $SUBHEADER_SIZE    = 6;

# A note for future users and readers: The original v1 language system allowed
# the build to create and use a different language than english built-in. We
# removed that feature from our build-system, but the build scripts still had
# the ability. But, starting now, this ability is no longer provided since I
# figured it was boring and unnecessary to write support for now since we
# don't use it anymore.

# Without a language file argument there is nothing to do: show the usage
# text and leave.
#
# NOTE(review): the here-document operator and the <...> placeholders had been
# stripped from this copy of the file. "<<MOO" follows from the MOO terminator
# and <prefix>/<outfile> from the descriptions; the "Usage:" line and the
# <english>, <target> and <target id> placeholder names are reconstructions -
# confirm against a pristine copy.
if(!$ARGV[0]) {
    print <<MOO
Usage: genlang [options] <langfile>

 -p=<prefix>
    Make the tool create a [prefix].c and [prefix].h file.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e, -t and -i.

 -c=<outfile>
    Create binary voicestring file named [outfile]. Works like -b and can be
    used the same time.

 -u
    Update language file. Given the translated file and the most recent english
    file, you\'ll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b, -u or -s.

 -s
    Sort the Update language file in the same order as the strings in the
    English file.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -i=<target id>
    The target id number, needed for -b.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
;
    exit;
}

# How update works:
#
# 1) scan the english file, keep the whole <phrase> for each phrase.
# 2) read the translated file, for each end of phrase, compare:
#  A) all source strings, if there's any change there should be a comment about
#     it output
#  B) the desc fields
#
# 3) output the phrase with the comments from above
# 4) check which phrases that the translated version didn't have, and spit out
#    the english version of those
#

# The single-letter variables below are set by perl's -s switch parsing
# (see the shebang line); give them descriptive names.
my $prefix = $p;
my $binary = $b;
my $update = $u;
my $sortfile = $s;
my $binvoice = $c;

my $english = $e;
my $voiceout = $o;

# Exactly one operating mode may be selected. -b and -c each count as half a
# mode so that the two of them can be combined in one run.
my $check = ($binary?.5:0) + ($prefix?1:0) + ($update?1:0) +
    ($voiceout?1:0) + ($sortfile?1:0) + ($binvoice?.5:0);

# Argument errors are reported on STDERR and end the run with a non-zero
# status so that a calling build notices the failure.
if($check > 1) {
    print STDERR "Please use only one of -p, -u, -o, -b, -c and -s\n";
    exit 1;
}
if(!$check) {
    # List the options that $check actually counts: -e does not select a mode,
    # -b does.
    print STDERR "Please use at least one of -p, -u, -o, -b, -c and -s\n";
    exit 1;
}

if(($binary || $update || $voiceout || $sortfile) && !$english) {
    print STDERR "Please use -e too when you use -b, -o, -u or -s\n";
    exit 1;
}

my $target_id = $i;
if($binary && !$target_id) {
    print STDERR "Please specify a target id number (with -i)!\n";
    exit 1;
}

my $target = $t;
if(!$target && !$update && !$sortfile) {
    print STDERR "Please specify a target (with -t)!\n";
    exit 1;
}

# Build up a regex which can be applied to target wildcard lists.  We only need
# to support prefix matches, so a target parameter of foo:bar can be expanded
# to the regex "\*|f\*|fo\*|foo|b\*|ba\*|bar" and applied to the wildcard list
# (plus end-of-string or commas on either side).  The regex engine should
# discard any duplicates generated for us in the process of constructing the
# state machine, so we don't bother to check.
# Compile the matcher for the colon-separated -t value: every prefix of every
# part followed by a literal '*', plus each part itself, delimited by commas
# or the ends of the string. The parts are passed through quotemeta so that a
# target name can never be interpreted as regex syntax.
my $target_regex = "(?:^|,) *(?:\\*";
foreach my $target_part (split ':', $target) {
    for (my $c=1; $c<=length $target_part; $c++) {
        my $partial = substr $target_part, 0, $c;
        $target_regex .= "|" . quotemeta($partial) . "\\*";
    }
    $target_regex .= "|" . quotemeta($target_part);
}
$target_regex .= ") *(?:,|\$)";
$target_regex = qr/$target_regex/;

# Directory part of the -b output file; english.list is kept next to it.
my $binpath = "";
if ($binary =~ m|(.*)/[^/]+|) {
    $binpath = $1;
}

my $verbose=$v;

my %id;      # string to num hash
my @idnum;   # num to string array

my %allphrases; # For sorting - an array of the elements
my %source;  # id string to source phrase hash
my %dest;    # id string to dest phrase hash
my %voice;   # id string to voice phrase hash

my %users = ('core' => 0); # user name to user index

my $input = $ARGV[0];

# $m holds the name of the tag currently being parsed and @m the stack of
# enclosing tag names. The main loop calls the sub named by $m for every
# "name: value" line, so the subs below are named after the tags they serve.
my @m;
my $m="blank";

# Return $string with leading and trailing whitespace removed.
sub trim {
    my ($string) = @_;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

# Handler used while no tag is open.
sub blank {
    # nothing to do
}

my %head;
# Handler for lines inside a "header" tag: remember name => value.
sub header {
    my ($full, $n, $v)=@_;
    $head{$n}=$v;
}

my %phrase;
# Handler for lines inside a "phrase" tag: remember name => value.
sub phrase {
    my ($full, $n, $v)=@_;
    $phrase{$n}=$v;
}

my %options;
# Handler for lines inside an "options" tag: remember name => value.
sub options {
    my ($full, $n, $v)=@_;
    $options{$n}=$v;
}

# Shared worker for the source/dest/voice handlers.
#   $debug  - label passed by the caller, not used here
#   $strref - reference to the scalar that receives the value
#   $full   - the complete input line, not used here
#   $n      - the target wildcard list on the left of the colon
#   $v      - the value on the right of the colon
# Stores $v through $strref when $n matches $target_regex and leaves the
# referenced scalar untouched otherwise.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v)=@_;
    my $string;
    if ($n =~ $target_regex) {
        $string = $v;
        $$strref = $string;
        return $string;
    }
}

my $src;
sub source {
    parsetarget("src", \$src, @_);
}

my $dest;
sub dest {
    parsetarget("dest", \$dest, @_);
}

my $voice;
sub voice {
    parsetarget("voice", \$voice, @_);
}

# Return 1 when $file1 has a later modification time (stat field 9) than
# $file2, otherwise 0. A file that cannot be stat'ed yields an empty list, so
# its time compares as undefined.
sub file_is_newer {
    my ($file1, $file2) = @_;

    my @s1 = stat $file1;
    my @s2 = stat $file2;

    return 1 if ($s1[9] > $s2[9]);
    return 0;
}

my %idmap;
my %english;
if($english) {
    readenglish();
}

# Load the english master file named by -e.
#
# Fills the globals @idmap (user index => { id name => id number }),
# %english (id name => text of the phrase) and %sortorder (id name =>
# position in the english file). When -b is in use and english.list next to
# the output file is newer than the english file, the id numbers are read
# from that list instead and %english only records that the id exists.
sub readenglish {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    # These two deliberately shadow the file-level @idnum: here they are
    # per-user "next free id" counters.
    # NOTE(review): only index 0 is initialised, so counters for any further
    # user start out undefined - confirm whether that is intended.
    my @idnum = ((0)); # start with a true number
    my @vidnum = ((0x8000)); # first voice id

    if ($binary and file_is_newer("$binpath/english.list", $english)) {
        open(ENG, "<$binpath/english.list") ||
            die "Error: can't open $binpath/english.list";
        while (<ENG>) {
            chomp; # keep the line end out of the stored id number
            my ($user, $id, $value) = split ':', $_;
            $idmap[$user]{$id} = $value;
            $english{$id} = 1;
        }
        close ENG;

        return;
    }

    open(ENG, "<$english") || die "Error: can't open $english";
    my @phrase;
    my $id;
    my $maybeid;
    my $user;
    my $withindest;
    my $numphrases = 0;
    my $numusers = 1; # core is already in the users map

    while(<ENG>) {

        # get rid of DOS newlines
        $_ =~ tr/\r//d;

        if($_ =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($_ =~ /\<\/phrase\>/) {

            # if id is something, when we count and store this phrase
            if($id) {
                # voice-only entries get a difference range
                if($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap[$user]{$id}=$vidnum[$user];
                    $vidnum[$user]++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap[$user]{$id}=$idnum[$user];
                    $idnum[$user]++;
                    # print STDERR "DEST: bumped idnum to $idnum[$user]\n";
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id}=join("", @phrase);
            }
            undef @phrase;
            $id="";
        }
        elsif($_ ne "\n") {
            # gather everything related to this phrase
            push @phrase, $_;
            if($_ =~ /^ *\<dest\>/i) {
                $withindest=1;
                $deststr="";
            }
            elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
                $withindest=0;

                if($update || ($deststr && ($deststr !~ /^none\z/i))) {
                    # we unconditionally always use all IDs when the "update"
                    # feature is used
                    $id = $maybeid;
                    # print "DEST: use this id $id\n";
                }
                else {
                    # print "skip $maybeid for $name\n";
                }
            }
            elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
                my ($name, $val)=($1, $2);
                $dest=""; # in case it is left untouched for when the
                # model name isn't "our"
                dest($_, $name, $val);

                if($dest) {
                    # Store the current dest string. If this target matches
                    # multiple strings, it will get updated several times.
                    $deststr = $dest;
                }
            }
        }

        if($_ =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid=$1;
            $sortorder{$maybeid}=$numphrases++;
        }
        if($_ =~ /^ *user: ([^ \t\n]+)/i) {
            $user = $users{$1};
            if(!(defined $user)) {
                # NOTE(review): $numusers starts at 1 and is pre-incremented,
                # so the first user after 'core' gets index 2 - confirm.
                $user = ++$numusers;
                $users{$1} = $user;
            }
        }
    }
    close(ENG);
}

# a function that compares the english phrase with the translated one.
# compare source strings and desc
# Then output the updated version!
#   $idstr  - id of the phrase (not used in the comparison)
#   $engref - reference to the lines of the english phrase
#   $locref - reference to the lines of the translated phrase
# The translated phrase is printed to stdout; where the desc field or the
# source section differs from the english one, the english text is used and
# the previous text is kept as "###" comment lines.
sub compare {
    my ($idstr, $engref, $locref)=@_;
    my ($edesc, $ldesc);
    my ($esource, $lsource);
    my $mode=0;

    # Collect the english desc and the body of the english source section.
    for my $l (@$engref) {
        if($l =~ /^ *#/) {
            # comment
            next;
        }
        if($l =~ /^ *desc: (.*)/) {
            $edesc=$1;
        }
        elsif($l =~ / *\<source\>/i) {
            $mode=1;
        }
        elsif($mode) {
            if($l =~ / *\<\/source\>/i) {
                last;
            }
            $esource .= "$l\n";
        }
    }

    my @show;

    $mode = 0;
    for my $l (@$locref) {
        if($l =~ /^ *desc: (.*)/) {
            $ldesc=$1;
            if(trim($edesc) ne trim($ldesc)) {
                $l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
            }
            push @show, $l;
        }
        elsif($l =~ / *\<source\>/i) {
            $mode=1;
            push @show, $l;
        }
        elsif($mode) {
            if($l =~ / *\<\/source\>/i) {
                $mode = 0;
                print @show;
                if(trim($esource) ne trim($lsource)) {
                    print "### The <source> section differs from the english!\n",
                    "### the previously used one is commented below:\n";
                    for(split("\n", $lsource)) {
                        print "### $_\n";
                    }
                    print $esource;
                }
                else {
                    print $lsource;
                }
                undef @show; # start over

                push @show, $l;
            }
            else {
                $lsource .= "$l";
            }
        }
        else {
            push @show, $l;
        }
    }

    print @show;
}

my @idcount; # counter for lang ID numbers
my @voiceid; # counter for voice-only ID numbers

for (keys %users) {
    push @idcount, 0;
    push @voiceid, 0x8001;
}

#
# Now start the scanning of the selected language string
#

open(LANG, "<$input") || die "Error: couldn't read language file named $input\n";
my @phrase;
my $header = 1;
my $langoptions = 0;

while(<LANG>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ tr/\r//d;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line - output it if it's part of the header
        if ($header and ($update || $sortfile)) {
            print($_);
        }
        next;
    }
    $header = 0;

    my $ll = $_;

    # print "M: $m\n";

    push @phrase, $ll;

    # this is an XML-lookalike tag
    if (/^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;
        # print "P: $part\n";

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if(($binary || $binvoice || $voiceout) && !$english{$idstr}) {
                    # $idstr doesn't exist for english, skip it\n";
                    # FIXME/TODO: Any reason this filter shouldn't always be enabled?
                }
                elsif($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                elsif($sortfile) {
                    $allphrases{$idstr}=join('',@phrase);
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "$input:$line:1: warning: dest before line lacks quotes ($dest)!\n";
                        $dest='""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "$input:$line:1: warning: source before line lacks quotes ($src)!\n";
                        $src='""';
                    }
                    if($voice !~ /^\"/ and $voice !~ /^none\z/i) {
                        print STDERR "$input:$line:1: warning: voice before line lacks quotes ($voice)!\n";
                        $voice='""';
                    }
                    if($dest eq '""' && $phrase{'desc'} !~ /deprecated/i && $idstr !~ /^VOICE/) {
                        print STDERR "$input:$line:1: warning: empty dest before line in non-deprecated phrase!\n";
                    }

                    my $userstr = trim($phrase{'user'});
                    my $user = $users{$userstr};
                    if ($userstr eq "") {
                        print STDERR "$input:$line:1: warning: missing user!\n";
                        $user = $users{"core"};
                    }
                    elsif(!(defined $user)) {
                        if($english) {
                            print STDERR "$input:$line:1: warning: user was not found in $english!\n";
                            $user = keys %users; # set to an invalid user so it won't be added
                        }
                        else {
                            # we found a new user, add it to the usermap
                            $user = ++$numusers;
                            $users{$userstr} = $user;
                        }
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid[$user]++;
                    }
                    else {
                        $idnum = $idcount[$user]++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$user][$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                        print "user: $user\n";
                    }

                    undef $src;
                    undef $dest;
                    undef $voice;
                    undef $user;
                    undef %phrase;
                }

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        $english{$idstr}=""; # clear it
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;
            } # end of </phrase>
            elsif($part eq "/options") {
                # closing the options
                if ($options{'rtl'}) {
                    $langoptions |= $LANGUAGE_FLAG_RTL;
                }
            } # end of </options>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # Dispatch to the sub named after the current tag (see $m above).
        &$m($_, $name, $val);
    }
}
close(LANG);

if($update) {
    # Everything still left in %english was never seen in the translated
    # file. Emit those phrases in english-file order so that the output is
    # the same from run to run.
    for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %english) {
        if($english{$_}) {
            print "###\n",
            "### This phrase below was not present in the translated file\n",
            "<phrase>\n";
            print $english{$_};
            print "</phrase>\n";
        }
    }
}

if ($sortfile) {
    for(sort { $sortorder{$a} <=> $sortorder{$b} } keys %allphrases) {
        print $allphrases{$_};
    }
}

if($prefix) {
    # We create a .c and .h file

    open(HFILE_CORE, ">$prefix/lang.h") ||
        die "Error: couldn't create file $prefix/lang.h\n";
    open(CFILE_CORE, ">$prefix/lang_core.c") ||
        die "Error: couldn't create file $prefix/lang_core.c\n";

    # get header file name
    $headername = "$prefix/lang.h";
    $headername =~ s/(.*\/)*//;

    # NOTE(review): this copy of the script is damaged from here down to the
    # first "magic lang file header" write. In two places the text between
    # the '<' of a here-document operator and a later '>' is missing, which
    # takes with it the contents written to the generated files, whatever
    # code followed them in the -p branch, and the opening of the binary
    # output branch (presumably an elsif for -b/-c and the "if($binary)" with
    # the first part of the open() of OUTF). The surviving fragments are kept
    # verbatim; they do not compile and must be restored from a pristine copy
    # of the script. Nothing in this section has been guessed.
    print HFILE_CORE <${prefix}_enum.h") ||
        die "couldn't create file ${prefix}_enum.h\n";

    print HFILE_CORE <$binary") or die "Error: Can't create $binary";
        binmode OUTF;
        printf OUTF ("%c%c%c%c", $LANGUAGE_COOKIE, $LANGUAGE_VERSION,
                     $target_id, $langoptions); # magic lang file header
    }
    # NOTE(review): end of the damaged section. The brace above closes the
    # lost "if($binary)"; the code from here to "elsif($voiceout)" is the
    # body of the lost binary-output branch.

    if($binvoice) {
        # Report the voice file name, not the -b one, when this open fails.
        open(OUTV, ">$binvoice") or die "Error: Can't create $binvoice";
        binmode OUTV;
        printf OUTV ("%c%c%c%c", $VOICE_COOKIE, $LANGUAGE_VERSION,
                     $target_id, $langoptions); # magic lang file header
    }

    # output the number of strings for each user
    # NOTE(review): the loops below index @idcount, @voiceid, @idnum and
    # @idmap with the keys of %users, i.e. with user names. 'core' is used as
    # index 0 in numeric context, but so would any other name - confirm how
    # additional users are meant to be handled.
    my $foffset = $HEADER_SIZE + $SUBHEADER_SIZE * keys(%users);
    for (keys %users) {
        my $size;
        for $n (0 .. $idcount[$_]-1) {
            $size += length(trim($dest{$idnum[$_][$n]})) + 1;
        }
        # Sub-header: string count, size and file offset, two bytes each,
        # high byte first.
        if($binary) {
            printf OUTF ("%c%c%c%c%c%c", ($idcount[$_] >> 8), ($idcount[$_] & 0xff),
                         ($size >> 8), ($size & 0xff), ($foffset >> 8), ($foffset & 0xff));
        }
        if($binvoice) {
            printf OUTV ("%c%c%c%c%c%c", ($idcount[$_] >> 8), ($idcount[$_] & 0xff),
                         ($size >> 8), ($size & 0xff), ($foffset >> 8), ($foffset & 0xff));
        }
        $foffset += $size;
    }

    for (keys %users) {
        # loop over the target phrases
        # This loops over the strings in the translated language file order
        my @ids = ((0 .. ($idcount[$_]-1)));
        push @ids, (0x8000 .. ($voiceid[$_]-1));
        for $n (@ids) {
            my $name=$idnum[$_][$n]; # get the ID
            my $dest = $dest{$name}; # get the destination phrase
            my $voice = $voice{$name}; # get the destination voice string

            if($dest && $n < 0x8000 && $binary) {
                $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

                # Now, make sure we get the number from the english sort order:
                $idnum = $idmap[$_]{$name};

                printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
            }
            if($voice && $binvoice) {
                $voice =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
                # Now, make sure we get the number from the english sort order:
                $idnum = $idmap[$_]{$name};
                printf OUTV ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $voice);
            }
        }
    }
    if($binary) {
        close(OUTF);
    }
    if($binvoice) {
        close(OUTV);
    }
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    my @engl; # english id number => index in the translated file, -1 if unused

    # This loops over the strings in the translated language file order
    my @ids = ((0 .. ($idcount[$users{"core"}]-1)));
    push @ids, (0x8000 .. ($voiceid[$users{"core"}]-1));

    #for my $id (@ids) {
    #    print "$id\n";
    #}

    for $i (@ids) {
        my $name=$idnum[$users{"core"}][$i]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            $idnum = $idmap[$users{"core"}]{$name};

            if(length($idnum)) {
                $engl[$idnum] = $i;

                #print "Input index $i output index $idnum\n";
            }
            else {
                # not used, mark it so
                $engl[$i] = -1
            }
        }
    }
    for my $i (@ids) {

        my $o = $engl[$i];

        if(($o < 0) || !length($o)) {
            if($i < 0x8000) {
                print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
            }
            else {
                print "#$i\nid: VOICE_NOT_USED_$i\nvoice: \"\"\n";
            }
            next;
        }

        my $name=$idnum[$users{"core"}][$o]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        print "#$i ($o)\nid: $name\nvoice: $dest\n";
    }
}

if($verbose) {
    my $num_str = 0;

    for (keys %users) {
        $num_str += $idcount[$_];
    }

    printf("%d ID strings scanned\n", $num_str);

    print "* head *\n";
    for(keys %head) {
        # The key is passed as an argument so that it is never interpreted as
        # a printf format.
        printf "%s: %s\n", $_, $head{$_};
    }
}

# Cache the english id numbers next to the binary output so that later -b
# runs can skip parsing the english file (see readenglish).
if ($binary and !file_is_newer("$binpath/english.list", $english)) {
    open(ENGLIST, ">$binpath/english.list") ||
        die "Failed creating $binpath/english.list";
    for my $user (keys %users) {
        for my $id (keys %{$idmap[$user]}) {
            print ENGLIST "$user:$id:$idmap[$user]{$id}\n";
        }
    }
    close ENGLIST;
}