#!/usr/bin/perl -s
#             __________               __   ___.
#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
#                     \/            \/     \/    \/            \/
# $Id$
#
# Copyright (C) 2006 - 2007 by Daniel Stenberg
#

# binary version for the binary lang file
my $langversion = 4; # 3 was the latest one used in the v1 format

# A note for future users and readers: The original v1 language system allowed
# the build to create and use a different language than english built-in. We
# removed that feature from our build-system, but the build scripts still had
# the ability. But, starting now, this ability is no longer provided since I
# figured it was boring and unnecessary to write support for now since we
# don't use it anymore.

# Without a language file argument there is nothing to work on: print the
# usage text and stop.
#
# NOTE(review): the heredoc opener and the <placeholder> option arguments were
# missing from the text this was restored from; the "Usage:" line and the
# placeholder names should be confirmed against the project's copy.
if(!$ARGV[0]) {
    print <<MOO;
Usage: genlang [options] <langv2 file>

 -p=<prefix>
    Make the tool create a [prefix].c and [prefix].h file.

 -b=<outfile>
    Make the tool create a binary language (.lng) file named [outfile].
    The use of this option requires that you also use -e, -t and -i.

 -u
    Update language file. Given the translated file and the most recent english
    file, you\'ll get an updated version sent to stdout. Suitable action to do
    when you intend to update a translation.

 -e=<english lang file>
    Point out the english (original source) file, to use that as master
    language template. Used in combination with -b or -u.

 -t=<target>
    Specify which target you want the translations/phrases for. Required when
    -b or -p is used.

    The target can in fact be specified as numerous different strings,
    separated with colons. This will make genlang to use all the specified
    strings when searching for a matching phrase.

 -i=<target id>
    The target id number, needed for -b.

 -o
    Voice mode output. Outputs all id: and voice: lines for the given target!

 -v
    Enables verbose (debug) output.
MOO
    exit;
}

# How update works:
#
# 1) scan the english file, keep the whole <phrase> for each phrase.
# 2) read the translated file, for each end of phrase, compare:
#  A) all source strings, if there's any change there should be a comment about
#     it output
#  B) the desc fields
#
# 3) output the phrase with the comments from above
# 4) check which phrases that the translated version didn't have, and spit out
#    the english version of those
#

# The single-letter globals read below ($p, $b, $u, $e, $o, $i, $t, $v) are
# filled in from the command line by perl's -s switch (see the #! line).
my $prefix = $p;
my $binary = $b;
my $update = $u;

my $english = $e;
my $voiceout = $o;

# Exactly one of the four operating modes must be selected.
my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);

if($check > 1) {
    print "Please use only one of -p, -u, -o and -b\n";
    exit;
}
if(!$check) {
    print "Please use at least one of -p, -u, -o and -b\n";
    exit;
}
if(($binary || $update || $voiceout) && !$english) {
    print "Please use -e too when you use -b, -o or -u\n";
    exit;
}

my $target_id = $i;
if($binary && !$target_id) {
    print "Please specify a target id number (with -i)!\n";
    exit;
}

my $target = $t;
if(!$target && !$update) {
    print "Please specify a target (with -t)!\n";
    exit;
}
my $verbose=$v;

my %id;     # string to num hash
my @idnum;  # num to string array

my %source; # id string to source phrase hash
my %dest;   # id string to dest phrase hash
my %voice;  # id string to voice phrase hash

my $input = $ARGV[0];

# Tag stack for the XML-lookalike parser: @m holds the enclosing tag names and
# $m the current one. $m doubles as the NAME of the sub that receives every
# "key: value" line found inside that tag (see blank/header/phrase/source/
# dest/voice below).
my @m;
my $m="blank";

# match($string, $pattern)
# Returns true when $string matches the wildcard $pattern as a whole.
# '?' stands for one character. '*' is first rewritten to ".?*" and the
# following '?' rewrite then turns that into "..*", so '*' stands for ONE OR
# MORE characters (it never matches the empty string).
# Other regex metacharacters in $pattern are not escaped.
sub match {
    my ($string, $pattern)=@_;

    $pattern =~ s/\*/.?*/g;
    $pattern =~ s/\?/./g;

    return ($string =~ /^$pattern\z/);
}

# Handler for lines outside of any tag: ignores them.
sub blank {
    # nothing to do
}

# Handler for <header> lines: remembers each name/value pair in %head.
my %head;
sub header {
    my ($full, $n, $v)=@_;
    $head{$n}=$v;
}

# Handler for <phrase> lines (id:, desc:, ...): remembers them in %phrase.
my %phrase;
sub phrase {
    my ($full, $n, $v)=@_;
    $phrase{$n}=$v;
}

# parsetarget($debug, $strref, $full, $n, $v)
# $n is a comma separated list of target wildcards and $v the value that
# belongs to them. When any colon separated part of the global $target matches
# one of the wildcards, $v is stored through $strref and returned. Without a
# match the referenced scalar is left untouched and nothing is returned.
# $debug and $full are accepted for the callers' sake but not used.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v)=@_;

    for my $test (split(/ *, */, $n)) {
        for my $part (split(/:/, $target)) {
            if(match($part, $test)) {
                $$strref = $v;
                return $v;
            }
        }
    }
    return;
}

# Handlers for the <source>, <dest> and <voice> sections. Each one stores the
# value selected for the current target in the scalar of the same name.
my $src;
sub source {
    parsetarget("src", \$src, @_);
}

my $dest;
sub dest {
    parsetarget("dest", \$dest, @_);
}

my $voice;
sub voice {
    parsetarget("voice", \$voice, @_);
}

my %idmap;   # english id string => id number in english file order
my %english; # english id string => text between <phrase> and </phrase>

if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    my $idnum=0; # start with a true number
    my $vidnum=0x8000; # first voice id
    open(my $eng_fh, '<', $english) || die "can't open $english: $!\n";
    my @phrase;
    my $id;
    my $maybeid;
    my $withindest;
    while(my $eline = <$eng_fh>) {

        # get rid of DOS newlines
        $eline =~ s/\r//g;

        if($eline =~ /^ *\<phrase\>/) {
            # this is the start of a phrase
        }
        elsif($eline =~ /^ *\<\/phrase\>/) {

            # if id is something, when we count and store this phrase
            if($id) {
                # voice-only entries get a difference range
                if($id =~ /^VOICE_/) {
                    # Assign an ID number to this entry
                    $idmap{$id}=$vidnum;
                    $vidnum++;
                }
                else {
                    # Assign an ID number to this entry
                    $idmap{$id}=$idnum;
                    $idnum++;
                }

                # this is the end of a phrase, add it to the english hash
                $english{$id}=join("", @phrase);
            }
            undef @phrase;
            $id="";
        }
        elsif($eline ne "\n") {
            # gather everything related to this phrase
            push @phrase, $eline;
            if($eline =~ /^ *\<dest\>/i) {
                $withindest=1;
            }
            elsif($withindest && ($eline =~ /^ *\<\/dest\>/i)) {
                $withindest=0;
            }
            elsif($withindest && ($eline =~ / *([^:]+): *(.*)/)) {
                my ($name, $val)=($1, $2);
                $dest=""; # in case it is left untouched for when the
                          # model name isn't "our"
                dest($eline, $name, $val);

                if($update || ($dest && ($dest !~ /^none\z/i))) {
                    # we unconditionally always use all IDs when the "update"
                    # feature is used
                    $id = $maybeid;
                }
            }
        }

        # The id: line comes before <dest>, so only remember it as a
        # candidate; it is promoted to $id above once a usable dest is seen.
        if($eline =~ /^ *id: ([^ \t\n]+)/i) {
            $maybeid=$1;
        }
    }
    close($eng_fh);

    # $dest was only borrowed for the scan above; clear it so the last english
    # value cannot be mistaken for the dest of the first translated phrase.
    undef $dest;
}

# a function that compares the english phrase with the translated one.
# compare source strings and desc

# Then output the updated version!
# compare($idstr, $engref, $locref)
# $engref: lines (without newlines) of the english phrase.
# $locref: lines (with newlines) of the translated phrase.
# Prints the translated phrase to stdout. When its desc: line or its <source>
# section differs from the english one, the english text is put in its place
# and the previous text is kept as '###' comments. $idstr is not used.
sub compare {
    my ($idstr, $engref, $locref)=@_;
    my ($edesc, $ldesc);
    my ($esource, $lsource);
    my $mode=0;

    # Pick the desc and the <source> section out of the english phrase
    for my $eline (@$engref) {
        if($eline =~ /^ *#/) {
            # comment
            next;
        }
        if($eline =~ /^ *desc: (.*)/) {
            $edesc=$1;
        }
        elsif($eline =~ / *\<source\>/i) {
            $mode=1;
        }
        elsif($mode) {
            if($eline =~ / *\<\/source\>/i) {
                last;
            }
            $esource .= "$eline\n";
        }
    }

    my @show; # lines waiting to be printed
    $mode = 0;
    for my $orig (@$locref) {
        my $lline = $orig; # work on a copy, leave the caller's array alone
        if($lline =~ /^ *desc: (.*)/) {
            $ldesc=$1;
            if($edesc ne $ldesc) {
                $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n  desc: $edesc\n";
            }
            push @show, $lline;
        }
        elsif($lline =~ / *\<source\>/i) {
            $mode=1;
            push @show, $lline;
        }
        elsif($mode) {
            if($lline =~ / *\<\/source\>/i) {
                $mode = 0;
                print @show;
                if($esource ne $lsource) {
                    print "### The <source> section differs from the english!\n",
                    "### the previously used one is commented below:\n";
                    for(split("\n", $lsource)) {
                        print "### $_\n";
                    }
                    print $esource;
                }
                else {
                    print $lsource;
                }
                undef @show; # start over

                push @show, $lline;
            }
            else {
                $lsource .= "$lline";
            }
        }
        else {
            push @show, $lline;
        }
    }

    print @show;
}

my $idcount; # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers

#
# Now start the scanning of the selected language string
#

open(my $lang_fh, '<', $input) ||
    die "couldn't read language file named $input: $!\n";
my @phrase;
my $line = 0; # input line counter, used in the warnings below
while(my $cur = <$lang_fh>) {

    $line++;

    # get rid of DOS newlines
    $cur =~ s/\r//g;

    if($cur =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line
        next;
    }

    push @phrase, $cur;

    # this is an XML-lookalike tag
    if($cur =~ /^(<|[^\"<]+<)([^>]*)>/) {
        my $part = $2;

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($binary && !$english{$idstr}) {
                    # $idstr doesn't exist for english, skip it
                }
                elsif($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                elsif(!$update) {
                    # we don't do the fully detailed analysis when we "update"
                    # since we don't do it for a particular target etc

                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts, if there are no quotes we
                    # will assume an empty string was intended
                    if($dest !~ /^\"/) {
                        print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
                        $dest='""';
                    }
                    if($src !~ /^\"/) {
                        print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
                        $src='""';
                    }
                    if($voice !~ /^\"/) {
                        print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
                        $voice='""';
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.

                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                    }
                }

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $e);

                        compare($idstr, \@eng, \@phrase);

                        $english{$idstr}=""; # clear it
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }

                # Per-phrase state must never survive into the next phrase,
                # whether this one was recorded or skipped.
                undef $src;
                undef $dest;
                undef $voice;
                undef %phrase;
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if($cur =~ /^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # $m holds the name of the handler sub for the current tag
        &$m($cur, $name, $val);
    }
}
close($lang_fh);

if($update) {
    # Whatever is still non-empty in %english was never seen in the
    # translated file. Print those in english file order so that the output
    # is the same from run to run.
    my @missing = sort { $idmap{$a} <=> $idmap{$b} }
                  grep { $english{$_} } keys %english;
    for my $idstr (@missing) {
        print "###\n",
        "### This phrase below was not present in the translated file\n",
        "<phrase>\n";
        print $english{$idstr};
        print "</phrase>\n";
    }
}

if($prefix) {
    # We create a .c and .h file
    #
    # NOTE(review): the text this section was restored from had lost
    # everything between the first heredoc and the opening of the binary
    # output file. The generator below follows the upstream genlang layout
    # from memory; confirm it against the project's copy before relying on it.

    open(my $hfile, '>', "$prefix.h") ||
        die "couldn't create file $prefix.h\n";
    open(my $cfile, '>', "$prefix.c") ||
        die "couldn't create file $prefix.c\n";

    print {$hfile} <<MOO;
/* This file was automatically generated using genlang */
/*
 * The str() macro/functions is how to access strings that might be
 * translated. Use it like str(MACRO) and expect a string to be
 * returned!
 */
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
   It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum {
MOO

    print {$cfile} <<MOO;
/* This file was automaticly generated using genlang, the strings come
   from "$input" */

#include "$prefix.h"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO

    # Output the ID names for the enum in the header file
    for my $n (1 .. $idcount) {
        my $name=$idnum[$n - 1]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf {$hfile} ("    %s, /* %d */\n", $name, $n-1);
    }

    # Output separation marker for last string ID and the upcoming voice IDs
    print {$hfile} <<MOO;
    LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
    /* --- below this follows voice-only strings --- */
    VOICEONLY_DELIMITER = 0x8000,
MOO

    # Output the voice-only ID names for the enum in the header file
    for my $n (0x8000 .. ($voiceid-1)) {
        my $name=$idnum[$n]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf {$hfile} ("    %s,\n", $name);
    }

    # Output end of enum
    print {$hfile} "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $n (1 .. $idcount) {
        my $name=$idnum[$n - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        $dest =~ s:\"$:\\0\":; # insert a \0 before the second quote

        if(!$dest) {
            # this is just to be on the safe side
            $dest = '"\0"';
        }

        printf {$cfile} ("    %s\n", $dest);
    }

    # Output end of string chunk
    print {$cfile} <<MOO;
;
/* end of generated string list */
MOO

    close($hfile) || die "couldn't write file $prefix.h: $!\n";
    close($cfile) || die "couldn't write file $prefix.c: $!\n";
} # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # The id numbers come from the english file (%idmap), which was scanned
    # before the translated file, since english sets the id order for all
    # language files.

    open(my $outf, '>', $binary) or die "Can't create $binary: $!\n";
    binmode $outf;
    printf {$outf} ("\x1a%c%c", $langversion, $target_id); # magic lang file header

    # loop over the target phrases
    for my $n (1 .. $idcount) {
        my $name=$idnum[$n - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $engnum = $idmap{$name};

            # two byte id, high byte first, then the zero terminated string
            printf {$outf} ("%c%c%s\x00", ($engnum>>8), ($engnum&0xff), $dest);
        }
    }
    close($outf) or die "Can't write $binary: $!\n";
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    my @engl; # english id number => index used by the translated file

    # This loops over the strings in the translated language file order
    my @ids = ((0 .. ($idcount-1)));
    push @ids, (0x8000 .. ($voiceid-1));

    for my $n (@ids) {
        my $name=$idnum[$n]; # get the ID

        if($voice{$name}) {
            # Now, make sure we get the number from the english sort order:
            $engl[ $idmap{$name} ] = $n;
        }
    }

    # Second pass: the same numbers are now read as english id numbers
    for my $n (@ids) {
        my $o = $engl[$n];

        if(!defined $o) {
            # No translated phrase maps to this english id. Say so instead of
            # falling through to index 0 and repeating the first phrase.
            print "#$n\nid: NOT_USED_$n\nvoice: \"\"\n";
            next;
        }

        my $name=$idnum[$o]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        print "#$n\nid: $name\nvoice: $dest\n";
    }
}


if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    for(keys %head) {
        printf "%s: %s\n", $_, $head{$_};
    }
}