d71339bf44
strings. I also added the numerical id number for strings in the generated header file, which is useful when the new lngdump tool is used and you want to compare the ids for the built-in strings to the ones in the generated binary files. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14274 a1c6a512-1295-4272-9138-f99709370657
675 lines
18 KiB
Perl
Executable file
675 lines
18 KiB
Perl
Executable file
#!/usr/bin/perl -s
|
|
# __________ __ ___.
|
|
# Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
# \/ \/ \/ \/ \/
|
|
# $Id$
|
|
#
|
|
# Copyright (C) 2006 - 2007 by Daniel Stenberg
|
|
#
|
|
|
|
# binary version for the binary lang file
|
|
my $langversion = 4; # 3 was the latest one used in the v1 format
|
|
|
|
# A note for future users and readers: The original v1 language system allowed
|
|
# the build to create and use a different language than english built-in. We
|
|
# removed that feature from our build-system, but the build scripts still had
|
|
# the ability. But, starting now, this ability is no longer provided since I
|
|
# figured it was boring and unnecessary to write support for now since we
|
|
# don't use it anymore.
|
|
|
|
if(!$ARGV[0]) {
|
|
print <<MOO
|
|
Usage: genlang [options] <langv2 file>
|
|
|
|
-p=<prefix>
|
|
Make the tool create a [prefix].c and [prefix].h file.
|
|
|
|
-b=<outfile>
|
|
Make the tool create a binary language (.lng) file named [outfile].
|
|
The use of this option requires that you also use -e, -t and -i.
|
|
|
|
-u
|
|
Update language file. Given the translated file and the most recent english
|
|
file, you\'ll get an updated version sent to stdout. Suitable action to do
|
|
when you intend to update a translation.
|
|
|
|
-e=<english lang file>
|
|
Point out the english (original source) file, to use that as master
|
|
language template. Used in combination with -b or -u.
|
|
|
|
-t=<target>
|
|
Specify which target you want the translations/phrases for. Required when
|
|
-b or -p is used.
|
|
|
|
The target can in fact be specified as numerous different strings,
|
|
separated with colons. This will make genlang to use all the specified
|
|
strings when searching for a matching phrase.
|
|
|
|
-i=<target id>
|
|
The target id number, needed for -b.
|
|
|
|
-o
|
|
Voice mode output. Outputs all id: and voice: lines for the given target!
|
|
|
|
-v
|
|
Enables verbose (debug) output.
|
|
MOO
|
|
;
|
|
exit;
|
|
}
|
|
|
|
# How update works:
|
|
#
|
|
# 1) scan the english file, keep the whole <phrase> for each phrase.
|
|
# 2) read the translated file, for each end of phrase, compare:
|
|
# A) all source strings, if there's any change there should be a comment about
|
|
# it output
|
|
# B) the desc fields
|
|
#
|
|
# 3) output the phrase with the comments from above
|
|
# 4) check which phrases that the translated version didn't have, and spit out
|
|
# the english version of those
|
|
#
|
|
|
|
my $prefix = $p;
|
|
my $binary = $b;
|
|
my $update = $u;
|
|
|
|
my $english = $e;
|
|
my $voiceout = $o;
|
|
|
|
my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);
|
|
|
|
if($check > 1) {
|
|
print "Please use only one of -p, -u, -o and -b\n";
|
|
exit;
|
|
}
|
|
if(!$check) {
|
|
print "Please use at least one of -p, -u, -o and -b\n";
|
|
exit;
|
|
}
|
|
|
|
if(($binary || $update || $voiceout) && !$english) {
|
|
print "Please use -e too when you use -b, -o or -u\n";
|
|
exit;
|
|
}
|
|
|
|
my $target_id = $i;
|
|
if($binary && !$target_id) {
|
|
print "Please specify a target id number (with -i)!\n";
|
|
exit;
|
|
}
|
|
|
|
my $target = $t;
|
|
if(!$target && !$update) {
|
|
print "Please specify a target (with -t)!\n";
|
|
exit;
|
|
}
|
|
my $verbose=$v;
|
|
|
|
my %id; # string to num hash
|
|
my @idnum; # num to string array
|
|
|
|
my %source; # id string to source phrase hash
|
|
my %dest; # id string to dest phrase hash
|
|
my %voice; # id string to voice phrase hash
|
|
|
|
my $input = $ARGV[0];
|
|
|
|
my @m;
|
|
my $m="blank";
|
|
|
|
sub match {
|
|
my ($string, $pattern)=@_;
|
|
|
|
$pattern =~ s/\*/.?*/g;
|
|
$pattern =~ s/\?/./g;
|
|
|
|
return ($string =~ /^$pattern\z/);
|
|
}
|
|
|
|
sub blank {
|
|
# nothing to do
|
|
}
|
|
|
|
my %head;
|
|
sub header {
|
|
my ($full, $n, $v)=@_;
|
|
$head{$n}=$v;
|
|
}
|
|
|
|
my %phrase;
|
|
sub phrase {
|
|
my ($full, $n, $v)=@_;
|
|
$phrase{$n}=$v;
|
|
}
|
|
|
|
sub parsetarget {
|
|
my ($debug, $strref, $full, $n, $v)=@_;
|
|
my $string;
|
|
my @all= split(" *, *", $n);
|
|
my $test;
|
|
for $test (@all) {
|
|
# print "TEST ($debug) $target for $test\n";
|
|
for my $part (split(":", $target)) {
|
|
if(match($part, $test)) {
|
|
$string = $v;
|
|
# print "MATCH: $test => $v\n";
|
|
$$strref = $string;
|
|
return $string;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
my $src;
|
|
sub source {
|
|
parsetarget("src", \$src, @_);
|
|
}
|
|
|
|
my $dest;
|
|
sub dest {
|
|
parsetarget("dest", \$dest, @_);
|
|
}
|
|
|
|
my $voice;
|
|
sub voice {
|
|
parsetarget("voice", \$voice, @_);
|
|
}
|
|
|
|
my %idmap;
|
|
my %english;
|
|
if($english) {
|
|
# For the cases where the english file needs to be scanned/read, we do
|
|
# it before we read the translated file. For -b it isn't necessary, but for
|
|
# -u it is convenient.
|
|
|
|
my $idnum=0; # start with a true number
|
|
my $vidnum=0x8000; # first voice id
|
|
open(ENG, "<$english") || die "can't open $english";
|
|
my @phrase;
|
|
my $id;
|
|
my $maybeid;
|
|
my $withindest;
|
|
while(<ENG>) {
|
|
|
|
# get rid of DOS newlines
|
|
$_ =~ s/\r//g;
|
|
|
|
if($_ =~ /^ *\<phrase\>/) {
|
|
# this is the start of a phrase
|
|
}
|
|
elsif($_ =~ /^ *\<\/phrase\>/) {
|
|
|
|
# if id is something, when we count and store this phrase
|
|
if($id) {
|
|
# voice-only entries get a difference range
|
|
if($id =~ /^VOICE_/) {
|
|
# Assign an ID number to this entry
|
|
$idmap{$id}=$vidnum;
|
|
$vidnum++;
|
|
}
|
|
else {
|
|
# Assign an ID number to this entry
|
|
$idmap{$id}=$idnum;
|
|
$idnum++;
|
|
# print STDERR "DEST: bumped idnum to $idnum\n";
|
|
}
|
|
|
|
# this is the end of a phrase, add it to the english hash
|
|
$english{$id}=join("", @phrase);
|
|
}
|
|
undef @phrase;
|
|
$id="";
|
|
}
|
|
elsif($_ ne "\n") {
|
|
# gather everything related to this phrase
|
|
push @phrase, $_;
|
|
if($_ =~ /^ *\<dest\>/i) {
|
|
$withindest=1;
|
|
$deststr="";
|
|
}
|
|
elsif($withindest && ($_ =~ /^ *\<\/dest\>/i)) {
|
|
$withindest=0;
|
|
|
|
if($update || ($deststr && ($deststr !~ /^none\z/i))) {
|
|
# we unconditionally always use all IDs when the "update"
|
|
# feature is used
|
|
$id = $maybeid;
|
|
# print STDERR "DEST: use this id $id\n";
|
|
}
|
|
else {
|
|
# print "skip $maybeid for $name\n";
|
|
}
|
|
}
|
|
elsif($withindest && ($_ =~ / *([^:]+): *(.*)/)) {
|
|
my ($name, $val)=($1, $2);
|
|
$dest=""; # in case it is left untouched for when the
|
|
# model name isn't "our"
|
|
dest($_, $name, $val);
|
|
|
|
if($dest) {
|
|
# Store the current dest string. If this target matches
|
|
# multiple strings, it will get updated several times.
|
|
$deststr = $dest;
|
|
}
|
|
}
|
|
}
|
|
|
|
if($_ =~ /^ *id: ([^ \t\n]+)/i) {
|
|
$maybeid=$1;
|
|
}
|
|
}
|
|
close(ENG);
|
|
}
|
|
|
|
# a function that compares the english phrase with the translated one.
|
|
# compare source strings and desc
|
|
|
|
# Then output the updated version!
|
|
sub compare {
|
|
my ($idstr, $engref, $locref)=@_;
|
|
my ($edesc, $ldesc);
|
|
my ($esource, $lsource);
|
|
my $mode=0;
|
|
|
|
for my $l (@$engref) {
|
|
if($l =~ /^ *#/) {
|
|
# comment
|
|
next;
|
|
}
|
|
if($l =~ /^ *desc: (.*)/) {
|
|
$edesc=$1;
|
|
}
|
|
elsif($l =~ / *\<source\>/i) {
|
|
$mode=1;
|
|
}
|
|
elsif($mode) {
|
|
if($l =~ / *\<\/source\>/i) {
|
|
last;
|
|
}
|
|
$esource .= "$l\n";
|
|
}
|
|
}
|
|
|
|
my @show;
|
|
my @source;
|
|
|
|
$mode = 0;
|
|
for my $l (@$locref) {
|
|
if($l =~ /^ *desc: (.*)/) {
|
|
$ldesc=$1;
|
|
if($edesc ne $ldesc) {
|
|
$l = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
|
|
}
|
|
push @show, $l;
|
|
}
|
|
elsif($l =~ / *\<source\>/i) {
|
|
$mode=1;
|
|
push @show, $l;
|
|
}
|
|
elsif($mode) {
|
|
if($l =~ / *\<\/source\>/i) {
|
|
$mode = 0;
|
|
print @show;
|
|
if($esource ne $lsource) {
|
|
print "### The <source> section differs from the english!\n",
|
|
"### the previously used one is commented below:\n";
|
|
for(split("\n", $lsource)) {
|
|
print "### $_\n";
|
|
}
|
|
print $esource;
|
|
}
|
|
else {
|
|
print $lsource;
|
|
}
|
|
undef @show; # start over
|
|
|
|
push @show, $l;
|
|
}
|
|
else {
|
|
$lsource .= "$l";
|
|
}
|
|
}
|
|
else {
|
|
push @show, $l;
|
|
}
|
|
}
|
|
|
|
|
|
print @show;
|
|
}
|
|
|
|
my $idcount; # counter for lang ID numbers
|
|
my $voiceid=0x8000; # counter for voice-only ID numbers
|
|
|
|
#
|
|
# Now start the scanning of the selected language string
|
|
#
|
|
|
|
open(LANG, "<$input") || die "couldn't read language file named $input\n";
|
|
my @phrase;
|
|
while(<LANG>) {
|
|
|
|
$line++;
|
|
|
|
# get rid of DOS newlines
|
|
$_ =~ s/\r//g;
|
|
|
|
if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
|
|
# comment or empty line
|
|
next;
|
|
}
|
|
|
|
my $ll = $_;
|
|
|
|
# print "M: $m\n";
|
|
|
|
push @phrase, $ll;
|
|
|
|
# this is an XML-lookalike tag
|
|
if (/^(<|[^\"<]+<)([^>]*)>/) {
|
|
my $part = $2;
|
|
# print "P: $part\n";
|
|
|
|
if($part =~ /^\//) {
|
|
# this was a closing tag
|
|
|
|
if($part eq "/phrase") {
|
|
# closing the phrase
|
|
|
|
my $idstr = $phrase{'id'};
|
|
my $idnum;
|
|
|
|
if($binary && !$english{$idstr}) {
|
|
# $idstr doesn't exist for english, skip it\n";
|
|
}
|
|
elsif($dest =~ /^none\z/i) {
|
|
# "none" as dest (without quotes) means that this entire
|
|
# phrase is to be ignored
|
|
}
|
|
elsif(!$update) {
|
|
# we don't do the fully detailed analysis when we "update"
|
|
# since we don't do it for a particular target etc
|
|
|
|
# allow the keyword 'deprecated' to be used on dest and
|
|
# voice strings to mark that as deprecated. It will then
|
|
# be replaced with "".
|
|
|
|
$dest =~ s/^deprecate(|d)\z/\"\"/i;
|
|
$voice =~ s/^deprecate(|d)\z/\"\"/i;
|
|
|
|
# basic syntax error alerts, if there are no quotes we
|
|
# will assume an empty string was intended
|
|
if($dest !~ /^\"/) {
|
|
print STDERR "Warning: dest before $input line $line lacks quotes ($dest)!\n";
|
|
$dest='""';
|
|
}
|
|
if($src !~ /^\"/) {
|
|
print STDERR "Warning: source before $input line $line lacks quotes ($src)!\n";
|
|
$src='""';
|
|
}
|
|
if($voice !~ /^\"/) {
|
|
print STDERR "Warning: voice before $input line $line lacks quotes ($voice)!\n";
|
|
$voice='""';
|
|
}
|
|
|
|
# Use the ID name to figure out which id number range we
|
|
# should use for this phrase. Voice-only strings are
|
|
# separated.
|
|
|
|
if($idstr =~ /^VOICE/) {
|
|
$idnum = $voiceid++;
|
|
}
|
|
else {
|
|
$idnum = $idcount++;
|
|
}
|
|
|
|
$id{$idstr} = $idnum;
|
|
$idnum[$idnum]=$idstr;
|
|
|
|
$source{$idstr}=$src;
|
|
$dest{$idstr}=$dest;
|
|
$voice{$idstr}=$voice;
|
|
|
|
if($verbose) {
|
|
print "id: $phrase{id} ($idnum)\n";
|
|
print "source: $src\n";
|
|
print "dest: $dest\n";
|
|
print "voice: $voice\n";
|
|
}
|
|
|
|
undef $src;
|
|
undef $dest;
|
|
undef $voice;
|
|
undef %phrase;
|
|
}
|
|
|
|
if($update) {
|
|
my $e = $english{$idstr};
|
|
|
|
if($e) {
|
|
# compare original english with this!
|
|
my @eng = split("\n", $english{$idstr});
|
|
|
|
compare($idstr, \@eng, \@phrase);
|
|
|
|
$english{$idstr}=""; # clear it
|
|
}
|
|
else {
|
|
print "### $idstr: The phrase is not used. Skipped\n";
|
|
}
|
|
}
|
|
undef @phrase;
|
|
|
|
} # end of </phrase>
|
|
|
|
# starts with a slash, this _ends_ this section
|
|
$m = pop @m; # get back old value, the previous level's tag
|
|
next;
|
|
} # end of tag close
|
|
|
|
# This is an opening (sub) tag
|
|
|
|
push @m, $m; # store old value
|
|
$m = $part;
|
|
next;
|
|
}
|
|
|
|
if(/^ *([^:]+): *(.*)/) {
|
|
my ($name, $val)=($1, $2);
|
|
&$m($_, $name, $val);
|
|
}
|
|
}
|
|
close(LANG);
|
|
|
|
if($update) {
|
|
my $any=0;
|
|
for(keys %english) {
|
|
if($english{$_}) {
|
|
print "###\n",
|
|
"### This phrase below was not present in the translated file\n",
|
|
"<phrase>\n";
|
|
print $english{$_};
|
|
print "</phrase>\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
if($prefix) {
|
|
# We create a .c and .h file
|
|
|
|
open(HFILE, ">$prefix.h") ||
|
|
die "couldn't create file $prefix.h\n";
|
|
open(CFILE, ">$prefix.c") ||
|
|
die "couldn't create file $prefix.c\n";
|
|
|
|
print HFILE <<MOO
|
|
/* This file was automatically generated using genlang */
|
|
/*
|
|
* The str() macro/functions is how to access strings that might be
|
|
* translated. Use it like str(MACRO) and expect a string to be
|
|
* returned!
|
|
*/
|
|
#define str(x) language_strings[x]
|
|
|
|
/* this is the array for holding the string pointers.
|
|
It will be initialized at runtime. */
|
|
extern unsigned char *language_strings[];
|
|
/* this contains the concatenation of all strings, separated by \\0 chars */
|
|
extern const unsigned char language_builtin[];
|
|
|
|
/* The enum below contains all available strings */
|
|
enum \{
|
|
MOO
|
|
;
|
|
|
|
print CFILE <<MOO
|
|
/* This file was automaticly generated using genlang, the strings come
|
|
from "$input" */
|
|
|
|
#include "$prefix.h"
|
|
|
|
unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
|
|
const unsigned char language_builtin[] =
|
|
MOO
|
|
;
|
|
|
|
# Output the ID names for the enum in the header file
|
|
my $i;
|
|
for $i (1 .. $idcount) {
|
|
my $name=$idnum[$i - 1]; # get the ID name
|
|
|
|
$name =~ s/\"//g; # cut off the quotes
|
|
|
|
printf HFILE (" %s, /* %d */\n", $name, $i-1);
|
|
}
|
|
|
|
# Output separation marker for last string ID and the upcoming voice IDs
|
|
|
|
print HFILE <<MOO
|
|
LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
|
|
/* --- below this follows voice-only strings --- */
|
|
VOICEONLY_DELIMITER = 0x8000,
|
|
MOO
|
|
;
|
|
|
|
# Output the ID names for the enum in the header file
|
|
for $i (0x8000 .. ($voiceid-1)) {
|
|
my $name=$idnum[$i]; # get the ID name
|
|
|
|
$name =~ s/\"//g; # cut off the quotes
|
|
|
|
printf HFILE (" %s,\n", $name);
|
|
}
|
|
|
|
# Output end of enum
|
|
print HFILE "\n};\n/* end of generated enum list */\n";
|
|
|
|
# Output the target phrases for the source file
|
|
for $i (1 .. $idcount) {
|
|
my $name=$idnum[$i - 1]; # get the ID
|
|
my $dest = $dest{$name}; # get the destination phrase
|
|
|
|
$dest =~ s:\"$:\\0\":; # insert a \0 before the second quote
|
|
|
|
if(!$dest) {
|
|
# this is just to be on the safe side
|
|
$dest = '"\0"';
|
|
}
|
|
|
|
printf CFILE (" %s\n", $dest);
|
|
}
|
|
|
|
# Output end of string chunk
|
|
print CFILE <<MOO
|
|
;
|
|
/* end of generated string list */
|
|
MOO
|
|
;
|
|
|
|
close(HFILE);
|
|
close(CFILE);
|
|
} # end of the c/h file generation
|
|
elsif($binary) {
|
|
# Creation of a binary lang file was requested
|
|
|
|
# We must first scan the english file to get the correct order of the id
|
|
# numbers used there, as that is what sets the id order for all language
|
|
# files. The english file is scanned before the translated file was
|
|
# scanned.
|
|
|
|
open(OUTF, ">$binary") or die "Can't create $binary";
|
|
binmode OUTF;
|
|
printf OUTF ("\x1a%c%c", $langversion, $target_id); # magic lang file header
|
|
|
|
# loop over the target phrases
|
|
for $i (1 .. $idcount) {
|
|
my $name=$idnum[$i - 1]; # get the ID
|
|
my $dest = $dest{$name}; # get the destination phrase
|
|
|
|
if($dest) {
|
|
$dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
|
|
|
|
# Now, make sure we get the number from the english sort order:
|
|
$idnum = $idmap{$name};
|
|
|
|
printf OUTF ("%c%c%s\x00", ($idnum>>8), ($idnum&0xff), $dest);
|
|
}
|
|
}
|
|
}
|
|
elsif($voiceout) {
|
|
# voice output requested, display id: and voice: strings in a v1-like
|
|
# fashion
|
|
|
|
my @engl;
|
|
|
|
# This loops over the strings in the translated language file order
|
|
my @ids = ((0 .. ($idcount-1)));
|
|
push @ids, (0x8000 .. ($voiceid-1));
|
|
|
|
#for my $id (@ids) {
|
|
# print "$id\n";
|
|
#}
|
|
|
|
for $i (@ids) {
|
|
my $name=$idnum[$i]; # get the ID
|
|
my $dest = $voice{$name}; # get the destination voice string
|
|
|
|
if($dest) {
|
|
$dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes
|
|
|
|
# Now, make sure we get the number from the english sort order:
|
|
$idnum = $idmap{$name};
|
|
|
|
$engl[$idnum] = $i;
|
|
|
|
# print "Input index $i output index $idnum\n";
|
|
|
|
}
|
|
}
|
|
for my $i (@ids) {
|
|
|
|
my $o = $engl[$i];
|
|
|
|
my $name=$idnum[$o]; # get the ID
|
|
my $dest = $voice{$name}; # get the destination voice string
|
|
|
|
print "#$i\nid: $name\nvoice: $dest\n";
|
|
}
|
|
|
|
}
|
|
|
|
|
|
if($verbose) {
|
|
printf("%d ID strings scanned\n", $idcount);
|
|
|
|
print "* head *\n";
|
|
for(keys %head) {
|
|
printf "$_: %s\n", $head{$_};
|
|
}
|
|
}
|
|
|