993a20a2ca
* allow 'deprecated' as a keyword for strings marked as ... deprecated! * warns on stderr if a given string is given without quotes git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9533 a1c6a512-1295-4272-9138-f99709370657
613 lines
16 KiB
Perl
Executable file
613 lines
16 KiB
Perl
Executable file
#!/usr/bin/perl -s
|
|
# __________ __ ___.
|
|
# Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
# \/ \/ \/ \/ \/
|
|
# $Id$
|
|
#
|
|
# Copyright (C) 2006 by Daniel Stenberg
|
|
#
|
|
|
|
# Version byte stored in the header of every binary (.lng) language file.
my $langversion = 3; # 3 was the latest one used in the v1 format

# A note for future users and readers: The original v1 language system allowed
# the build to create and use a different language than english built-in. We
# removed that feature from our build-system, but the build scripts still had
# the ability. But, starting now, this ability is no longer provided since I
# figured it was boring and unnecessary to write support for now since we
# don't use it anymore.

# No language file on the command line: show the help text and stop.
unless($ARGV[0]) {
    print <<MOO;
Usage: genlang2 [options] <langv2 file>

-p=<prefix>
Make the tool create a [prefix].c and [prefix].h file.

-b=<outfile>
Make the tool create a binary language (.lng) file namaed [outfile].
The use of this option requires that you also use -e.

-u
Update language file. Given the translated file and the most recent english
file, you\'ll get an updated version sent to stdout. Suitable action to do
when you intend to update a translation.

-e=<english lang file>
Point out the english (original source) file, to use that as master
language template. Used in combination with -b or -u.

-t=<target>
Specify which target you want the translations/phrases for. Required when
-b or -p is used.

-o
Voice mode output. Outputs all id: and voice: lines for the given target!

-v
Enables verbose (debug) output.
MOO
    exit;
}
|
|
|
|
# How update works:
#
# 1) Scan the english file and keep the whole <phrase> for each phrase.
# 2) Read the translated file and, at the end of each phrase, compare:
#    A) all source strings; if anything changed, a comment about it is
#       written to the output
#    B) the desc fields
#
# 3) Output the phrase together with the comments from above.
# 4) Check which phrases the translated version did not have, and spit out
#    the english version of those.
#
|
|
|
|
# Command line switches. The script runs under "perl -s", so every -x=value
# switch shows up as the package variable $x.
my $prefix   = $p;   # -p: base name of the .c/.h files to generate
my $binary   = $b;   # -b: name of the binary .lng file to generate
my $update   = $u;   # -u: update a translation against the english file

my $english  = $e;   # -e: the english (master) language file
my $voiceout = $o;   # -o: voice mode output

# Exactly one of the four output modes must be selected.
my $check = ($binary?1:0) + ($prefix?1:0) + ($update?1:0) + ($voiceout?1:0);

# Usage errors are reported on STDERR with a non-zero exit status so that a
# calling build notices them and they never mix with generated output.
if($check > 1) {
    print STDERR "Please use only one of -p, -u, -o and -b\n";
    exit 1;
}
if(!$check) {
    print STDERR "Please use at least one of -p, -u, -o and -b\n";
    exit 1;
}
if(($binary || $update || $voiceout) && !$english) {
    print STDERR "Please use -e too when you use -b, -o or -u\n";
    exit 1;
}

# -t: the target to pick strings for; only update mode works without one.
my $target = $t;
if(!$target && !$update) {
    print STDERR "Please specify a target (with -t)!\n";
    exit 1;
}
my $verbose=$v;      # -v: verbose (debug) output
|
|
|
|
# Lookup tables that are filled in while the translated file is scanned.
my (%id, @idnum);            # ID string => number, and number => ID string
my (%source, %dest, %voice); # ID string => source / dest / voice phrase

# The language file to process is the first non-switch argument.
my $input = $ARGV[0];

# While parsing, $m holds the name of the sub that handles "name: value"
# lines inside the current tag and @m is the stack of the enclosing tags'
# handlers. Outside of any tag the do-nothing handler "blank" is used.
my @m;
my $m = "blank";
|
|
|
|
# match($string, $pattern)
#
# Tell whether $string matches the shell-style wildcard $pattern, where '*'
# stands for any run of characters (including none) and '?' for exactly one
# character. All other characters are taken literally and the pattern has to
# cover the whole string, so "ipod" does not match the target "ipodvideo".
#
# Returns a true value on match and a false value otherwise.
sub match {
    my ($string, $pattern) = @_;

    # Escape everything first so regex metacharacters are literal, then
    # translate the (now backslash-escaped) wildcards to their regex form.
    my $regex = quotemeta($pattern);
    $regex =~ s/\\\*/.*/g;
    $regex =~ s/\\\?/./g;

    return ($string =~ /\A$regex\z/);
}
|
|
|
|
# Handler used for "name: value" lines that show up outside of any tag.
# It deliberately ignores its arguments and returns nothing.
sub blank {
    return;
}
|
|
|
|
# Fields collected from <header> sections, keyed by field name.
my %head;

# Handler for "name: value" lines inside a <header> tag. Gets the full line,
# the field name and the field value; stores the value under the name and
# returns it.
sub header {
    my (undef, $field, $value) = @_;
    return $head{$field} = $value;
}
|
|
|
|
# Fields (id, desc, ...) of the phrase that is currently being parsed, keyed
# by field name.
my %phrase;

# Handler for "name: value" lines directly inside a <phrase> tag. Gets the
# full line, the field name and the field value; stores the value under the
# name and returns it.
sub phrase {
    my (undef, $field, $value) = @_;
    return $phrase{$field} = $value;
}
|
|
|
|
# parsetarget($debug, $strref, $full, $n, $v)
#
# Shared worker for the <source>, <dest> and <voice> handlers.
#   $debug  - label of the calling section (only useful while debugging)
#   $strref - reference to the scalar that receives the selected string
#   $full   - the complete input line (unused here)
#   $n      - comma separated list of target patterns on the line
#   $v      - the string given for those targets
#
# If any of the patterns matches the global $target, $v is selected. A true
# selected string is stored through $strref. Returns the selected string, or
# undef when no pattern matched.
sub parsetarget {
    my ($debug, $strref, $full, $n, $v) = @_;
    my $chosen;

    foreach my $wanted (split(" *, *", $n)) {
        # print "TEST ($debug) $target for $wanted\n";
        $chosen = $v if match($target, $wanted);
    }

    $$strref = $chosen if $chosen;
    return $chosen;
}
|
|
|
|
# Source (english) string selected for the current phrase and target.
my $src;

# Handler for "targets: string" lines inside a <source> tag; the selected
# string ends up in $src.
sub source {
    my @line = @_;
    return parsetarget("src", \$src, @line);
}
|
|
|
|
# Destination (translated) string selected for the current phrase and target.
my $dest;

# Handler for "targets: string" lines inside a <dest> tag; the selected
# string ends up in $dest.
sub dest {
    my @line = @_;
    return parsetarget("dest", \$dest, @line);
}
|
|
|
|
# Voice string selected for the current phrase and target.
my $voice;

# Handler for "targets: string" lines inside a <voice> tag; the selected
# string ends up in $voice.
sub voice {
    my @line = @_;
    return parsetarget("voice", \$voice, @line);
}
|
|
|
|
# %idmap:   ID string => ID number, in the order used by the english file
# %english: ID string => all lines of that phrase in the english file
my %idmap;
my %english;
if($english) {
    # For the cases where the english file needs to be scanned/read, we do
    # it before we read the translated file. For -b it isn't necessary, but for
    # -u it is convenient.

    my $idnum=0; # start with a true number
    my $vidnum=0x8000; # first voice id

    # Three-argument open: the file name can never be taken as an open mode.
    open(my $eng_fh, '<', $english)
        or die "can't open $english: $!";

    my @phrase; # lines of the phrase that is currently being read
    my $id;     # ID string of the phrase that is currently being read
    while(my $eline = <$eng_fh>) {

        # get rid of DOS newlines
        $eline =~ s/\r//g;

        if($eline =~ /^ *\<phrase\>/) {
            # this is the start of a phrase; forget whatever was seen outside
            # of a phrase (file header comments and the like) so that it does
            # not become part of this phrase
            @phrase = ();
            undef $id;
        }
        elsif($eline =~ /^ *\<\/phrase\>/) {
            # this is the end of a phrase, add it to the english hash; a
            # phrase without an id line cannot be stored under any key
            if(defined $id) {
                $english{$id}=join("", @phrase);
            }
            @phrase = ();
        }
        elsif($eline ne "\n") {
            # gather everything related to this phrase
            push @phrase, $eline;
        }

        if($eline =~ /^ *id: ([^ \t\n]+)/i) {
            $id=$1;
            # voice-only entries get a different range
            if($id =~ /^VOICE_/) {
                # Assign an ID number to this entry
                $idmap{$id}=$vidnum;
                $vidnum++;
            }
            else {
                # Assign an ID number to this entry
                $idmap{$id}=$idnum;
                $idnum++;
            }
        }
    }
    close($eng_fh);
}
|
|
|
|
# compare($idstr, $engref, $locref)
#
# Compares one phrase of the english file with the same phrase of the
# translated file and prints the updated translated phrase.
#   $idstr  - ID string of the phrase (not used by the comparison itself)
#   $engref - reference to the english phrase lines, without newlines
#   $locref - reference to the translated phrase lines, with newlines
#
# The translated lines are printed as they are, with two exceptions: a desc
# line that differs from the english one is replaced by the english desc
# (the old one is kept as a ### comment), and a <source> section that
# differs is replaced by the english section (the old one is kept as ###
# comments).
sub compare {
    my ($idstr, $engref, $locref) = @_;
    my ($edesc, $ldesc);
    my ($esource, $lsource);

    # Pass 1: fetch the desc and the first <source> section from the english
    # phrase.
    my $in_source = 0;
    foreach my $eline (@$engref) {
        if($eline =~ /^ *desc: (.*)/) {
            $edesc = $1;
            next;
        }
        if($eline =~ / *\<source\>/i) {
            $in_source = 1;
            next;
        }
        next unless $in_source;
        last if $eline =~ / *\<\/source\>/i;
        $esource .= "$eline\n";
    }

    # Pass 2: walk the translated phrase. Lines are queued in @pending and
    # flushed when the end of a <source> section is reached, and once more
    # after the last line.
    my @pending;
    $in_source = 0;
    foreach my $lline (@$locref) {
        if($lline =~ /^ *desc: (.*)/) {
            $ldesc = $1;
            if($edesc ne $ldesc) {
                # $lline aliases the caller's array element, just as before
                $lline = "### The 'desc' field differs from the english!\n### the previously used desc is commented below:\n### desc: $ldesc\n desc: $edesc\n";
            }
            push @pending, $lline;
        }
        elsif($lline =~ / *\<source\>/i) {
            $in_source = 1;
            push @pending, $lline;
        }
        elsif(!$in_source) {
            push @pending, $lline;
        }
        elsif($lline =~ / *\<\/source\>/i) {
            $in_source = 0;
            print @pending;
            if($esource eq $lsource) {
                print $lsource;
            }
            else {
                print "### The <source> section differs from the english!\n",
                      "### the previously used one is commented below:\n";
                print "### $_\n" for split("\n", $lsource);
                print $esource;
            }
            @pending = ($lline); # start over with the closing tag
        }
        else {
            $lsource .= "$lline";
        }
    }

    print @pending;
}
|
|
|
|
my $idcount=0; # counter for lang ID numbers
my $voiceid=0x8000; # counter for voice-only ID numbers

#
# Now start the scanning of the selected language string
#
# The file is read line by line. An opening tag makes the sub named like the
# tag the handler for the "name: value" lines that follow, a closing tag
# brings back the handler of the enclosing tag. At each </phrase> the
# strings collected for the phrase are stored in the lookup tables and, in
# update mode, the phrase is compared with its english counterpart.
#

# Three-argument open: the file name can never be taken as an open mode.
open(my $lang_fh, '<', $input)
    or die "couldn't read language file named $input: $!\n";
my @phrase; # all lines of the phrase that is currently being read
while(<$lang_fh>) {

    $line++;

    # get rid of DOS newlines
    $_ =~ s/\r//g;

    if($_ =~ /^( *\#|[ \t\n\r]*\z)/) {
        # comment or empty line
        next;
    }

    # print "M: $m\n";

    push @phrase, $_;

    # this is an XML-lookalike tag
    if(/ *<([^>]*)>/) {
        my $part = $1;
        #print "P: $part\n";

        if($part =~ /^\//) {
            # this was a closing tag

            if($part eq "/phrase") {
                # closing the phrase

                my $idstr = $phrase{'id'};
                my $idnum;

                if($dest =~ /^none\z/i) {
                    # "none" as dest (without quotes) means that this entire
                    # phrase is to be ignored
                }
                else {
                    # allow the keyword 'deprecated' to be used on dest and
                    # voice strings to mark that as deprecated. It will then
                    # be replaced with "".

                    $dest =~ s/^deprecate(|d)\z/\"\"/i;
                    $voice =~ s/^deprecate(|d)\z/\"\"/i;

                    # basic syntax error alerts: a string that was given has
                    # to start with a quote. This must be a pattern match
                    # (!~); a numeric != compares against a match on $_.
                    if(defined($dest) && $dest !~ /^\"/) {
                        print STDERR "dest around line $line lacks quotes!\n";
                    }
                    if(defined($src) && $src !~ /^\"/) {
                        print STDERR "source around line $line lacks quotes!\n";
                    }
                    if(defined($voice) && $voice !~ /^\"/) {
                        print STDERR "voice around line $line lacks quotes!\n";
                    }

                    # Use the ID name to figure out which id number range we
                    # should use for this phrase. Voice-only strings are
                    # separated.
                    if($idstr =~ /^VOICE/) {
                        $idnum = $voiceid++;
                    }
                    else {
                        $idnum = $idcount++;
                    }

                    $id{$idstr} = $idnum;
                    $idnum[$idnum]=$idstr;

                    $source{$idstr}=$src;
                    $dest{$idstr}=$dest;
                    $voice{$idstr}=$voice;

                    if($verbose) {
                        print "id: $phrase{id} ($idnum)\n";
                        print "source: $src\n";
                        print "dest: $dest\n";
                        print "voice: $voice\n";
                    }
                }

                # Forget the per-phrase state for ignored phrases as well, so
                # that a "none" dest cannot leak into the following phrase.
                undef $src;
                undef $dest;
                undef $voice;
                undef %phrase;

                if($update) {
                    my $e = $english{$idstr};

                    if($e) {
                        # compare original english with this!
                        my @eng = split("\n", $english{$idstr});

                        compare($idstr, \@eng, \@phrase);

                        $english{$idstr}=""; # clear it
                    }
                    else {
                        print "### $idstr: The phrase is not used. Skipped\n";
                    }
                }
                undef @phrase;

            } # end of </phrase>

            # starts with a slash, this _ends_ this section
            $m = pop @m; # get back old value, the previous level's tag
            next;
        } # end of tag close

        # This is an opening (sub) tag

        push @m, $m; # store old value
        $m = $part;
        next;
    }

    if(/^ *([^:]+): *(.*)/) {
        my ($name, $val)=($1, $2);
        # $m holds the NAME of the handler sub for the current tag
        &$m($_, $name, $val);
    }
}
close($lang_fh);
|
|
|
|
# Update mode, final step: every english phrase that is still non-empty was
# never seen in the translated file, so print its english version.
if($update) {
    foreach my $missing (grep { $_ } values %english) {
        print "###\n",
              "### This phrase below was not present in the translated file\n",
              "<phrase>\n";
        print $missing;
        print "</phrase>\n";
    }
}
|
|
|
|
# Produce the requested output from the tables built while scanning:
#   -p  a C source and header file with the built-in strings
#   -b  a binary language (.lng) file
#   -o  id:/voice: pairs in the order of the english file
if($prefix) {
    # We create a .c and .h file

    open(my $hfile, '>', "$prefix.h")
        or die "couldn't create file $prefix.h: $!\n";
    open(my $cfile, '>', "$prefix.c")
        or die "couldn't create file $prefix.c: $!\n";

    print {$hfile} <<MOO;
/* This file was automatically generated using genlang2 */
/*
* The str() macro/functions is how to access strings that might be
* translated. Use it like str(MACRO) and expect a string to be
* returned!
*/
#define str(x) language_strings[x]

/* this is the array for holding the string pointers.
It will be initialized at runtime. */
extern unsigned char *language_strings[];
/* this contains the concatenation of all strings, separated by \\0 chars */
extern const unsigned char language_builtin[];

/* The enum below contains all available strings */
enum \{
MOO

    print {$cfile} <<MOO;
/* This file was automaticly generated using genlang2, the strings come
from "$input" */

#include "$prefix.h"

unsigned char *language_strings[LANG_LAST_INDEX_IN_ARRAY];
const unsigned char language_builtin[] =
MOO

    # Output the ID names for the enum in the header file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf {$hfile} " %s,\n", $name;
    }

    # Output separation marker for last string ID and the upcoming voice IDs

    print {$hfile} <<MOO;
LANG_LAST_INDEX_IN_ARRAY, /* this is not a string, this is a marker */
/* --- below this follows voice-only strings --- */
VOICEONLY_DELIMITER = 0x8000,
MOO

    # Output the voice-only ID names for the enum in the header file
    for my $i (0x8000 .. ($voiceid-1)) {
        my $name=$idnum[$i]; # get the ID name

        $name =~ s/\"//g; # cut off the quotes

        printf {$hfile} " %s,\n", $name;
    }

    # Output end of enum
    print {$hfile} "\n};\n/* end of generated enum list */\n";

    # Output the target phrases for the source file
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        # insert a \0 before the closing quote; trailing whitespace after
        # the quote is tolerated, as in the binary output below
        $dest =~ s:\"\s*$:\\0\":;

        if(!$dest) {
            # this is just to be on the safe side
            $dest = '"\0"';
        }

        printf {$cfile} " %s\n", $dest;
    }

    # Output end of string chunk
    print {$cfile} <<MOO;
;
/* end of generated string list */
MOO

    # Buffered write errors only show up when the file is closed.
    close($hfile) or die "couldn't write file $prefix.h: $!\n";
    close($cfile) or die "couldn't write file $prefix.c: $!\n";
} # end of the c/h file generation
elsif($binary) {
    # Creation of a binary lang file was requested

    # We must first scan the english file to get the correct order of the id
    # numbers used there, as that is what sets the id order for all language
    # files. The english file is scanned before the translated file was
    # scanned.

    open(my $outf, '>', $binary) or die "Can't create $binary: $!";
    binmode $outf;
    printf {$outf} "\x1a%c", $langversion; # magic lang file header

    # loop over the target phrases
    for my $i (1 .. $idcount) {
        my $name=$idnum[$i - 1]; # get the ID
        my $dest = $dest{$name}; # get the destination phrase

        if($dest) {
            $dest =~ s/^\"(.*)\"\s*$/$1/g; # cut off quotes

            # Now, make sure we get the number from the english sort order:
            my $engid = $idmap{$name};

            if(!defined $engid) {
                # Writing an unknown phrase would store it as ID 0 and
                # thereby clobber the first string.
                print STDERR "$name is not present in $english, skipped\n";
                next;
            }

            # each entry: ID high byte, ID low byte, string, terminating zero
            printf {$outf} "%c%c%s\x00", ($engid>>8), ($engid&0xff), $dest;
            if($verbose) {
                printf("%02x => %s\n", $engid, $dest);
            }
        }
    }
    close($outf) or die "Can't write $binary: $!";
}
elsif($voiceout) {
    # voice output requested, display id: and voice: strings in a v1-like
    # fashion

    my @engl; # english ID number => ID number in the translated file

    # This loops over the strings in the translated language file order
    my @ids = ((0 .. ($idcount-1)));
    push @ids, (0x8000 .. ($voiceid-1));

    for my $i (@ids) {
        my $name=$idnum[$i]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        if($dest) {
            # Now, make sure we get the number from the english sort order:
            my $engid = $idmap{$name};

            # a phrase unknown to the english file has no slot to go into
            next if(!defined $engid);

            $engl[$engid] = $i;

            # print "Input index $i output index $engid\n";
        }
    }
    for my $i (@ids) {

        my $o = $engl[$i];

        if(!defined $o) {
            # Nothing maps to this slot. Emit a placeholder instead of
            # falling back to index 0, which would repeat the first phrase
            # under a wrong number.
            print "#$i\nid: NOT_USED_$i\nvoice: \"\"\n";
            next;
        }

        my $name=$idnum[$o]; # get the ID
        my $dest = $voice{$name}; # get the destination voice string

        print "#$i\nid: $name\nvoice: $dest\n";
    }

}
|
|
|
|
|
|
# Verbose mode: finish with the number of scanned IDs and every field that
# was collected from the header sections.
if($verbose) {
    printf("%d ID strings scanned\n", $idcount);

    print "* head *\n";
    foreach my $field (keys %head) {
        printf "$field: %s\n", $head{$field};
    }
}
|
|
|