52abc68b11
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6395 a1c6a512-1295-4272-9138-f99709370657
122 lines
2.3 KiB
Perl
122 lines
2.3 KiB
Perl
#! /usr/bin/perl -w
|
|
|
|
# Wordnet dictionary database converter
|
|
#
|
|
# Converts the Wordnet prolog data to rockbox dictionary format.
|
|
#
|
|
# Written by Miika Pekkarinen <slasher@ihme.org>
|
|
#
|
|
# $Id$
|
|
|
|
use strict;
|
|
|
|
# Lookup tables shared by the reading and writing passes of this script:
#   %words        - lower-cased word => { synset id => category digit }
#   %descriptions - synset id => description text
my (%words, %descriptions);
|
|
|
|
# Translate a numeric category id into the one-letter tag written to the
# output file: 1 (noun) => 'N', 2 (verb) => 'V', 3 (adjective) and
# 4 (adverb) => 'A'.  Every other id yields '?'.
sub getcatname {
    my ($id) = @_;

    if ($id == 1) {
        return 'N';
    }
    elsif ($id == 2) {
        return 'V';
    }
    elsif ($id == 3 || $id == 4) {
        return 'A';
    }

    return '?';
}
|
|
|
|
# Open the two Wordnet prolog input files for reading and create the
# intermediate output file.  The three-argument form of open keeps the
# mode separate from the file name.  The bareword handle names are kept
# because the rest of the script refers to them.
open(IN_WORD, '<', 'wn_s.pl')        or die "Open fail(#1): $!";
open(IN_DESC, '<', 'wn_g.pl')        or die "Open fail(#2): $!";
open(OUTPUT,  '>', 'dict.preparsed') or die "Open fail(#3): $!";
|
|
|
|
print "Reading word file...\n";

# Read every word entry into memory.  %words is keyed by the lower-cased
# word and then by synset id; the stored value is the category digit.
while (my $line = <IN_WORD>) {
    chomp $line;

    # A blank line carries no entry; skipping it avoids registering an
    # empty word with an empty id.
    next if $line eq '';

    # s(100001740,1,'entity',n,1,11). => 100001740,1,'entity',n,1,11
    $line =~ s/^s\((.*)\)\.$/$1/;

    # Only the synset id (field 1) and the word (field 3) are used.
    my ($seqid, undef, $word) = split /,/, $line, 6;

    # 'entity' => entity
    $word =~ s/^'(.*)'$/$1/;

    # A doubled quote ('') stands for one literal quote.  Replace every
    # occurrence, not only the first one.
    $word =~ s/''/'/g;

    # The first digit of the synset id is used as the category id.
    my $category = substr $seqid, 0, 1;

    $words{lc $word}{$seqid} = $category;
}

close IN_WORD;
|
|
|
|
print "Reading description file...\n";

# Read every description into %descriptions, keyed by synset id.
while (my $line = <IN_DESC>) {
    chomp $line;

    # A blank line carries no entry.
    next if $line eq '';

    # g(100002056,'(a separate and self-contained entity)').
    # => 100002056,'(a separate and self-contained entity)'
    $line =~ s/^g\((.*)\)\.$/$1/;

    # Split on the first comma only, so that commas inside the
    # description text stay part of the description.
    my ($seqid, $desc) = split /,/, $line, 2;

    # '(a separate and self-contained entity)'
    # => a separate and self-contained entity
    $desc =~ s/^'\((.*)\)'$/$1/;

    # A doubled quote ('') stands for one literal quote.  Replace every
    # occurrence, not only the first one.
    $desc =~ s/''/'/g;

    $descriptions{$seqid} = $desc;
}

close IN_DESC;
|
|
|
|
print "Sorting and writing output...\n";

# Walk the words in sorted order and write one line per word:
#   word<TAB>N: 1. desc 2. desc<TAB>V: 1. desc ...
foreach my $word (sort keys %words) {
    my %categories;

    # Collect all definitions of the word, grouped by category id.  The
    # ids are sorted so that the numbering of the definitions is the same
    # on every run instead of following hash iteration order.
    foreach my $id (sort keys %{ $words{$word} }) {
        my $catid       = $words{$word}{$id};
        my $description = $descriptions{$id};

        if (!defined($description) or $description eq '') {
            # Report the id that has no description (not the hash
            # reference holding all ids of the word).
            print STDERR "Error: Failed to link word: $word / $id\n";
            exit 1;
        }

        push @{ $categories{$catid} }, $description;
    }

    # One entry per category that has at least one definition.
    my @parts;

    # 1 = noun
    # 2 = verb
    # 3 = adjective
    # 4 = adverb
    for my $catid (1 .. 4) {
        my $descs = $categories{$catid} or next;

        # Number the definitions of this category starting at 1.
        my $n = 0;
        my @numbered;
        foreach my $desc (@{$descs}) {
            $n++;
            push @numbered, "$n. $desc";
        }

        push @parts, getcatname($catid) . ': ' . join(' ', @numbered);
    }

    # Happens when none of the word's category ids is in 1..4.
    die "Internal error" unless @parts;

    my $finaldesc = join "\t", @parts;
    print OUTPUT "$word\t$finaldesc\n";
}

# Buffered write errors only surface at close, so check it.
close(OUTPUT) or die "Close fail: $!";

print "Done, output was successfully written!\n";
|
|
|
|
|