4 # file getTranslators.pl
5 # This file is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public
7 # License as published by the Free Software Foundation; either
8 # version 2 of the License, or (at your option) any later version.
10 # This software is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public
16 # License along with this software; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 # Copyright (c) 2015 Kornel Benko <kornel@lyx.org>
# The code below lives in package IdentityParse, which inherits from
# HTML::Parser through "use base".
23 package IdentityParse;
24 use base "HTML::Parser";
# locale_h supplies setlocale() and the LC_* constants used just below.
26 use POSIX qw(locale_h);
# An empty locale string asks setlocale() to take LC_CTYPE from the
# environment; message texts are pinned to English.
29 setlocale(LC_CTYPE, "");
30 setlocale(LC_MESSAGES, "en_US.UTF-8");
31 $ENV{LC_ALL} = "C"; # set language for the output of command 'msgfmt'
# Forward declaration of insertEntry() together with its prototype.
# NOTE(review): the calls visible in this file use the &insertEntry(...) form,
# which bypasses prototypes, so the prototype has no effect on them.
34 sub insertEntry(\%$$);
# Parser instance (indirect-object spelling of IdentityParse->new).
45 my $p = new IdentityParse;
# Page that is rendered by phantomjs and then reported as "Parsed".
48 my $lyxurl = "http://www.lyx.org/I18n-trunk";
49 my $podir = "./po"; # script starts in top binary dir
# The generated phantomjs driver script is written next to the po-files.
50 my $jsfile = "$podir/lyxtranslators.js";
# Lookup table keyed by locale code or by an alternative language name;
# the value is the language name used as key for the collected entries.
# Only names that need rewriting have to be listed here.
52 my %langs = ( # translation to make language spec unique
54 "pt_BR" => "Portuguese (Brazilian)",
70 "ia" => "Interlingua",
76 "nb" => "Norwegian (Bokmål)",
78 "nn" => "Norwegian (Nynorsk)",
86 "zh_CN" => "Chinese (simplified)",
87 "zh_TW" => "Chinese (traditional)",
# Alternative spellings mapped onto the same two Chinese names as above.
88 "Simplified" => "Chinese (simplified)",
89 "Simplified Chinese" => "Chinese (simplified)",
90 "Traditional Chinese" => "Chinese (traditional)",
# Write the phantomjs driver script (the text returned by phantomscript())
# to $jsfile.
# NOTE(review): the result of this open() is not checked on the lines
# visible here.
93 open(FO, '>', "$jsfile");
94 print FO &phantomscript();
# State shared between the HTML callbacks, the po-file scan and the report.
98 my @tag = (); # stack for active html-tags
99 my %page_row = (); # entries for mail, name pofile, language (in the actual row or po-file)
# Keys that a row must define before it is accepted.
100 my @translation_entries = qw(language mail name pofile);
102 my %list = (); # collected list of rows, entry key is language
# Run phantomjs on the generated script and read its output through a pipe
# (two-argument open with a trailing "|").
# NOTE(review): what is done with $fh happens on a line not shown in this
# excerpt; presumably the HTML is handed to the parser - confirm.
105 if (open(my $fh, "phantomjs $jsfile|")) {
107 print "Parsed \"$lyxurl\"\n";
# Reached when the pipe could not be opened (presumably the else branch -
# confirm against the full file).
110 $errors++; # cannot parse html file
111 print "ERROR: Program \"phantomjs\" to parse \"$lyxurl\" could not be executed\n";
# Walk $podir and hand every directory entry whose name ends in ".po"
# to scanPoFile().
114 if (opendir(DI, $podir)) {
116 while (my $po = readdir(DI)) {
117 if ($po =~ /\.po$/) {
118 my $res = &scanPoFile("$po");
# NOTE(review): the test on $res that selects this message is on a line not
# shown here.
120 print "No valid entry found in \"$po\"\n";
# Summary messages; the conditions choosing between them and the code that
# maintains $po_count are not visible in this excerpt.
130 print "ERROR: No correct po-files in directory \"$podir\" found\n";
134 print "Found $po_count po-files with valid translator entry\n";
# Reached when opendir() failed (presumably the else branch - confirm).
138 print "Directory for po-files ($podir) missing\n";
139 $errors++; # PO directory not found, so cannot check
# Report: one output line per collected entry, languages in sorted order.
# Each line consists of the translated percentage, an optional error label,
# the language, the po-file name and the translator's name and mail.
142 for my $lang (sort keys %list) {
143 for my $rentry (@{$list{$lang}}) {
# NOTE(review): "%)" is not a valid sprintf conversion; Perl emits it
# literally and warns when warnings are enabled. "%%" is presumably what
# was intended - confirm before changing.
144 my $prefix = sprintf("(%03d%) ", "$rentry->{fract}");
145 if (defined($rentry->{error})) {
# Error label, left-justified in a 24 character column.
147 $prefix .= sprintf("%-24s", "$rentry->{error}:");
# No error: 24 blanks keep the following columns aligned.
150 $prefix .= sprintf("%24s", "");
152 my $msg = sprintf("%-24s%-10s", "$lang:", "$rentry->{po},");
153 print "$prefix$msg mail = \"$rentry->{name}\" <$rentry->{mail}>\n";
163 ###################################################################
164 # Insert collected row values in %list
#
# Parameters:
#   $rrow  - reference to a row hash; the keys language, mail, name and
#            pofile are read. The mail value is rewritten in place.
#   $cycle - stored unchanged in the new entry; the callers visible in this
#            file pass 1 for rows from the HTML page and 2 for rows from a
#            po-file.
#   $fract - translated percentage; only the po-file caller passes it, so
#            it is undefined for rows coming from the HTML page.
#
# New entries are appended to the array stored under $list{<language>}.
# NOTE(review): the declarations of %entry and $name1 and several branch
# conditions are on lines not shown in this excerpt.
165 sub insertEntry(\%$$)
167 my ($rrow, $cycle, $fract) = @_;
169 # Convert mangled mail
# Only the first " pound " is replaced (no /g); " dot " and " underscore "
# are replaced everywhere.
170 $rrow->{mail} =~ s/ pound /\@/;
171 $rrow->{mail} =~ s/ dot /\./g;
172 $rrow->{mail} =~ s/ underscore /_/g;
174 my $language = $rrow->{language};
# Candidate entry built from the row; the row key "pofile" becomes "po".
175 $entry{mail} = $rrow->{mail};
176 $entry{name} = $rrow->{name};
177 $entry{po} = $rrow->{pofile};
178 $entry{cycle} = $cycle;
179 # Check, if entry already exists
180 if (! defined($list{$language})) {
# First row seen for this language: start with an empty list.
181 $list{$language} = [];
186 for my $rentry (@{$list{$language}}) {
# An existing entry without a percentage takes the one passed in now.
189 if (! defined($rentry->{fract})) {
190 $rentry->{fract} = $fract;
# An existing entry matches only if both mail (compared with fc(), Perl's
# case-folding builtin) and po-file name agree.
193 next if (fc($rentry->{mail}) ne fc($rrow->{mail})); # char case does not matter in mail strings
194 next if ($rentry->{po} ne $rrow->{pofile});
# Remember the name of the matching entry for the comparison further down.
195 $name1 = $rentry->{name};
200 push(@{$list{$language}}, \%entry);
203 $entry{fract} = $fract;
# NOTE(review): the conditions that select the following error labels are
# not visible in this excerpt.
206 $entry{error} = "Missing in page";
207 push(@{$list{$language}}, \%entry);
211 $entry{error} = "Different mail in po";
212 push(@{$list{$language}}, \%entry);
215 # found, but maybe incorrect name?
216 if ($name1 ne $rrow->{name}) {
217 $entry{error} = "Different name in po";
218 push(@{$list{$language}}, \%entry);
224 #######################################################################
225 # Routines called from parse_file(): start(), text(), end().
# Start-tag callback (presumably start(); the sub header is not shown in
# this excerpt). Collects mail address and po-file name from links with
# class "urllink" into %page_row.
228 my ($self, $tag, $attr, $attrseq, $origtext) = @_;
231 if ($tag eq "tr") { # new table row
# Reset the row data for the new table row.
232 &row_init(\%page_row);
# NOTE(review): the loop body is not shown; presumably it resets the
# per-row tag counters in %status - confirm.
233 for my $k (keys %status) {
# Count this tag under the key "Tag_<tagname>".
237 $status{"Tag_" . $tag} = $status{"Tag_" . $tag} + 1;
240 if (defined($attr->{class})) {
241 if ($attr->{class} eq "urllink") {
# td counter at 6: a mailto: link supplies the mail address.
242 if ($status{Tag_td} == 6) {
243 if ($attr->{href} =~ /^mailto:(.*)$/) {
244 $page_row{mail} = $1;
# URL-encoded blanks are turned back into real blanks.
245 $page_row{mail} =~ s/\%20/ /g;
# td counter at 1: the link target ends in the po-file name ("xx.po" or
# "xx_YY.po").
248 elsif ($status{Tag_td} == 1) {
249 if ($attr->{href} =~ /f=po\/([a-z][a-z](_[A-Z][A-Z])?\.po)$/) {
250 $page_row{pofile} = $1;
# Text callback (presumably text(); the sub header is not shown in this
# excerpt). Picks up the language name and the translator name from the
# text it receives, depending on the td counter and the innermost open tag.
260 my ($self, $text) = @_;
262 if ($status{Tag_td} == 1) {
263 if (&actual_tag() eq "a") {
# Accept only text that starts with a capitalized word, optionally followed
# by a blank and more text.
264 if ($text =~ /^[A-Z][a-z]+( .+)?$/) {
265 $page_row{language} = &convertlang($text);
269 if ($status{Tag_td} == 6) {
270 if (&actual_tag() eq "a") {
271 $page_row{name} .= $text; # '.=' because text can be split
273 elsif (&actual_tag() eq "td") { # name without associated e-mail
274 $page_row{name} .= $text;
# End-tag callback (presumably end(); the sub header is not shown in this
# excerpt).
281 my ($self, $tag, $origtext) = @_;
# Unwind the tag stack up to and including the tag being closed.
283 while (my $t = pop(@tag)) {
284 last if ($t eq $tag);
# NOTE(review): the condition guarding the following lines is not shown;
# presumably they run only when a table row ends - confirm.
287 # check row entry for completeness
288 return if (! &row_valid(\%page_row));
# Cycle 1 = row taken from the HTML page; no percentage is passed.
289 &insertEntry(\%page_row, 1);
# Presumably part of actual_tag() (the sub header is not shown in this
# excerpt): no tag is open, so there is nothing to report.
295 return undef if (@tag == 0);
# Presumably part of convertlang() (the sub header is not shown in this
# excerpt): a name or code listed in %langs is replaced by its mapped name.
# NOTE(review): the handling of unlisted values is on lines not shown.
303 if (defined($langs{$ilang})) {
304 return($langs{$ilang});
# Presumably the body of scanPoFile() (the sub header and the declarations
# of $pofile and %po_row are not shown in this excerpt).
# Step 1: obtain translation statistics from msgfmt.
# Step 2: read translator and language from the header of the po-file.
# Returns 0 when msgfmt reported no translated messages.
315 my ($translated, $fuzzy, $untranslated) = (0, 0, 0);
# stderr is merged into the pipe with "2>&1".
# NOTE(review): the command is passed through the shell with $podir/$pofile
# interpolated into it.
317 if (open(FM, "msgfmt -c --statistics $podir/$pofile 2>&1 |")) {
318 while (my $l = <FM>) {
# Each s/// removes the part it matched, so the next pattern can again
# anchor at the start of the remaining line.
319 if ($l =~ s/^(\d+)\s+translated messages.\s*//) {
322 if ($l =~ s/^(\d+)\s+fuzzy translations.\s*//) {
325 if ($l =~ s/^(\d+)\s+untranslated messages//) {
331 return 0 if ($translated == 0);
# Integer percentage of translated messages, truncated by int().
332 my $fract = int(($translated * 100)/($translated+$fuzzy+$untranslated));
333 if (open(FI, "$podir/$pofile")) {
335 $po_row{pofile} = $pofile;
# Skip ahead to the first quoted header line ('"', a capital letter, then
# a colon somewhere after it).
# NOTE(review): as far as visible here, that first header line is consumed
# by this loop and not examined by the next one.
337 while (my $l = <FI>) {
338 last if ($l =~ /^"[A-Z].*:/);
# Examine the following header lines; the first line that does not start
# with a double quote ends the header.
341 while (my $l = <FI>) {
342 last if ($l !~ /^"/);
344 if ($l =~ s/^"Last-Translator:\s//) {
# The value may continue on following lines until one ends with '>\n"'.
# NOTE(review): how $extraline is joined to $l is on lines not shown.
345 while ($l !~ />\\n"$/) {
347 my $extraline = <FI>;
349 $extraline =~ s/^"//;
352 if ($l =~ /^([^<]*)<([^>]*)>/) { # allow empty mail
# Name is the text before "<", with trailing white space removed.
354 ($po_row{name} = $1) =~ s/\s+$//;
358 elsif ($l =~/^"Language:\s*([^\\]+)\\n/) {
359 $po_row{language} = &convertlang($1);
# Cycle 2 = row taken from a po-file; the percentage is passed along.
362 if (&row_valid(\%po_row)) {
363 &insertEntry(\%po_row, 2, $fract);
376 ###########################################################
377 # handling of row entries
# Presumably part of row_init() (the sub header and the declaration of
# $rrow are not shown in this excerpt): mail starts out as an empty string
# rather than undef.
383 $rrow->{mail} = ""; # Allow for empty mail
# Presumably part of row_valid() (the sub header and the declaration of
# $rrow are not shown in this excerpt): the row is rejected with 0 as soon
# as one of the keys listed in @translation_entries is undefined.
389 for my $k (@translation_entries) {
390 return 0 if (! defined($rrow->{$k}));
395 # used by the phantomjs command to output the referred html page
398 return "var page = require(\"webpage\").create();
399 var url = \"$lyxurl\";
402 var f = function () {
403 var html = page.evaluate(function () { return document.documentElement.innerHTML });