From 0a4fbe4836f6a34cb4b44e25b8b3ef097bb65126 Mon Sep 17 00:00:00 2001 From: Kornel Benko Date: Sun, 25 Oct 2015 11:10:14 +0100 Subject: [PATCH] Cmake tests: Check for consistency between po-files and lyx-trunk translations page. --- development/autotests/ExportTests.cmake | 5 +- development/checkurls/getTranslators.pl | 411 ++++++++++++++++++++++++ 2 files changed, 414 insertions(+), 2 deletions(-) create mode 100755 development/checkurls/getTranslators.pl diff --git a/development/autotests/ExportTests.cmake b/development/autotests/ExportTests.cmake index 50b4aa42e3..30a9def4ba 100644 --- a/development/autotests/ExportTests.cmake +++ b/development/autotests/ExportTests.cmake @@ -138,8 +138,9 @@ endmacro() loadTestList(revertedTests revertedTests) loadTestList(ignoredTests ignoredTests) -foreach(libsubfolder doc examples templates) - set(LIBSUB_SRC_DIR "${TOP_SRC_DIR}/lib/${libsubfolder}") +foreach(libsubfolderx lib/doc lib/examples lib/templates development/mathmacros) + set(LIBSUB_SRC_DIR "${TOP_SRC_DIR}/${libsubfolderx}") + string(REGEX REPLACE "^(lib|development)/" "" libsubfolder "${libsubfolderx}") file(GLOB_RECURSE lyx_files RELATIVE "${LIBSUB_SRC_DIR}" "${LIBSUB_SRC_DIR}/*.lyx") list(SORT lyx_files) # Now create 2 lists. One for files in a language dir, one without diff --git a/development/checkurls/getTranslators.pl b/development/checkurls/getTranslators.pl new file mode 100755 index 0000000000..ac60a6123b --- /dev/null +++ b/development/checkurls/getTranslators.pl @@ -0,0 +1,411 @@ +#! /usr/bin/env perl +# -*- mode: perl; -*- + +# file getTranslators.pl +# This file is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This software is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this software; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright (c) 2015 Kornel Benko + +use strict; + +package IdentityParse; +use base "HTML::Parser"; +use locale; +use POSIX qw(locale_h); +use feature 'fc'; + +setlocale(LC_CTYPE, ""); +setlocale(LC_MESSAGES, "en_US.UTF-8"); +$ENV{LC_ALL} = "C"; # set language for the output of command 'msgfmt' + +# Prototypes +sub insertEntry(\%$$); +sub start($$$$$); +sub text($$); +sub end($$$); +sub actual_tag(); +sub convertlang($); +sub scanPoFile($); +sub row_init(\%); +sub row_valid(\%); +sub phantomscript(); + +my $p = new IdentityParse; +$p->strict_names(0); + +my $lyxurl = "http://www.lyx.org/I18n-trunk"; +my $podir = "./po"; # script starts in top binary dir +my $jsfile = "$podir/lyxtranslators.js"; + +my %langs = ( # translation to make language spec unique + "pt" => "Portuguese", + "pt_BR" => "Portuguese (Brazilian)", + "ar" => "Arabic", + "bg" => "Bulgarian", + "ca" => "Catalan", + "cs" => "Czech", + "da" => "Danish", + "de" => "German", + "el" => "Greek", + "en" => "English", + "es" => "Spanish", + "eu" => "Basque", + "fi" => "Finnish", + "fr" => "French", + "gl" => "Galician", + "he" => "Hebrew", + "hu" => "Hungarian", + "ia" => "Interlingua", + "id" => "Indonesian", + "it" => "Italian", + "ja" => "Japanese", + "ko" => "Korean", + "sk" => "Slovak", + "nb" => "Norwegian (Bokmål)", + "nl" => "Dutch", + "nn" => "Norwegian (Nynorsk)", + "pl" => "Polish", + "ro" => "Romanian", + "ru" => "Russian", + "sr" => "Serbian", + "sv" => "Swedish", + "tr" => "Turkish", + "uk" => "Ukrainian", + "zh_CN" => "Chinese (simplified)", + "zh_TW" => "Chinese (traditional)", + "Simplified" => "Chinese (simplified)", + "Simplified Chinese" => "Chinese (simplified)", + "Traditional Chinese" => "Chinese (traditional)", + ); + +open(FO, '>', "$jsfile"); +print FO &phantomscript(); +close(FO); + +my %status = (); +my @tag = (); # stack for active html-tags +my %page_row = (); # entries for mail, name pofile, language (in the actual row or po-file) +my @translation_entries = qw(language mail name pofile); + +my %list = (); # collected list of rows, entry key is language + +my $errors = 0; +if (open(my $fh, "phantomjs $jsfile|")) { + $p->parse_file($fh); + print "Parsed \"$lyxurl\"\n"; +} +else { + $errors++; # cannot parse html file + print "ERROR: Program \"phantomjs\" to parse \"$lyxurl\" could not be executed\n"; +} + +if (opendir(DI, $podir)) { + my $po_count = 0; + while (my $po = readdir(DI)) { + if ($po =~ /\.po$/) { + my $res = &scanPoFile("$po"); + if ($res == 0) { + print "No valid entry found in \"$po\"\n"; + $errors++; + } + else { + $po_count += $res; + } + } + } + closedir(DI); + if ($po_count < 1) { + print "ERROR: No correct po-files in directory \"$podir\" found\n"; + $errors++; + } + else { + print "Found $po_count po-files with valid translator entry\n"; + } +} +else { + print "Directory for po-files ($podir) missing\n"; + $errors++; # PO directory not found, so cannot check +} + +for my $lang (sort keys %list) { + for my $rentry (@{$list{$lang}}) { + my $prefix = sprintf("(%02d%) ", "$rentry->{fract}"); + if (defined($rentry->{error})) { + $errors++; + $prefix .= sprintf("%-24s", "$rentry->{error}:"); + } + else { + $prefix .= sprintf("%24s", ""); + } + my $msg = sprintf("%-24s%-10s", "$lang:", "$rentry->{po},"); + print "$prefix$msg mail = \"$rentry->{name}\" <$rentry->{mail}>\n"; + } +} + +if ($errors > 0) { + exit(1); +} +else { + exit(0); +} +################################################################### +# Insert collected row values in %list +sub insertEntry(\%$$) +{ + my ($rrow, $cycle, $fract) = @_; + + # Convert mangled mail + $rrow->{mail} =~ s/ pound /\@/; + $rrow->{mail} =~ s/ dot /\./g; + $rrow->{mail} =~ s/ underscore /_/g; + my %entry = (); + my $language = $rrow->{language}; + $entry{mail} = $rrow->{mail}; + $entry{name} = $rrow->{name}; + $entry{po} = $rrow->{pofile}; + $entry{cycle} = $cycle; + # Check, if entry already exists + if (! defined($list{$language})) { + $list{$language} = []; + } + my $found = 0; + my $empty = 1; + my $name1 = undef; + for my $rentry (@{$list{$language}}) { + $empty = 0; + if ($cycle == 2) { + if (! defined($rentry->{fract})) { + $rentry->{fract} = $fract; + } + } + next if (fc($rentry->{mail}) ne fc($rrow->{mail})); # char case does not matter in mail strings + next if ($rentry->{po} ne $rrow->{pofile}); + $name1 = $rentry->{name}; + $found = 1; + last; + } + if ($cycle == 1) { + push(@{$list{$language}}, \%entry); + } + else { + $entry{fract} = $fract; + if ($empty) { + if ($fract > 40) { + $entry{error} = "Missing in page"; + push(@{$list{$language}}, \%entry); + } + } + elsif (! $found) { + $entry{error} = "Different mail in po"; + push(@{$list{$language}}, \%entry); + } + else { + # found, but maybe incorrect name? + if ($name1 ne $rrow->{name}) { + $entry{error} = "Different name in po"; + push(@{$list{$language}}, \%entry); + } + } + } +} + +####################################################################### +# Routines called from parse_file(): start(), text(), end(). +sub start($$$$$) +{ + my ($self, $tag, $attr, $attrseq, $origtext) = @_; + + push(@tag, $tag); + if ($tag eq "tr") { # new table row + &row_init(\%page_row); + for my $k (keys %status) { + $status{$k} = 0; + } + } + $status{"Tag_" . $tag} = $status{"Tag_" . $tag} + 1; + + if ($tag eq "a") { + if (defined($attr->{class})) { + if ($attr->{class} eq "urllink") { + if ($status{Tag_td} == 6) { + if ($attr->{href} =~ /^mailto:(.*)$/) { + $page_row{mail} = $1; + $page_row{mail} =~ s/\%20/ /g; + } + } + elsif ($status{Tag_td} == 1) { + if ($attr->{href} =~ /f=po\/([a-z][a-z](_[A-Z][A-Z])?\.po)$/) { + $page_row{pofile} = $1; + } + } + } + } + } +} + +sub text($$) +{ + my ($self, $text) = @_; + + if ($status{Tag_td} == 1) { + if (&actual_tag() eq "a") { + if ($text =~ /^[A-Z][a-z]+( .+)?$/) { + $page_row{language} = &convertlang($text); + } + } + } + if ($status{Tag_td} == 6) { + if (&actual_tag() eq "a") { + $page_row{name} = $text; + } + elsif (&actual_tag() eq "td") { # name without associated e-mail + $page_row{name} = $text; + } + } +} + +sub end($$$) +{ + my ($self, $tag, $origtext) = @_; + + while (my $t = pop(@tag)) { + last if ($t eq $tag); + } + if ($tag eq "tr") { + # check row entry for completeness + return if (! &row_valid(\%page_row)); + &insertEntry(\%page_row, 1); + } +} + +sub actual_tag() +{ + return undef if (@tag == 0); + return($tag[$#tag]); +} + +sub convertlang($) +{ + my ($ilang) = @_; + $ilang =~ s/\s+$//; + if (defined($langs{$ilang})) { + return($langs{$ilang}); + } + else { + return($ilang); + } +} + +sub scanPoFile($) +{ + my ($pofile) = @_; + my %po_row; + my ($translated, $fuzzy, $untranslated) = (0, 0, 0); + + if (open(FM, "msgfmt -c --statistics $podir/$pofile 2>&1 |")) { + while (my $l = ) { + if ($l =~ s/^(\d+)\s+translated messages.\s*//) { + $translated = $1; + } + if ($l =~ s/^(\d+)\s+fuzzy translations.\s*//) { + $fuzzy = $1; + } + if ($l =~ s/^(\d+)\s+untranslated messages//) { + $untranslated = $1; + } + } + close(FM); + } + return 0 if ($translated == 0); + my $fract = int(($translated * 100)/($translated+$fuzzy+$untranslated)); + if (open(FI, "$podir/$pofile")) { + &row_init(\%po_row); + $po_row{pofile} = $pofile; + my $i = 0; + while (my $l = ) { + last if ($l =~ /^"[A-Z].*:/); + } + my $inserted = 0; + while (my $l = ) { + last if ($l !~ /^"/); + chomp($l); + if ($l =~ s/^"Last-Translator:\s//) { + while ($l !~ />\\n"$/) { + $l =~ s/"$//; + my $extraline = ; + chomp($extraline); + $extraline =~ s/^"//; + $l .= $extraline + } + if ($l =~ /^([^<]*)<([^>]*)>/) { # allow empty mail + $po_row{mail} = $2; + ($po_row{name} = $1) =~ s/\s+$//; + $i += 2; + } + } + elsif ($l =~/^"Language:\s*([^\\]+)\\n/) { + $po_row{language} = &convertlang($1); + $i += 1; + } + if (&row_valid(\%po_row)) { + &insertEntry(\%po_row, 2, $fract); + $inserted++; + last; + } + } + close(FI); + return($inserted); + } + else { + return(0); + } +} + +########################################################### +# handling of row entries + +sub row_init(\%) +{ + my ($rrow) = @_; + %{$rrow} = (); + $rrow->{mail} = ""; # Allow for empty mail +} + +sub row_valid(\%) +{ + my ($rrow) = @_; + for my $k (@translation_entries) { + return 0 if (! defined($rrow->{$k})); + } + return(1); +} + +# used by phantomjs command to output the refered html page +sub phantomscript() +{ + return "var page = require(\"webpage\").create(); +var url = \"$lyxurl\"; +page.open(url, + function (status) { + var f = function () { + var html = page.evaluate(function () { return document.documentElement.innerHTML }); + console.log(html); + phantom.exit(); + }; + setTimeout(f, 5000); + } +); +"; +} -- 2.39.2