X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=po%2Fpocheck.pl;h=fcc5dc11dd2ab395654a51c3286a4dbb93feaa39;hb=623cc13b60bc7345ad8a45bb1a6ab95e16dba6b0;hp=1c826e17521a228072547dac7cadf51d559fcd92;hpb=c294f1f62393b0ece94f1e6cad0ac3273ade3018;p=lyx.git diff --git a/po/pocheck.pl b/po/pocheck.pl index 1c826e1752..fcc5dc11dd 100755 --- a/po/pocheck.pl +++ b/po/pocheck.pl @@ -1,4 +1,5 @@ #! /usr/bin/perl -w +# -*- mode: perl; -*- # file pocheck.pl # @@ -7,25 +8,67 @@ # # author: Michael Gerz, michael.gerz@teststep.org # -# This script performs some consistency checks on po files: -# -# 1. Uniform translation of messages that are identical except -# for capitalization, shortcuts, and shortcut notation. -# 2. Usage of the following elements in both the original and -# the translated message (or no usage at all): -# shortcuts ("&" and "|..."), trailing space, trailing colon -# -# Invocation: -# pocheck.pl po_file po_file ... use strict; use warnings; +use Getopt::Std; +use Encode qw(encode decode); + +sub mylc($); +sub replaceSynopsis($); + +my $usage = < $n; } - if ($n <= 0) { - print "Problem finding arguments in:\n $msgid!\n"; - $warn++; - } else { - foreach my $i (1..$n) { - my $arg = "%$i\\\$s"; - if ( $msgstr !~ m/$arg/ ) { - print "Missing argument `$arg'\n '$msgid' ==> '$msgstr'\n"; - $warn++; + if ($check_args) { + my @argstrs = ( $msgid =~ m/%(\d)\$s/g ); + if (@argstrs) { + my $n = 0; + foreach my $arg (@argstrs) { $n = $arg if $arg > $n; } + if ($n <= 0) { + print "$pofilename, line $linenum: Problem finding arguments in:\n $msgid!\n" + unless $only_total; + ++$bad{"Missing arguments"}; + $warn++; + } else { + foreach my $i (1..$n) { + my $arg = "%$i\\\$s"; + if ( $msgstr !~ m/$arg/ ) { + print "$pofilename, line $linenum: Missing argument `$arg'\n '$msgid' ==> '$msgstr'\n" + unless $only_total; + ++$bad{"Missing arguments"}; + $warn++; + } } } } } - # Check colon at the end of a message - if ( ( $msgid =~ m/: *(\|.*)?$/ ) != ( $msgstr =~ m/: *(\|.*)?$/ ) ) { - print( "Missing or unexpected colon:\n" ); - print( " '$msgid' => '$msgstr'\n" ); - $warn++; + if ($check_colons) { + # Check colon at the end of a message + if ( ( $msgid =~ m/: *(\|.*)?$/ ) != ( $msgstr =~ m/: *(\|.*)?$/ ) ) { + print "Line $linenum: Missing or unexpected colon:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad colons"}; + $warn++; + } } - # Check period at the end of a message; uncomment code if you are paranoid - #if ( ( $msgid =~ m/\. *(\|.*)?$/ ) != ( $msgstr =~ m/\. *(\|.*)?$/ ) ) { - # print( "Missing or unexpected period:\n" ); - # print( " '$msgid' => '$msgstr'\n" ); - # $warn++; - #} - - # Check space at the end of a message - if ( ( $msgid =~ m/ *?(\|.*)?$/ ) != ( $msgstr =~ m/ *?(\|.*)?$/ ) ) { - print( "Missing or unexpected space:\n" ); - print( " '$msgid' => '$msgstr'\n" ); - $warn++; + if ($check_periods) { + # Check period at the end of a message; uncomment code if you are paranoid + # Convert '...' to '…' first + $msgid = replaceSynopsis($msgid); + $msgstr = replaceSynopsis($msgstr); + if ( ( $msgid =~ m/\. *(\|.*)?$/ ) != ( $msgstr =~ m/\. *(\|.*)?$/ ) ) { + print "Line $linenum: Missing or unexpected period:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad periods"}; + $warn++; + } } - # Check for "&" shortcuts - if ( ( $msgid =~ m/&[^ ]/ ) != ( $msgstr =~ m/&[^ ]/ ) ) { - print( "Missing or unexpected Qt shortcut:\n" ); - print( " '$msgid' => '$msgstr'\n" ); - $warn++; + if ($check_spaces) { + # Check space at the end of a message (if not a shortcut) + my ($msgid1, $msgstr1) = ($msgid, $msgstr); + $msgid1 =~ s/\|.$//; + if ($msgstr =~ /^(.*)\|(.+)$/) { + my ($msg, $shortcut) = ($1, $2); + # Check for unicode char + my $u = decode('utf-8', $shortcut); + if (length($u) == 1) { + $msgstr1 = $msg; + } + } + if (($msgid1 =~ / $/) != ($msgstr1 =~ / $/)) { + print "Line $linenum: Missing or unexpected space:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad spaces"}; + $warn++; + } } - # Check for "|..." shortcuts - if ( ( $msgid =~ m/\|[^ ]/ ) != ( $msgstr =~ m/\|[^ ]/ ) ) { - print( "Missing or unexpected menu shortcut:\n" ); - print( " '$msgid' => '$msgstr'\n" ); - $warn++; + if ($check_qt) { + # Check for "&" shortcuts + if ( ( $msgid =~ m/&[^ &]/ ) != ( $msgstr =~ m/&[^ &]/ ) ) { + print "Line $linenum: Missing or unexpected Qt shortcut:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad Qt shortcuts"}; + $warn++; + } } + + if ($check_menu) { + # Check for "|..." shortcuts (space shortcut allowed) + # Shortcut is either 1 char (ascii in msgid) or utf8 char (in msgstr) + my ($s1, $s2) = (0,0); + $s1 = 1 if ($msgid =~ /\|(.)$/); + if ($msgstr =~ /.*\|(.+)$/) { + my $chars = $1; + my $u = decode('utf-8', $chars); + $s2 = 1 if (length($u) == 1); + } + if($s1 != $s2) { + print "Line $linenum: Missing or unexpected menu shortcut:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad menu shortcuts"}; + $warn++; + } + } + + next unless $check_trans; # we now collect these translations in a hash. # this will allow us to check below if we have translated # anything more than one way. - my $msgid_clean = lc($msgid); - my $msgstr_clean = lc($msgstr); + my $msgid_clean = lc($msgid_trans); + my $msgstr_clean = mylc($msgstr); $msgid_clean =~ s/(.*)\|.*?$/$1/; # strip menu shortcuts $msgstr_clean =~ s/(.*)\|.*?$/$1/; @@ -136,24 +232,60 @@ foreach my $pofilename ( @ARGV ) # cleaned versions of ORIGINAL strings. the keys of the inner hash # are the cleaned versions of their TRANSLATIONS. The value for the # inner hash is an array of the orignal string and translation. - $trans{$msgid_clean}{$msgstr_clean} = [ $msgid, $msgstr ]; + $trans{$msgid_clean}{$msgstr_clean} = [ $msgid_trans, $msgstr, $linenum ]; } - foreach $msgid ( keys %trans ) { - # so $ref is a reference to the inner hash. - my $ref = $trans{$msgid}; - # @msgstrkeys is an array of the keys of that inner hash. - my @msgstrkeys = keys %$ref; - - # do we have more than one such key? - if ( $#msgstrkeys > 0 ) { - print( "Different translations for '$msgid':\n" ); - foreach $msgstr ( @msgstrkeys ) { - print( " '" . $trans{$msgid}{$msgstr}[0] . "' => '" . $trans{$msgid}{$msgstr}[1] . "'\n" ); + if ($check_trans) { + foreach $msgid ( keys %trans ) { + # so $ref is a reference to the inner hash. + my $ref = $trans{$msgid}; + # @msgstrkeys is an array of the keys of that inner hash. + my @msgstrkeys = keys %$ref; + + # do we have more than one such key? + if ( $#msgstrkeys > 0 ) { + if (!$only_total) { + print "Different translations for '$msgid':\n"; + foreach $msgstr ( @msgstrkeys ) { + print "Line $ref->{$msgstr}[2]: '" . + $ref->{$msgstr}[0] . "' => '" . + $ref->{$msgstr}[1] . "'\n"; + } + } + ++$bad{"Inconsistent translations"}; + $warn++; + } + } + } + if (!$silent_mode) { + if ($warn) { + while (my ($k, $v) = each %bad) { print "$k: $v\n"; } + if (scalar(keys %bad) > 1) { + print "Total warnings: $warn\n"; } - $warn++; + } else { + print "No warnings!\n"; } + print "\n"; } + $total_warn += $warn; +} + +exit ($total_warn > 0); + +# Use lowercase also for non-ascii chars +sub mylc($) +{ + my ($msg) = @_; + return(encode('utf-8',lc(decode('utf-8', $msg)))); +} + +sub replaceSynopsis($) +{ + my ($string) = @_; - print( "\nTotal number of warnings: $warn\n\n" ); + return ($string) if ($string !~ /^(.*)\.\.\.(.*)$/); + my ($before, $after) = ($1, $2); + return $string if (($before =~ /\.$/) || ($after =~ /^\./)); + return("$before…$after"); }