X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=po%2Fpocheck.pl;h=68dd83f23c4978d80f43d402d8d96b7c43f6fff0;hb=43dd383073fc61cfe5f24b87294c4e8698a1827a;hp=491935962ebe05e068d26ede34d49ffead5b539b;hpb=39fe2ed0c13be3d06d621209a93a873d0a121e87;p=lyx.git diff --git a/po/pocheck.pl b/po/pocheck.pl index 491935962e..68dd83f23c 100755 --- a/po/pocheck.pl +++ b/po/pocheck.pl @@ -7,114 +7,242 @@ # # author: Michael Gerz, michael.gerz@teststep.org # -# This script performs some consistency checks on po files: -# -# 1. Uniform translation of messages that are identical except -# for capitalization, shortcuts, and shortcut notation. -# 2. Usage of the following elements in both the original and -# the translated message (or no usage at all): -# shortcuts ("&" and "|..."), trailing space, trailing colon -# -# Invocation: -# pocheck.pl po_file po_file ... -foreach $pofilename ( @ARGV ) -{ - print "Processing po file '$pofilename'...\n"; +use strict; +use warnings; +use Getopt::Std; + +my $usage = <; + my @pofile = ; close( INPUT ); undef( %trans ); keys( %trans ) = 10000; - $noOfLines = $#pofile; + my $noOfLines = $#pofile; + + my $warn = 0; - $warn = 0; + my $i = 0; + my ($msgid, $msgstr, $more); - $i = 0; while ($i <= $noOfLines) { - if ( ( $msgid ) = ( $pofile[$i] =~ m/^msgid "(.*)"/ ) ) { + my $linenum = $i; + ( $msgid ) = ( $pofile[$i] =~ m/^msgid "(.*)"/ ); + $i++; + next unless $msgid; + if ($ignore_fuzzy) { + my $previous = $pofile[$i - 2]; + next if $previous =~ m/#,.*fuzzy/; + } + + # some msgid's are more than one line long, so add those. + while ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) { + $msgid = $msgid . $more; $i++; - while ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) { - $msgid = $msgid . $more; - $i++; - } - - until ( ( $msgstr ) = ( $pofile[$i] =~ m/^msgstr "(.*)"/ ) ) { $i++; }; + } + + # now look for the associated msgstr. + until ( ( $msgstr ) = ( $pofile[$i] =~ m/^msgstr "(.*)"/ ) ) { $i++; }; + $i++; + # again collect any extra lines. + while ( ( $i <= $noOfLines ) && + ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) ) { + $msgstr = $msgstr . $more; $i++; - while ( ( $i <= $noOfLines ) && - ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) ) { - $msgstr = $msgstr . $more; - $i++; - } - - if ( $msgid ne "" && $msgstr ne "" ) { - - # Check colon at the end of a message - if ( ( $msgid =~ m/: *(\|.*)?$/ ) != ( $msgstr =~ m/: *(\|.*)?$/ ) ) { - print( "Missing or unexpected colon:\n" ); - print( " '$msgid' => '$msgstr'\n" ); - $warn++; - } + } - # Check period at the end of a message; uncomment code if you are paranoid - #if ( ( $msgid =~ m/\. *(\|.*)?$/ ) != ( $msgstr =~ m/\. *(\|.*)?$/ ) ) { - # print( "Missing or unexpected period:\n" ); - # print( " '$msgid' => '$msgstr'\n" ); - # $warn++; - #} - - # Check space at the end of a message - if ( ( $msgid =~ m/ *?(\|.*)?$/ ) != ( $msgstr =~ m/ *?(\|.*)?$/ ) ) { - print( "Missing or unexpected space:\n" ); - print( " '$msgid' => '$msgstr'\n" ); + # nothing to do if one of them is empty. + # (surely that is always $msgstr?) + next if ($msgid eq "" or $msgstr eq ""); + + # discard [[...]] from the end of msgid, this is used only as hint to translation + $msgid =~ s/\[\[.*\]\]$//; + + # Check for matching %1$s, etc. + if ($check_args) { + my @argstrs = ( $msgid =~ m/%(\d)\$s/g ); + if (@argstrs) { + my $n = 0; + foreach my $arg (@argstrs) { $n = $arg if $arg > $n; } + if ($n <= 0) { + print "$pofilename, line $linenum: Problem finding arguments in:\n $msgid!\n" + unless $only_total; + ++$bad{"Missing arguments"}; $warn++; + } else { + foreach my $i (1..$n) { + my $arg = "%$i\\\$s"; + if ( $msgstr !~ m/$arg/ ) { + print "$pofilename, line $linenum: Missing argument `$arg'\n '$msgid' ==> '$msgstr'\n" + unless $only_total; + ++$bad{"Missing arguments"}; + $warn++; + } + } } + } + } - # Check for "&" shortcuts - if ( ( $msgid =~ m/&[^ ]/ ) != ( $msgstr =~ m/&[^ ]/ ) ) { - print( "Missing or unexpected Qt shortcut:\n" ); - print( " '$msgid' => '$msgstr'\n" ); - $warn++; - } + if ($check_colons) { + # Check colon at the end of a message + if ( ( $msgid =~ m/: *(\|.*)?$/ ) != ( $msgstr =~ m/: *(\|.*)?$/ ) ) { + print "Line $linenum: Missing or unexpected colon:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad colons"}; + $warn++; + } + } - # Check for "|..." shortcut(s) - if ( ( $msgid =~ m/\|[^ ]/ ) != ( $msgstr =~ m/\|[^ ]/ ) ) { - print( "Missing or unexpected xforms shortcut:\n" ); - print( " '$msgid' => '$msgstr'\n" ); - $warn++; - } + if ($check_periods) { + # Check period at the end of a message; uncomment code if you are paranoid + if ( ( $msgid =~ m/\. *(\|.*)?$/ ) != ( $msgstr =~ m/\. *(\|.*)?$/ ) ) { + print "Line $linenum: Missing or unexpected period:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad periods"}; + $warn++; + } + } - $msgid_clean = lc($msgid); - $msgstr_clean = lc($msgstr); + if ($check_spaces) { + # Check space at the end of a message + if ( ( $msgid =~ m/ *?(\|.*)?$/ ) != ( $msgstr =~ m/ *?(\|.*)?$/ ) ) { + print "Line $linenum: Missing or unexpected space:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad spaces"}; + $warn++; + } + } - $msgid_clean =~ s/(.*)\|.*?$/$1/; # strip xforms shortcuts - $msgstr_clean =~ s/(.*)\|.*?$/$1/; - $msgid_clean =~ s/&([^ ])/$1/; # strip Qt shortcuts - $msgstr_clean =~ s/&([^ ])/$1/; + if ($check_qt) { + # Check for "&" shortcuts + if ( ( $msgid =~ m/&[^ ]/ ) != ( $msgstr =~ m/&[^ ]/ ) ) { + print "Line $linenum: Missing or unexpected Qt shortcut:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad Qt shortcuts"}; + $warn++; + } + } - $trans{$msgid_clean}{$msgstr_clean} = [ $msgid, $msgstr ]; + if ($check_menu) { + # Check for "|..." shortcuts + if ( ( $msgid =~ m/\|[^ ]/ ) != ( $msgstr =~ m/\|[^ ]/ ) ) { + print "Line $linenum: Missing or unexpected menu shortcut:\n '$msgid' => '$msgstr'\n" + unless $only_total; + ++$bad{"Bad menu shortcuts"}; + $warn++; } - } else { - $i++; } + + next unless $check_trans; + + # we now collect these translations in a hash. + # this will allow us to check below if we have translated + # anything more than one way. + my $msgid_clean = lc($msgid); + my $msgstr_clean = lc($msgstr); + + $msgid_clean =~ s/(.*)\|.*?$/$1/; # strip menu shortcuts + $msgstr_clean =~ s/(.*)\|.*?$/$1/; + $msgid_clean =~ s/&([^ ])/$1/; # strip Qt shortcuts + $msgstr_clean =~ s/&([^ ])/$1/; + + # this is a hash of hashes. the keys of the outer hash are + # cleaned versions of ORIGINAL strings. the keys of the inner hash + # are the cleaned versions of their TRANSLATIONS. The value for the + # inner hash is an array of the orignal string and translation. + $trans{$msgid_clean}{$msgstr_clean} = [ $msgid, $msgstr, $linenum ]; } - foreach $msgid ( keys %trans ) { - $ref = $trans{$msgid}; - @msgstrkeys = keys %$ref; - - if ( $#msgstrkeys > 0 ) { - print( "Different translations for '$msgid':\n" ); - foreach $msgstr ( @msgstrkeys ) { - print( " '" . $trans{$msgid}{$msgstr}[0] . "' => '" . $trans{$msgid}{$msgstr}[1] . "'\n" ); + if ($check_trans) { + foreach $msgid ( keys %trans ) { + # so $ref is a reference to the inner hash. + my $ref = $trans{$msgid}; + # @msgstrkeys is an array of the keys of that inner hash. + my @msgstrkeys = keys %$ref; + + # do we have more than one such key? + if ( $#msgstrkeys > 0 ) { + if (!$only_total) { + print "Different translations for '$msgid':\n"; + foreach $msgstr ( @msgstrkeys ) { + print "Line $ref->{$msgstr}[2]: '" . + $ref->{$msgstr}[0] . "' => '" . + $ref->{$msgstr}[1] . "'\n"; + } + } + ++$bad{"Inconsistent translations"}; + $warn++; } - $warn++; } } - - print( "\nTotal number of warnings: $warn\n\n" ); + if (!$silent_mode) { + if ($warn) { + while (my ($k, $v) = each %bad) { print "$k: $v\n"; } + if (scalar(keys %bad) > 1) { + print "Total warnings: $warn\n"; + } + } else { + print "No warnings!\n"; + } + print "\n"; + } + $total_warn += $warn; } + +exit ($total_warn > 0); +