6 # This file is part of LyX, the document processor.
7 # Licence details can be found in the file COPYING.
9 # author: Michael Gerz, michael.gerz@teststep.org
15 use Encode qw(encode decode);
18 sub replaceSynopsis($);
21 pocheck.pl [-acmpqst] po_file [po_file] ...
23 This script performs some consistency checks on po files.
25 We check for everything listed here, unless one or more of these
26 options is given, in which case we checks only for those requested.
27 -a: Check arguments, like %1\$s
28 -c: Check for colons at end
29 -m: Check for menu shortcuts
30 -p: Check for period at end
31 -q: Check Qt shortcuts
32 -s: Check for space at end
33 -t: Check for uniform translation
34 These options can be given with or without other options.
35 -f: Ignore fuzzy translations
36 -w: Only report summary total of errors
37 -i: Silent mode, report only errors
41 getopts(":hacfmpqstwi", \%options);
43 if (defined($options{h})) {
48 my $only_total = defined($options{w});
49 delete $options{w} if $only_total;
50 my $ignore_fuzzy = defined($options{f});
51 delete $options{f} if $ignore_fuzzy;
52 my $silent_mode = defined($options{i});
53 delete $options{i} if $silent_mode;
55 my $check_args = (!%options or defined($options{a}));
56 my $check_colons = (!%options or defined($options{c}));
57 my $check_spaces = (!%options or defined($options{s}));
58 my $check_periods = (!%options or defined($options{p}));
59 my $check_qt = (!%options or defined($options{q}));
60 my $check_menu = (!%options or defined($options{m}));
61 my $check_trans = (!%options or defined($options{t}));
67 foreach my $pofilename ( @ARGV ) {
70 print "Processing po file '$pofilename'...\n";
73 open( INPUT, "<$pofilename" )
74 || die "Cannot read po file '$pofilename'";
79 keys( %trans ) = 10000;
81 my $noOfLines = $#pofile;
86 my ($msgid, $msgid_trans, $msgstr, $more);
88 while ($i <= $noOfLines) {
90 ( $msgid ) = ( $pofile[$i] =~ m/^msgid "(.*)"/ );
94 my $previous = $pofile[$i - 2];
95 next if $previous =~ m/#,.*fuzzy/;
98 # some msgid's are more than one line long, so add those.
99 while ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) {
100 $msgid = $msgid . $more;
104 # now look for the associated msgstr.
105 until ( ( $msgstr ) = ( $pofile[$i] =~ m/^msgstr "(.*)"/ ) ) { $i++; };
107 # again collect any extra lines.
108 while ( ( $i <= $noOfLines ) &&
109 ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) ) {
110 $msgstr = $msgstr . $more;
114 # nothing to do if one of them is empty.
115 # (surely that is always $msgstr?)
116 next if ($msgid eq "" or $msgstr eq "");
118 # discard [[...]] from the end of msgid, this is used only as hint to translation
119 $msgid_trans = $msgid; # used for uniform translation
120 $msgid =~ s/\[\[.*\]\]$//;
122 # Check for matching %1$s, etc.
124 my @argstrs = ( $msgid =~ m/%(\d)\$s/g );
127 foreach my $arg (@argstrs) { $n = $arg if $arg > $n; }
129 print "$pofilename, line $linenum: Problem finding arguments in:\n $msgid!\n"
131 ++$bad{"Missing arguments"};
134 foreach my $i (1..$n) {
135 my $arg = "%$i\\\$s";
136 if ( $msgstr !~ m/$arg/ ) {
137 print "$pofilename, line $linenum: Missing argument `$arg'\n '$msgid' ==> '$msgstr'\n"
139 ++$bad{"Missing arguments"};
148 # Check colon at the end of a message
149 if ( ( $msgid =~ m/: *(\|.*)?$/ ) != ( $msgstr =~ m/: *(\|.*)?$/ ) ) {
150 print "Line $linenum: Missing or unexpected colon:\n '$msgid' => '$msgstr'\n"
152 ++$bad{"Bad colons"};
157 if ($check_periods) {
158 # Check period at the end of a message; uncomment code if you are paranoid
159 # Convert '...' to '…' first
160 $msgid = replaceSynopsis($msgid);
161 $msgstr = replaceSynopsis($msgstr);
162 if ( ( $msgid =~ m/\. *(\|.*)?$/ ) != ( $msgstr =~ m/\. *(\|.*)?$/ ) ) {
163 print "Line $linenum: Missing or unexpected period:\n '$msgid' => '$msgstr'\n"
165 ++$bad{"Bad periods"};
171 # Check space at the end of a message (if not a shortcut)
172 my ($msgid1, $msgstr1) = ($msgid, $msgstr);
174 if ($msgstr =~ /^(.*)\|(.+)$/) {
175 my ($msg, $shortcut) = ($1, $2);
176 # Check for unicode char
177 my $u = decode('utf-8', $shortcut);
178 if (length($u) == 1) {
182 if (($msgid1 =~ / $/) != ($msgstr1 =~ / $/)) {
183 print "Line $linenum: Missing or unexpected space:\n '$msgid' => '$msgstr'\n"
185 ++$bad{"Bad spaces"};
191 # Check for "&" shortcuts
192 if ( ( $msgid =~ m/&[^ &]/ ) != ( $msgstr =~ m/&[^ &]/ ) ) {
193 print "Line $linenum: Missing or unexpected Qt shortcut:\n '$msgid' => '$msgstr'\n"
195 ++$bad{"Bad Qt shortcuts"};
201 # Check for "|..." shortcuts (space shortcut allowed)
202 # Shortcut is either 1 char (ascii in msgid) or utf8 char (in msgstr)
203 my ($s1, $s2) = (0,0);
204 $s1 = 1 if ($msgid =~ /\|(.)$/);
205 if ($msgstr =~ /.*\|(.+)$/) {
207 my $u = decode('utf-8', $chars);
208 $s2 = 1 if (length($u) == 1);
211 print "Line $linenum: Missing or unexpected menu shortcut:\n '$msgid' => '$msgstr'\n"
213 ++$bad{"Bad menu shortcuts"};
218 next unless $check_trans;
220 # we now collect these translations in a hash.
221 # this will allow us to check below if we have translated
222 # anything more than one way.
223 my $msgid_clean = lc($msgid_trans);
224 my $msgstr_clean = mylc($msgstr);
226 $msgid_clean =~ s/(.*)\|.*?$/$1/; # strip menu shortcuts
227 $msgstr_clean =~ s/(.*)\|.*?$/$1/;
228 $msgid_clean =~ s/&([^ ])/$1/; # strip Qt shortcuts
229 $msgstr_clean =~ s/&([^ ])/$1/;
231 # this is a hash of hashes. the keys of the outer hash are
232 # cleaned versions of ORIGINAL strings. the keys of the inner hash
233 # are the cleaned versions of their TRANSLATIONS. The value for the
234 # inner hash is an array of the orignal string and translation.
235 $trans{$msgid_clean}{$msgstr_clean} = [ $msgid_trans, $msgstr, $linenum ];
239 foreach $msgid ( keys %trans ) {
240 # so $ref is a reference to the inner hash.
241 my $ref = $trans{$msgid};
242 # @msgstrkeys is an array of the keys of that inner hash.
243 my @msgstrkeys = keys %$ref;
245 # do we have more than one such key?
246 if ( $#msgstrkeys > 0 ) {
248 print "Different translations for '$msgid':\n";
249 foreach $msgstr ( @msgstrkeys ) {
250 print "Line $ref->{$msgstr}[2]: '" .
251 $ref->{$msgstr}[0] . "' => '" .
252 $ref->{$msgstr}[1] . "'\n";
255 ++$bad{"Inconsistent translations"};
262 while (my ($k, $v) = each %bad) { print "$k: $v\n"; }
263 if (scalar(keys %bad) > 1) {
264 print "Total warnings: $warn\n";
267 print "No warnings!\n";
271 $total_warn += $warn;
274 exit ($total_warn > 0);
276 # Use lowercase also for non-ascii chars
280 return(encode('utf-8',lc(decode('utf-8', $msg))));
283 sub replaceSynopsis($)
287 return ($string) if ($string !~ /^(.*)\.\.\.(.*)$/);
288 my ($before, $after) = ($1, $2);
289 return $string if (($before =~ /\.$/) || ($after =~ /^\./));
290 return("$before…$after");