6 # This file is part of LyX, the document processor.
7 # Licence details can be found in the file COPYING.
9 # author: Michael Gerz, michael.gerz@teststep.org
15 use Encode qw(encode decode);
18 sub replaceSynopsis($);
21 pocheck.pl [-acmpqst] po_file [po_file] ...
23 This script performs some consistency checks on po files.
25 We check for everything listed here, unless one or more of these
26 options is given, in which case we checks only for those requested.
27 -a: Check arguments, like %1\$s
28 -c: Check for colons at end
29 -m: Check for menu shortcuts
30 -p: Check for period at end
31 -q: Check Qt shortcuts
32 -s: Check for space at end
33 -t: Check for uniform translation
34 These options can be given with or without other options.
35 -f: Ignore fuzzy translations
36 -w: Only report summary total of errors
37 -i: Silent mode, report only errors
41 getopts(":hacfmpqstwi", \%options);
43 if (defined($options{h})) {
48 my $only_total = defined($options{w});
49 delete $options{w} if $only_total;
50 my $ignore_fuzzy = defined($options{f});
51 delete $options{f} if $ignore_fuzzy;
52 my $silent_mode = defined($options{i});
53 delete $options{i} if $silent_mode;
55 my $check_args = (!%options or defined($options{a}));
56 my $check_colons = (!%options or defined($options{c}));
57 my $check_spaces = (!%options or defined($options{s}));
58 my $check_periods = (!%options or defined($options{p}));
59 my $check_qt = (!%options or defined($options{q}));
60 my $check_menu = (!%options or defined($options{m}));
61 my $check_trans = (!%options or defined($options{t}));
67 foreach my $pofilename ( @ARGV ) {
70 print "Processing po file '$pofilename'...\n";
73 open( INPUT, "<$pofilename" )
74 || die "Cannot read po file '$pofilename'";
79 keys( %trans ) = 10000;
81 my $noOfLines = $#pofile;
86 my ($msgid, $msgid_trans, $msgstr, $more);
88 while ($i <= $noOfLines) {
90 ( $msgid ) = ( $pofile[$i] =~ m/^msgid "(.*)"/ );
94 my $previous = $pofile[$i - 2];
95 next if $previous =~ m/#,.*fuzzy/;
98 # some msgid's are more than one line long, so add those.
99 while ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) {
100 $msgid = $msgid . $more;
104 # now look for the associated msgstr.
105 until ( ( $msgstr ) = ( $pofile[$i] =~ m/^msgstr "(.*)"/ ) ) { $i++; };
107 # again collect any extra lines.
108 while ( ( $i <= $noOfLines ) &&
109 ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) ) {
110 $msgstr = $msgstr . $more;
114 # nothing to do if one of them is empty.
115 # (surely that is always $msgstr?)
116 next if ($msgid eq "" or $msgstr eq "");
118 # discard [[...]] from the end of msgid, this is used only as hint to translation
119 $msgid_trans = $msgid; # used for uniform translation
120 $msgid =~ s/\[\[.*\]\]$//;
122 # Check for matching %1$s, etc.
124 my @argstrs = ( $msgid =~ m/%(\d)\$s/g );
127 foreach my $arg (@argstrs) { $n = $arg if $arg > $n; }
129 print "$pofilename, line $linenum: Problem finding arguments in:\n $msgid!\n"
131 ++$bad{"Missing arguments"};
134 foreach my $i (1..$n) {
135 my $arg = "%$i\\\$s";
136 if ( $msgstr !~ m/$arg/ ) {
137 print "$pofilename, line $linenum: Missing argument `$arg'\n '$msgid' ==> '$msgstr'\n"
139 ++$bad{"Missing arguments"};
148 # Check colon at the end of a message
149 if ( ( $msgid =~ m/: *(\|.*)?$/ ) != ( $msgstr =~ m/: *(\|.*)?$/ ) ) {
150 print "Line $linenum: Missing or unexpected colon:\n '$msgid' => '$msgstr'\n"
152 ++$bad{"Bad colons"};
157 if ($check_periods) {
158 # Check period at the end of a message; uncomment code if you are paranoid
159 # Convert '...' to '…' first
160 $msgid = replaceSynopsis($msgid);
161 $msgstr = replaceSynopsis($msgstr);
162 if ( ( $msgid =~ m/\. *(\|.*)?$/ ) != ( $msgstr =~ m/\. *(\|.*)?$/ ) ) {
163 print "Line $linenum: Missing or unexpected period:\n '$msgid' => '$msgstr'\n"
165 ++$bad{"Bad periods"};
171 # Check space at the end of a message (if not a shortcut)
172 my ($msgid1, $msgstr1);
173 ($msgid1 = $msgid) =~ s/\|.$//;
174 ($msgstr1 = $msgstr) =~ s/\|.$//; # TODO: Shortcut may be utf-8 char
175 if (($msgid1 =~ / $/) != ($msgstr1 =~ / $/)) {
176 print "Line $linenum: Missing or unexpected space:\n '$msgid' => '$msgstr'\n"
178 ++$bad{"Bad spaces"};
184 # Check for "&" shortcuts
185 if ( ( $msgid =~ m/&[^ &]/ ) != ( $msgstr =~ m/&[^ &]/ ) ) {
186 print "Line $linenum: Missing or unexpected Qt shortcut:\n '$msgid' => '$msgstr'\n"
188 ++$bad{"Bad Qt shortcuts"};
194 # Check for "|..." shortcuts (space shortcut allowed)
195 # Shortcut is either 1 char (ascii in msgid) or utf8 char (in msgstr)
196 my ($s1, $s2) = (0,0);
197 $s1 = 1 if ($msgid =~ /\|([^\|])$/);
198 if ($msgstr =~ /\|([^\|]+)$/) {
200 my $u = decode('utf-8', $chars);
201 $s2 = 1 if (length($u) == 1);
204 print "Line $linenum: Missing or unexpected menu shortcut:\n '$msgid' => '$msgstr'\n"
206 ++$bad{"Bad menu shortcuts"};
211 next unless $check_trans;
213 # we now collect these translations in a hash.
214 # this will allow us to check below if we have translated
215 # anything more than one way.
216 my $msgid_clean = lc($msgid_trans);
217 my $msgstr_clean = mylc($msgstr);
219 $msgid_clean =~ s/(.*)\|.*?$/$1/; # strip menu shortcuts
220 $msgstr_clean =~ s/(.*)\|.*?$/$1/;
221 $msgid_clean =~ s/&([^ ])/$1/; # strip Qt shortcuts
222 $msgstr_clean =~ s/&([^ ])/$1/;
224 # this is a hash of hashes. the keys of the outer hash are
225 # cleaned versions of ORIGINAL strings. the keys of the inner hash
226 # are the cleaned versions of their TRANSLATIONS. The value for the
227 # inner hash is an array of the orignal string and translation.
228 $trans{$msgid_clean}{$msgstr_clean} = [ $msgid_trans, $msgstr, $linenum ];
232 foreach $msgid ( keys %trans ) {
233 # so $ref is a reference to the inner hash.
234 my $ref = $trans{$msgid};
235 # @msgstrkeys is an array of the keys of that inner hash.
236 my @msgstrkeys = keys %$ref;
238 # do we have more than one such key?
239 if ( $#msgstrkeys > 0 ) {
241 print "Different translations for '$msgid':\n";
242 foreach $msgstr ( @msgstrkeys ) {
243 print "Line $ref->{$msgstr}[2]: '" .
244 $ref->{$msgstr}[0] . "' => '" .
245 $ref->{$msgstr}[1] . "'\n";
248 ++$bad{"Inconsistent translations"};
255 while (my ($k, $v) = each %bad) { print "$k: $v\n"; }
256 if (scalar(keys %bad) > 1) {
257 print "Total warnings: $warn\n";
260 print "No warnings!\n";
264 $total_warn += $warn;
267 exit ($total_warn > 0);
269 # Use lowercase also for non-ascii chars
273 return(encode('utf-8',lc(decode('utf-8', $msg))));
276 sub replaceSynopsis($)
280 return ($string) if ($string !~ /^(.*)\.\.\.(.*)$/);
281 my ($before, $after) = ($1, $2);
282 return $string if (($before =~ /\.$/) || ($after =~ /^\./));
283 return("$before…$after");