6 # This file is part of LyX, the document processor.
7 # Licence details can be found in the file COPYING.
9 # author: Michael Gerz, michael.gerz@teststep.org
15 use Encode qw(encode decode);
20 pocheck.pl [-acmpqst] po_file [po_file] ...
22 This script performs some consistency checks on po files.
24 We check for everything listed here, unless one or more of these
25 options is given, in which case we checks only for those requested.
26 -a: Check arguments, like %1\$s
27 -c: Check for colons at end
28 -m: Check for menu shortcuts
29 -p: Check for period at end
30 -q: Check Qt shortcuts
31 -s: Check for space at end
32 -t: Check for uniform translation
33 These options can be given with or without other options.
34 -f: Ignore fuzzy translations
35 -w: Only report summary total of errors
36 -i: Silent mode, report only errors
40 getopts(":hacfmpqstwi", \%options);
42 if (defined($options{h})) {
47 my $only_total = defined($options{w});
48 delete $options{w} if $only_total;
49 my $ignore_fuzzy = defined($options{f});
50 delete $options{f} if $ignore_fuzzy;
51 my $silent_mode = defined($options{i});
52 delete $options{i} if $silent_mode;
54 my $check_args = (!%options or defined($options{a}));
55 my $check_colons = (!%options or defined($options{c}));
56 my $check_spaces = (!%options or defined($options{s}));
57 my $check_periods = (!%options or defined($options{p}));
58 my $check_qt = (!%options or defined($options{q}));
59 my $check_menu = (!%options or defined($options{m}));
60 my $check_trans = (!%options or defined($options{t}));
66 foreach my $pofilename ( @ARGV ) {
69 print "Processing po file '$pofilename'...\n";
72 open( INPUT, "<$pofilename" )
73 || die "Cannot read po file '$pofilename'";
78 keys( %trans ) = 10000;
80 my $noOfLines = $#pofile;
85 my ($msgid, $msgid_trans, $msgstr, $more);
87 while ($i <= $noOfLines) {
89 ( $msgid ) = ( $pofile[$i] =~ m/^msgid "(.*)"/ );
93 my $previous = $pofile[$i - 2];
94 next if $previous =~ m/#,.*fuzzy/;
97 # some msgid's are more than one line long, so add those.
98 while ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) {
99 $msgid = $msgid . $more;
103 # now look for the associated msgstr.
104 until ( ( $msgstr ) = ( $pofile[$i] =~ m/^msgstr "(.*)"/ ) ) { $i++; };
106 # again collect any extra lines.
107 while ( ( $i <= $noOfLines ) &&
108 ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) ) {
109 $msgstr = $msgstr . $more;
113 # nothing to do if one of them is empty.
114 # (surely that is always $msgstr?)
115 next if ($msgid eq "" or $msgstr eq "");
117 # discard [[...]] from the end of msgid, this is used only as hint to translation
118 $msgid_trans = $msgid; # used for uniform translation
119 $msgid =~ s/\[\[.*\]\]$//;
121 # Check for matching %1$s, etc.
123 my @argstrs = ( $msgid =~ m/%(\d)\$s/g );
126 foreach my $arg (@argstrs) { $n = $arg if $arg > $n; }
128 print "$pofilename, line $linenum: Problem finding arguments in:\n $msgid!\n"
130 ++$bad{"Missing arguments"};
133 foreach my $i (1..$n) {
134 my $arg = "%$i\\\$s";
135 if ( $msgstr !~ m/$arg/ ) {
136 print "$pofilename, line $linenum: Missing argument `$arg'\n '$msgid' ==> '$msgstr'\n"
138 ++$bad{"Missing arguments"};
147 # Check colon at the end of a message
148 if ( ( $msgid =~ m/: *(\|.*)?$/ ) != ( $msgstr =~ m/: *(\|.*)?$/ ) ) {
149 print "Line $linenum: Missing or unexpected colon:\n '$msgid' => '$msgstr'\n"
151 ++$bad{"Bad colons"};
156 if ($check_periods) {
157 # Check period at the end of a message; uncomment code if you are paranoid
158 if ( ( $msgid =~ m/\. *(\|.*)?$/ ) != ( $msgstr =~ m/\. *(\|.*)?$/ ) ) {
159 print "Line $linenum: Missing or unexpected period:\n '$msgid' => '$msgstr'\n"
161 ++$bad{"Bad periods"};
167 # Check space at the end of a message
168 if ( ( $msgid =~ m/ *?(\|.*)?$/ ) != ( $msgstr =~ m/ *?(\|.*)?$/ ) ) {
169 print "Line $linenum: Missing or unexpected space:\n '$msgid' => '$msgstr'\n"
171 ++$bad{"Bad spaces"};
177 # Check for "&" shortcuts
178 if ( ( $msgid =~ m/&[^ &]/ ) != ( $msgstr =~ m/&[^ &]/ ) ) {
179 print "Line $linenum: Missing or unexpected Qt shortcut:\n '$msgid' => '$msgstr'\n"
181 ++$bad{"Bad Qt shortcuts"};
187 # Check for "|..." shortcuts
188 if ( ( $msgid =~ m/\|[^ ]/ ) != ( $msgstr =~ m/\|[^ ]/ ) ) {
189 print "Line $linenum: Missing or unexpected menu shortcut:\n '$msgid' => '$msgstr'\n"
191 ++$bad{"Bad menu shortcuts"};
196 next unless $check_trans;
198 # we now collect these translations in a hash.
199 # this will allow us to check below if we have translated
200 # anything more than one way.
201 my $msgid_clean = lc($msgid_trans);
202 my $msgstr_clean = mylc($msgstr);
204 $msgid_clean =~ s/(.*)\|.*?$/$1/; # strip menu shortcuts
205 $msgstr_clean =~ s/(.*)\|.*?$/$1/;
206 $msgid_clean =~ s/&([^ ])/$1/; # strip Qt shortcuts
207 $msgstr_clean =~ s/&([^ ])/$1/;
209 # this is a hash of hashes. the keys of the outer hash are
210 # cleaned versions of ORIGINAL strings. the keys of the inner hash
211 # are the cleaned versions of their TRANSLATIONS. The value for the
212 # inner hash is an array of the orignal string and translation.
213 $trans{$msgid_clean}{$msgstr_clean} = [ $msgid_trans, $msgstr, $linenum ];
217 foreach $msgid ( keys %trans ) {
218 # so $ref is a reference to the inner hash.
219 my $ref = $trans{$msgid};
220 # @msgstrkeys is an array of the keys of that inner hash.
221 my @msgstrkeys = keys %$ref;
223 # do we have more than one such key?
224 if ( $#msgstrkeys > 0 ) {
226 print "Different translations for '$msgid':\n";
227 foreach $msgstr ( @msgstrkeys ) {
228 print "Line $ref->{$msgstr}[2]: '" .
229 $ref->{$msgstr}[0] . "' => '" .
230 $ref->{$msgstr}[1] . "'\n";
233 ++$bad{"Inconsistent translations"};
240 while (my ($k, $v) = each %bad) { print "$k: $v\n"; }
241 if (scalar(keys %bad) > 1) {
242 print "Total warnings: $warn\n";
245 print "No warnings!\n";
249 $total_warn += $warn;
252 exit ($total_warn > 0);
254 # Use lowercase also for non-ascii chars
258 return(encode('utf-8',lc(decode('utf-8', $msg))));