1.) The lowercase command (lc) does not do a good job for German, Slovak, etc.: non-ASCII letters are left unchanged.
For example, lc("BÄR") gives "bÄr" instead of "bär".
2.) Don't discard [[...]] when checking for uniform translation
use strict;
use warnings;
use Getopt::Std;
use strict;
use warnings;
use Getopt::Std;
+use Encode qw(encode decode);
+
+sub mylc($);
my $usage = <<EOT;
pocheck.pl [-acmpqst] po_file [po_file] ...
my $usage = <<EOT;
pocheck.pl [-acmpqst] po_file [po_file] ...
- my ($msgid, $msgstr, $more);
+ my ($msgid, $msgid_trans, $msgstr, $more);
while ($i <= $noOfLines) {
my $linenum = $i;
while ($i <= $noOfLines) {
my $linenum = $i;
next if ($msgid eq "" or $msgstr eq "");
# discard [[...]] from the end of msgid, this is used only as hint to translation
next if ($msgid eq "" or $msgstr eq "");
# discard [[...]] from the end of msgid, this is used only as hint to translation
+ $msgid_trans = $msgid; # used for uniform translation
$msgid =~ s/\[\[.*\]\]$//;
# Check for matching %1$s, etc.
$msgid =~ s/\[\[.*\]\]$//;
# Check for matching %1$s, etc.
# we now collect these translations in a hash.
# this will allow us to check below if we have translated
# anything more than one way.
# we now collect these translations in a hash.
# this will allow us to check below if we have translated
# anything more than one way.
- my $msgid_clean = lc($msgid);
- my $msgstr_clean = lc($msgstr);
+ my $msgid_clean = lc($msgid_trans);
+ my $msgstr_clean = mylc($msgstr);
$msgid_clean =~ s/(.*)\|.*?$/$1/; # strip menu shortcuts
$msgstr_clean =~ s/(.*)\|.*?$/$1/;
$msgid_clean =~ s/(.*)\|.*?$/$1/; # strip menu shortcuts
$msgstr_clean =~ s/(.*)\|.*?$/$1/;
# cleaned versions of ORIGINAL strings. the keys of the inner hash
# are the cleaned versions of their TRANSLATIONS. The value for the
# inner hash is an array of the orignal string and translation.
# cleaned versions of ORIGINAL strings. the keys of the inner hash
# are the cleaned versions of their TRANSLATIONS. The value for the
# inner hash is an array of the orignal string and translation.
- $trans{$msgid_clean}{$msgstr_clean} = [ $msgid, $msgstr, $linenum ];
+ $trans{$msgid_clean}{$msgstr_clean} = [ $msgid_trans, $msgstr, $linenum ];
+# Use lowercase also for non-ascii chars.
+#
+# Takes one string of UTF-8 encoded bytes and returns it lowercased, again
+# as UTF-8 encoded bytes.  The bytes are decoded to characters first so
+# that lc() also lowercases non-ASCII letters (e.g. "BÄR" -> "bär").
+#
+# If the argument is not valid UTF-8, it is lowercased with plain lc()
+# instead, so that malformed bytes are kept as they are rather than being
+# replaced by U+FFFD (which could make different strings compare equal).
+sub mylc($)
+{
+  my ($msg) = @_;
+  local $@;    # do not clobber the caller's $@ with our eval
+  # Strict decode: croak on malformed input, and leave $msg untouched so
+  # that it is still intact for the fallback below.
+  my $chars = eval {
+    decode('utf-8', $msg, Encode::FB_CROAK() | Encode::LEAVE_SRC());
+  };
+  return(lc($msg)) unless defined $chars;
+  return(encode('utf-8', lc($chars)));
+}