}
use CheckURL;
+use Try::Tiny;
+use locale;
+use POSIX qw(locale_h);
-$ENV{LANG} = "en";
-$ENV{LANGUAGE} = "en";
+setlocale(LC_CTYPE, "");
+setlocale(LC_MESSAGES, "en_US.UTF-8");
+
+# Prototypes
+sub printNotUsedURLS($\%);
+sub replaceSpecialChar($);
+sub readUrls($\%);
+sub parse_file($ );
+sub handle_url($$$ );
+##########
# Global state. NOTE(review): this chunk is a diff hunk; the leading
# '+' markers below belong to the patch format, not to the Perl code.
my %URLS = ();           # url -> { file -> [line tags] }, filled by handle_url()
my %ignoredURLS = ();    # urls skipped entirely by the main check loop
my %revertedURLS = ();   # urls whose check result is inverted ($res = ! $res)
my %extraURLS = ();      # extra urls checked in addition to those found in sources
my %selectedURLS = ();   # restricts checking when $checkSelectedOnly is set
+my %knownToRegisterURLS = ();	# urls needing registration: probed with curl or skipped
+my $summaryFile = undef;	# path of the summary output (bareword handle SFO)
my $checkSelectedOnly = 0;
# Command-line handling.
# NOTE(review): the lines splitting each $arg into $type/$val are not
# visible in this hunk; $type and $val are assigned in the omitted context.
for my $arg (@ARGV) {
	if (open(FLIST, $val)) {
	  # $val names a file listing the LyX sources to scan for urls
	  while (my $l = <FLIST>) {
	    chomp($l);
-	    &parse_file($l);
+	    parse_file($l);
	  }
	  close(FLIST);
	}
      }
      elsif ($type eq "ignoredURLS") {
-	&readUrls($val, \%ignoredURLS);
+	readUrls($val, %ignoredURLS);
      }
      elsif ($type eq "revertedURLS") {
-	&readUrls($val, \%revertedURLS);
+	readUrls($val, %revertedURLS);
      }
      elsif ($type eq "extraURLS") {
-	&readUrls($val, \%extraURLS);
+	readUrls($val, %extraURLS);
      }
      elsif ($type eq "selectedURLS") {
	$checkSelectedOnly = 1;
-	&readUrls($val, \%selectedURLS);
+	readUrls($val, %selectedURLS);
+      }
+      elsif ($type eq "knownToRegisterURLS") {
+	readUrls($val, %knownToRegisterURLS);
+      }
+      elsif ($type eq "summaryFile") {
+	# open the summary sink; $summaryFile stays undef if the open fails
+	if (open(SFO, '>', "$val")) {
+	  $summaryFile = $val;
+	}
      }
      else {
	die("Invalid argument \"$arg\"");
      }
# Work list: urls found in the sources plus the known-invalid extras.
my @urls = sort keys %URLS, keys %extraURLS;
+# Tests
+#my @urls = ("ftp://ftp.edpsciences.org/pub/aa/readme.html", "ftp://ftp.springer.de/pub/tex/latex/compsc/proc/author");
my $errorcount = 0;
my $URLScount = 0;
for my $u (@urls) {
-  next if (defined($ignoredURLS{$u}));
-  next if ($checkSelectedOnly && ! defined(${selectedURLS}{$u}));
-  $URLScount++;
-  print "Checking '$u'";
-  my $res = &check_url($u);
-  if ($res) {
-    print ": Failed\n";
+  # bump the hit counter so printNotUsedURLS() can flag stale ignore entries
+  if (defined($ignoredURLS{$u})) {
+    $ignoredURLS{$u}->{count} += 1;
+    next;
  }
-  else {
-    print ": OK\n";
+  my $use_curl = 0;
+  if (defined($knownToRegisterURLS{$u})) {
+    # registration-only urls: either probe with curl or skip entirely
+    if ($knownToRegisterURLS{$u}->{use_curl}) {
+      $use_curl = 1;
+    }
+    else {
+      next;
+    }
+  }
+  if (defined($selectedURLS{$u})) {
+    ${selectedURLS}{$u}->{count} += 1;
  }
+  next if ($checkSelectedOnly && ! defined($selectedURLS{$u}));
+  $URLScount++;
+  print "Checking '$u': ";
+  my ($res, $prnt, $outSum);
+  try {
+    $res = check_url($u, $use_curl);
+    if ($res) {
+      print "Failed\n";
+      $prnt = "";
+      $outSum = 1;
+    }
+    else {
+      $prnt = "OK\n";
+      $outSum = 0;
+    }
+  }
+  catch {
+    # check_url() died: count as failure with synthetic status 700
+    $prnt = "Failed, caught error: $_\n";
+    $outSum = 1;
+    $res = 700;
+  };
+  printx("$prnt", $outSum);
  my $printSourceFiles = 0;
  my $err_txt = "Error url:";
}
# NOTE(review): context lines are missing around the brace above — the
# statements below still use the loop variables ($u, $res, $outSum).
$res = ! $res if (defined($revertedURLS{$u}));
if ($res || $checkSelectedOnly) {
-  print "$err_txt \"$u\"\n";
+  printx("$err_txt \"$u\"\n", $outSum);
}
if ($printSourceFiles) {
  if (defined($URLS{$u})) {
    for my $f(sort keys %{$URLS{$u}}) {
-      print "  $f\n";
+      # append the list of tagged line numbers where the url occurs
+      my $lines = ":" . join(',', @{$URLS{$u}->{$f}});
+      printx("  $f$lines\n", $outSum);
    }
  }
  if ($res ) {
  }
}
+# report list entries that never matched a url from the sources
+if (%URLS) {
+  printNotUsedURLS("Ignored", %ignoredURLS);
+  printNotUsedURLS("Selected", %selectedURLS);
+  printNotUsedURLS("KnownInvalid", %extraURLS);
+}
+
print "\n$errorcount URL-tests failed out of $URLScount\n\n";
+if (defined($summaryFile)) {
+  close(SFO);
+}
exit($errorcount);
###############################################################################
# Emit $txt on stdout; when $outSum is true and a summary file was
# successfully opened (global $summaryFile, bareword handle SFO),
# duplicate the text into the summary as well.
sub printx($$)
{
  my ($txt, $outSum) = @_;
  print $txt;
  return unless $outSum;
  print SFO $txt if defined($summaryFile);
}
+
# Report entries of %$rURLS whose count is still below 2, i.e. urls that
# were listed but never encountered while checking; $txt labels the
# category ("Ignored", "Selected", ...). For each such url the source
# list file and line number recorded by readUrls() are printed.
sub printNotUsedURLS($\%)
{
  my ($txt, $rURLS) = @_;
  my @msg = ();
  for my $u (sort keys %{$rURLS}) {
    next if ($rURLS->{$u}->{count} >= 2);
    my @submsg = ();
    for my $f (sort keys %{$rURLS->{$u}}) {
      # 'count' and 'use_curl' are bookkeeping keys added by readUrls(),
      # not file names; the previous code skipped only 'count' and so
      # printed a bogus "use_curl:0" entry for every url.
      next if ($f eq "count" || $f eq "use_curl");
      push(@submsg, "$f:" . $rURLS->{$u}->{$f});
    }
    push(@msg, "\n  $u\n    " . join("\n    ", @submsg) . "\n");
  }
  if (@msg) {
    print "\n$txt URLs not found in sources: " . join(' ', @msg) . "\n";
  }
}
# Collapse LyX markup '\SpecialChar[NoPassThru] TeX|LaTeX|LyX' (plus one
# optional trailing space) to the bare product name, so urls copied from
# .lyx sources compare cleanly against the plain-text url lists.
# Only the first occurrence is replaced, as before.
sub replaceSpecialChar($)
{
  my ($l) = @_;
  # Use $2 in the replacement: the original '\2' is the deprecated
  # backreference form and triggers '"\2 better written as $2"' warnings.
  $l =~ s/\\SpecialChar(NoPassThru)?\s*(TeX|LaTeX|LyX)\s?/$2/;
  return ($l);
}
+
# Read a url list from $file into %$rUrls (auto-referenced by the \%
# prototype). Format: one url per line, '#' starts a comment, a leading
# 'UseCurl' tag marks urls that must be probed with curl.
# Each new url maps to { $file => $line, count => 1, use_curl => 0|1 };
# only the first occurrence of a url is recorded.
sub readUrls($\%)
{
  my ($file, $rUrls) = @_;
  # Three-arg open with a lexical handle: the previous two-arg
  # open(ULIST, $file) would honour mode characters embedded in $file.
  open(my $ulist, '<', $file) or die("Could not read file $file");
  my $line = 0;
  while (my $l = <$ulist>) {
    $line++;
    $l =~ s/[\r\n]+$//;		# remove eol
    $l =~ s/\s*\#.*$//;		# remove comment
    $l = replaceSpecialChar($l);
    next if ($l eq "");
    my $use_curl = 0;
    # strip the UseCurl tag, remembering that this url needs curl
    $use_curl = 1 if ($l =~ s/^\s*UseCurl\s*//);
    if (!defined($rUrls->{$l})) {
      $rUrls->{$l} = {$file => $line, count => 1, use_curl => $use_curl};
    }
  }
  close($ulist);
}
# Scan one LyX source file $f for urls, registering each occurrence via
# handle_url() with a tagged line number:
#   "u<line>" — inside a 'Flex URL' inset
#   "h<line>" — the target of a 'CommandInset href'
#   "x<line>" — any quoted (ftp|http|https) url elsewhere in the file
# Files under /attic/ are skipped; unreadable files are silently ignored
# (as before). NOTE(review): the visible hunk never closed the
# filehandle or the sub — close($fi) and the final brace are restored
# here, and the bareword two-arg open(FI, $f) becomes a lexical
# three-arg open.
sub parse_file($)
{
  my ($f) = @_;
  my $status = "out";		# outside of URL/href inset
  return if ($f =~ /\/attic\//);
  open(my $fi, '<', $f) or return;
  my $line = 0;
  while (my $l = <$fi>) {
    $line++;
    $l =~ s/[\r\n]+$//;		# simulate chomp
    if ($status eq "out") {
      # searching for "\begin_inset Flex URL" or an href inset
      if ($l =~ /^\s*\\begin_inset\s+Flex\s+URL\s*$/) {
	$status = "inUrlInset";
      }
      elsif ($l =~ /^\s*\\begin_inset\s+CommandInset\s+href\s*$/) {
	$status = "inHrefInset";
      }
      elsif ($l =~ /"((ftp|http|https):\/\/[^ ]+)"/) {
	# urls may also appear quoted outside of any inset
	my $url = $1;
	handle_url($url, $f, "x$line");
      }
    }
    else {
      if ($l =~ /^\s*\\end_(layout|inset)\s*$/) {
	$status = "out";
      }
      elsif ($status eq "inUrlInset") {
	if ($l =~ /\s*([a-z]+:\/\/.+)\s*$/) {
	  my $url = $1;
	  $status = "out";
	  handle_url($url, $f, "u$line");
	}
      }
      elsif ($status eq "inHrefInset") {
	if ($l =~ /^target\s+"([a-z]+:\/\/[^ ]+)"$/) {
	  my $url = $1;
	  $status = "out";
	  handle_url($url, $f, "h$line");
	}
      }
    }
  }
  close($fi);
}
# Record one occurrence of $url, found in file $f at tagged line $line,
# in the global %URLS: $URLS{$url}{$f} collects the list of line tags.
sub handle_url($$$)
{
  my ($url, $f, $line) = @_;
  # plain call, consistent with the other call sites (no '&' syntax)
  $url = replaceSpecialChar($url);
  # explicit initialisation kept for clarity, although push() would
  # autovivify both hash levels on its own
  $URLS{$url} = {} if (!defined($URLS{$url}));
  $URLS{$url}->{$f} = [] if (!defined($URLS{$url}->{$f}));
  push(@{$URLS{$url}->{$f}}, $line);
}