5 # Script to search for URLs in LyX files
6 # and to test their validity.
8 # Syntax: search_url.pl [(filesToScan|(ignored|reverted|extra|selected)URLS)={path_to_control}]*
9 # Param value is a path to a file containing a list of xxx:
10 # filesToScan={xxx = lyx-file-names to be scanned for urls}
11 # ignoredURLS={xxx = urls that are discarded from test}
12 # revertedURLS={xxx = urls that should fail, to test the test with invalid urls}
13 # extraURLS={xxx = urls which should also be checked}
15 # This file is free software; you can redistribute it and/or
16 # modify it under the terms of the GNU General Public
17 # License as published by the Free Software Foundation; either
18 # version 2 of the License, or (at your option) any later version.
20 # This software is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 # General Public License for more details.
25 # You should have received a copy of the GNU General Public
26 # License along with this software; if not, write to the Free Software
27 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 # Copyright (c) 2013 Kornel Benko <kornel@lyx.org>
30 # (c) 2013 Scott Kostyshak <skotysh@lyx.org>
# Absolute path of this script; the substitution strips the last path
# component (plus one preceding '/' or '\'), leaving the script's directory.
36 my $p = File::Spec->rel2abs(__FILE__);
37 $p =~ s/[\/\\]?[^\/\\]+$//;
44 use POSIX qw(locale_h);
# LC_CTYPE is taken from the environment (empty locale string), while
# LC_MESSAGES is pinned to en_US.UTF-8 -- presumably so that messages are
# always English regardless of the user's locale (TODO confirm).
46 setlocale(LC_CTYPE, "");
47 setlocale(LC_MESSAGES, "en_US.UTF-8");
# Forward declarations with prototypes. The "\%" prototype makes a call such
# as printNotUsedURLS("Ignored", %ignoredURLS) pass the hash as a reference.
50 sub printNotUsedURLS($\%);
51 sub replaceSpecialChar($);
# URL tables that are filled from the control files named on the command line.
59 my %revertedURLS = ();
61 my %selectedURLS = ();
62 my %knownToRegisterURLS = ();
63 my $summaryFile = undef;
# Becomes 1 as soon as a selectedURLS argument is seen.
65 my $checkSelectedOnly = 0;
# Every argument must have the form type=value.
# NOTE(review): because split() is assigned to a two-element list without an
# explicit LIMIT, anything after a second '=' in the value is silently
# dropped; split("=", $arg, 2) would keep it.
67 die("Bad argument \"$arg\"") if ($arg !~ /=/);
68 my ($type,$val) = split("=", $arg);
69 if ($type eq "filesToScan") {
70 #The file should be a list of files to search in
# NOTE(review): two-argument open() with a bareword handle; mode characters
# at the start or end of $val would be interpreted by open().
71 if (open(FLIST, $val)) {
72 while (my $l = <FLIST>) {
# The remaining URL-list arguments are all loaded via readUrls() into the
# hash that corresponds to the argument type.
79 elsif ($type eq "ignoredURLS") {
80 readUrls($val, %ignoredURLS);
82 elsif ($type eq "revertedURLS") {
83 readUrls($val, %revertedURLS);
85 elsif ($type eq "extraURLS") {
86 readUrls($val, %extraURLS);
88 elsif ($type eq "selectedURLS") {
# Supplying a selection restricts the later check to the listed URLs.
89 $checkSelectedOnly = 1;
90 readUrls($val, %selectedURLS);
92 elsif ($type eq "knownToRegisterURLS") {
93 readUrls($val, %knownToRegisterURLS);
95 elsif ($type eq "summaryFile") {
# The summary file is opened for writing ('>' truncates an existing file).
96 if (open(SFO, '>', "$val")) {
# Presumably the final else branch: any other argument type is fatal.
101 die("Invalid argument \"$arg\"");
# URLs to test: those collected in %URLS plus the extra ones.
# NOTE(review): a URL present in both hashes appears twice in @urls.
105 my @urls = sort keys %URLS, keys %extraURLS;
107 #my @urls = ("ftp://ftp.edpsciences.org/pub/aa/readme.html", "ftp://ftp.springer.de/pub/tex/latex/compsc/proc/author");
# Usage counters start at 1 when a control file is read and are bumped here;
# printNotUsedURLS() later reports entries whose count is still below 2.
113 if (defined($ignoredURLS{$u})) {
114 $ignoredURLS{$u}->{count} += 1;
# A knownToRegister entry can carry a use_curl flag -- presumably this selects
# the $use_curl value handed to check_url() below (TODO confirm).
118 if (defined($knownToRegisterURLS{$u})) {
119 if ($knownToRegisterURLS{$u}->{use_curl}) {
126 if (defined($selectedURLS{$u})) {
# ${selectedURLS}{$u} is just another spelling of $selectedURLS{$u}.
127 ${selectedURLS}{$u}->{count} += 1;
# With a selection active, URLs outside of it are skipped.
129 next if ($checkSelectedOnly && ! defined($selectedURLS{$u}));
131 print "Checking '$u': ";
132 my ($res, $prnt, $outSum);
# Judging by its use below, a true $res denotes a failed check.
134 $res = check_url($u, $use_curl);
# Presumably set inside an exception handler around the check (TODO confirm).
146 $prnt = "Failed, caught error: $_\n";
150 printx("$prnt", $outSum);
151 my $printSourceFiles = 0;
152 my $err_txt = "Error url:";
# Source locations are printed for failures, and always in selected-only mode.
154 if ($res || $checkSelectedOnly) {
155 $printSourceFiles = 1;
157 if ($res && defined($revertedURLS{$u})) {
158 $err_txt = "Failed url:";
# Reverted URLs are expected to fail, so their result is inverted.
160 $res = ! $res if (defined($revertedURLS{$u}));
161 if ($res || $checkSelectedOnly) {
162 printx("$err_txt \"$u\"\n", $outSum);
164 if ($printSourceFiles) {
# Only URLs found in scanned files have an entry in %URLS.
165 if (defined($URLS{$u})) {
# One output line per file: file name followed by ":" and the comma-joined
# line tags recorded for this URL.
166 for my $f(sort keys %{$URLS{$u}}) {
167 my $lines = ":" . join(',', @{$URLS{$u}->{$f}});
168 printx(" $f$lines\n", $outSum);
# Report control-file entries that were never matched during the run.
178 printNotUsedURLS("Ignored", %ignoredURLS);
179 printNotUsedURLS("Selected", %selectedURLS);
# NOTE(review): the label says "KnownInvalid" while the table is %extraURLS --
# confirm that this pairing is intended.
180 printNotUsedURLS("KnownInvalid", %extraURLS);
# Overall result line printed to standard output.
183 print "\n$errorcount URL-tests failed out of $URLScount\n\n";
# Extra work is done only when a summary file was configured.
184 if (defined($summaryFile)) {
189 ###############################################################################
# Body of a two-argument output helper -- presumably printx(), whose callers
# pass (text, outSum); the sub header is not visible here (TODO confirm).
192 my ($txt, $outSum) = @_;
# The guarded branch runs only if the caller asked for it ($outSum) and a
# summary file is configured.
194 if ($outSum && defined($summaryFile)) {
# Print the entries of a URL table that were never matched during the run.
#   $txt   - label used in the printed heading
#   $rURLS - reference to the URL table (passed by name thanks to the "\%"
#            prototype); each entry is a hash with a "count" key plus other keys
199 sub printNotUsedURLS($\%)
201 my ($txt, $rURLS) = @_;
203 for my $u ( sort keys %{$rURLS}) {
# A count below 2 marks an entry as unused.
204 if ($rURLS->{$u}->{count} < 2) {
# NOTE(review): only "count" is skipped here; entries that also carry a
# "use_curl" key would have it listed as if it were a file name.
206 for my $f (sort keys %{$rURLS->{$u}}) {
207 next if ($f eq "count");
208 push(@submsg, "$f:" . $rURLS->{$u}->{$f});
210 push(@msg, "\n $u\n " . join("\n ", @submsg) . "\n");
214 print "\n$txt URLs not found in sources: " . join(' ',@msg) . "\n";
# Replace a LyX "\SpecialChar" (or "\SpecialCharNoPassThru") macro naming
# TeX, LaTeX or LyX by the plain name. Operates on $l; callers assign the
# sub's result back to their string.
218 sub replaceSpecialChar($)
# Only the first occurrence is replaced (no /g); one whitespace character
# after the name is consumed as well.
# NOTE(review): "\2" in the replacement part is better written as "$2";
# Perl warns about the backslash form when warnings are enabled.
221 $l =~ s/\\SpecialChar(NoPassThru)?\s*(TeX|LaTeX|LyX)[\s]?/\2/;
# Body of the URL list reader -- presumably readUrls(), whose callers pass
# (path, %table); the sub header is not visible here (TODO confirm).
#   $file  - path of the control file to read
#   $rUrls - reference to the hash that receives the entries
227 my ($file, $rUrls) = @_;
# NOTE(review): two-argument open() with a bareword handle; the error message
# does not include $!.
229 die("Could not read file $file") if (! open(ULIST, $file));
231 while (my $l = <ULIST>) {
233 $l =~ s/[\r\n]+$//; # remove eol
# NOTE(review): this also truncates a URL at the first '#', so URLs with a
# fragment part cannot be listed in full.
234 $l =~ s/\s*\#.*$//; # remove comment
235 $l = &replaceSpecialChar($l);
# A leading "UseCurl" keyword is stripped from the line -- presumably it sets
# $use_curl for this entry (TODO confirm).
238 if ($l =~ s/^\s*UseCurl\s*//) {
# The first definition of a URL wins; later duplicates do not replace it.
241 if (! defined($rUrls->{$l} )) {
# Entry layout: control file name => line number, a usage counter starting
# at 1, and the curl flag.
242 $rUrls->{$l} = {$file => $line, count => 1, use_curl => $use_curl};
# Line-by-line scan of one file for URLs, driven by $status, which is one of
# "out", "inUrlInset" or "inHrefInset". The enclosing sub header is not
# visible here.
251 my $status = "out"; # outside of URL/href
# Files located below an "attic" directory are not scanned.
253 return if ($f =~ /\/attic\//);
256 while(my $l = <FI>) {
258 $l =~ s/[\r\n]+$//; # Simulate chomp
259 if ($status eq "out") {
260 # searching for "\begin_inset Flex URL"
261 if($l =~ /^\s*\\begin_inset\s+Flex\s+URL\s*$/) {
262 $status = "inUrlInset";
264 elsif ($l =~ /^\s*\\begin_inset\s+CommandInset\s+href\s*$/) {
265 $status = "inHrefInset";
268 # Outside of url, check also
# Double-quoted ftp/http/https URLs are recorded with an "x" line tag.
269 if ($l =~ /"((ftp|http|https):\/\/[^ ]+)"/) {
271 handle_url($url, $f, "x$line");
# End of a layout or inset -- presumably resets $status to "out" (TODO confirm).
276 if($l =~ /^\s*\\end_(layout|inset)\s*$/) {
279 elsif ($status eq "inUrlInset") {
# URLs inside a Flex URL inset are recorded with a "u" line tag.
# NOTE(review): ".+" is greedy, so trailing whitespace ends up inside the
# capture; the following "\s*" then matches nothing.
280 if ($l =~ /\s*([a-z]+:\/\/.+)\s*$/) {
283 handle_url($url, $f, "u$line");
286 elsif ($status eq "inHrefInset") {
# The target of an href inset is recorded with an "h" line tag.
287 if ($l =~ /^target\s+"([a-z]+:\/\/[^ ]+)"$/) {
290 handle_url($url, $f, "h$line");
# Body of the URL registration helper -- presumably handle_url(), whose
# callers pass (url, file, line tag); the sub header is not visible here.
# Records in %URLS that $url occurs in file $f at $line.
301 my($url, $f, $line) = @_;
# Normalise \SpecialChar macros so that equal URLs share one key.
303 $url = &replaceSpecialChar($url);
304 if(!defined($URLS{$url})) {
# %URLS maps url -> file name -> array of line tags; the per-file array is
# created on first use.
307 if(!defined($URLS{$url}->{$f})) {
308 $URLS{$url}->{$f} = [];
310 push(@{$URLS{$url}->{$f}}, $line);