5 # Script to search for URLs in LyX files
6 # and test their validity.
8 # Syntax: search_url.pl [(filesToScan|(ignored|reverted|extra|selected)URLS)={path_to_control}]*
9 # Param value is a path to a file containing list of xxx:
10 # filesToScan={xxx = lyx-file-names to be scanned for URLs}
11 # ignoredURLS={xxx = urls that are discarded from test}
12 # revertedURLS={xxx = urls that should fail, to test the test with invalid urls}
13 # extraURLS={xxx = urls which should be also checked}
15 # This file is free software; you can redistribute it and/or
16 # modify it under the terms of the GNU General Public
17 # License as published by the Free Software Foundation; either
18 # version 2 of the License, or (at your option) any later version.
20 # This software is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 # General Public License for more details.
25 # You should have received a copy of the GNU General Public
26 # License along with this software; if not, write to the Free Software
27 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 # Copyright (c) 2013 Kornel Benko <kornel@lyx.org>
30 # (c) 2013 Scott Kostyshak <skotysh@lyx.org>
# Main script body: process the key=value command-line arguments, test every
# collected URL and print a summary of the failures.
#
# Directory containing this script: take the absolute path of this file and
# strip its last path component (together with the separator in front of it).
36 my $p = File::Spec->rel2abs(__FILE__);
37 $p =~ s/[\/\\]?[^\/\\]+$//;
# NOTE(review): presumably set so that messages produced during the checks
# are in English -- confirm.
44 $ENV{LANGUAGE} = "en";
# URLs that are expected to fail; their check result is inverted further down.
48 my %revertedURLS = ();
# URLs given through a selectedURLS argument.
50 my %selectedURLS = ();
# Set to 1 as soon as a selectedURLS argument is seen; then only URLs listed
# in %selectedURLS are checked.
52 my $checkSelectedOnly = 0;
# Every argument must have the form type=value.
54 die("Bad argument \"$arg\"") if ($arg !~ /=/);
# NOTE(review): split has no explicit limit here, so a value that itself
# contains '=' is cut at the second '='.
55 my ($type,$val) = split("=", $arg);
56 if ($type eq "filesToScan") {
57 #The file should be a list of files to search in
# NOTE(review): two-argument open with a bareword handle; a failed open does
# not die on this line.
58 if (open(FLIST, $val)) {
59 while (my $l = <FLIST>) {
# The *URLS argument types each load a control file into the matching hash.
66 elsif ($type eq "ignoredURLS") {
67 &readUrls($val, \%ignoredURLS);
69 elsif ($type eq "revertedURLS") {
70 &readUrls($val, \%revertedURLS);
72 elsif ($type eq "extraURLS") {
73 &readUrls($val, \%extraURLS);
75 elsif ($type eq "selectedURLS") {
76 $checkSelectedOnly = 1;
77 &readUrls($val, \%selectedURLS);
# Unrecognised argument type: abort.
80 die("Invalid argument \"$arg\"");
# Everything to test: the keys of %URLS plus the keys of %extraURLS, sorted
# together as strings.
84 my @urls = sort keys %URLS, keys %extraURLS;
# $u is the URL currently being processed. For an ignored URL the counter is
# bumped; printNotUsedURLS later reports entries whose counter is below 2.
90 if (defined($ignoredURLS{$u})) {
91 $ignoredURLS{$u} += 1;
# In selected-only mode, skip every URL that is not in %selectedURLS.
94 next if ($checkSelectedOnly && ! defined(${selectedURLS}{$u}));
# Count the hit for a selected URL (same bookkeeping as for ignored URLs).
95 if (defined(${selectedURLS}{$u})) {
96 ${selectedURLS}{$u} += 1;
99 print "Checking '$u'";
# A true $res is treated as a failure by the code below.
100 my $res = &check_url($u);
107 my $printSourceFiles = 0;
108 my $err_txt = "Error url:";
# Source locations are listed for failing URLs and, in selected-only mode,
# for every checked URL.
110 if ($res || $checkSelectedOnly) {
111 $printSourceFiles = 1;
# A reverted URL that fails gets the "Failed url:" label instead of
# "Error url:".
113 if ($res && defined($revertedURLS{$u})) {
114 $err_txt = "Failed url:";
# Reverted URLs are expected to fail, so their result is inverted.
116 $res = ! $res if (defined($revertedURLS{$u}));
117 if ($res || $checkSelectedOnly) {
118 print "$err_txt \"$u\"\n";
120 if ($printSourceFiles) {
# URLs that come only from %extraURLS have no entry in %URLS.
121 if (defined($URLS{$u})) {
122 for my $f(sort keys %{$URLS{$u}}) {
# ":" followed by the comma-separated line tags recorded for this URL in $f.
123 my $lines = ":" . join(',', @{$URLS{$u}->{$f}});
# Report ignored/selected URLs whose counters were not raised to 2 or more.
133 &printNotUsedURLS("Ignored", \%ignoredURLS);
134 &printNotUsedURLS("Selected", \%selectedURLS);
136 print "\n$errorcount URL-tests failed out of $URLScount\n\n";
139 ###############################################################################
# Print the URLs of a control list whose counter is below 2.
#   $txt   - label put in front of the message (the callers pass "Ignored"
#            and "Selected")
#   $rURLS - reference to a hash mapping URL => counter
# The main loop adds 1 to a URL's counter each time that URL is processed.
# NOTE(review): the code that fills @msg is not visible here; presumably it
# collects the URLs that pass the "< 2" test -- confirm.
141 sub printNotUsedURLS($$)
143 my ($txt, $rURLS) = @_;
# Walk the URLs in sorted order.
145 for my $u ( sort keys %{$rURLS}) {
146 if ($rURLS->{$u} < 2) {
151 print "\n$txt URLs not found in sources: " . join(' ',@msg) . "\n";
# NOTE(review): the sub header is not visible here; the argument list matches
# the readUrls($val, \%hash) calls in the argument parsing -- confirm.
#   $file  - path of the control file to read
#   $rUrls - hash reference; presumably receives the URLs read -- confirm
157 my ($file, $rUrls) = @_;
# The control file is mandatory: abort if it cannot be opened.
159 die("Could not read file $file") if (! open(ULIST, $file));
# Read the file line by line, dropping the line ending and anything from an
# optional run of whitespace followed by '#' to the end of the line.
160 while (my $l = <ULIST>) {
161 $l =~ s/[\r\n]+$//; # remove eol
162 $l =~ s/\s*\#.*$//; # remove comment
# NOTE(review): the sub header is not visible here. This code scans the lines
# read from FI for URLs and passes each one to handle_url together with the
# file name $f and a tagged line number.
# $status takes the values "out", "inUrlInset" and "inHrefInset".
172 my $status = "out"; # outside of URL
# Files whose path contains an /attic/ component are not scanned.
174 return if ($f =~ /\/attic\//);
177 while(my $l = <FI>) {
179 $l =~ s/[\r\n]+$//; # Simulate chomp
180 if ($status eq "out") {
181 # searching for "\begin_inset Flex URL"
182 if($l =~ /^\s*\\begin_inset\s+Flex\s+URL\s*$/) {
183 $status = "inUrlInset";
# "\begin_inset CommandInset href" starts an href inset.
185 elsif ($l =~ /^\s*\\begin_inset\s+CommandInset\s+href\s*$/) {
186 $status = "inHrefInset";
189 # Outside of url, check also
# A double-quoted ftp/http/https URL without spaces; reported with tag "x".
190 if ($l =~ /"((ftp|http|https):\/\/[^ ]+)"/) {
192 &handle_url($url, $f, "x$line");
# NOTE(review): "\end_layout" / "\end_inset" presumably switches $status back
# to "out"; the statement doing so is not visible -- confirm.
197 if($l =~ /^\s*\\end_(layout|inset)\s*$/) {
200 elsif ($status eq "inUrlInset") {
# Inside a URL inset: text of the form scheme://...; reported with tag "u".
201 if ($l =~ /\s*([a-z]+:\/\/.+)\s*$/) {
204 &handle_url($url, $f, "u$line");
207 elsif ($status eq "inHrefInset") {
# Inside an href inset: a 'target "scheme://..."' line; reported with tag "h".
208 if ($l =~ /^target\s+"([a-z]+:\/\/[^ ]+)"$/) {
211 &handle_url($url, $f, "h$line");
# NOTE(review): the sub header is not visible here; the argument list matches
# the handle_url($url, $f, "<tag>$line") calls made while scanning -- confirm.
# Records one occurrence of $url: $line is appended to the list stored in
# $URLS{$url}->{$f}.
222 my($url, $f, $line) = @_;
# First occurrence of this URL.
# NOTE(review): the statements between this test and the list initialisation
# below are not visible -- confirm how the two are nested.
224 if(!defined($URLS{$url})) {
226 $URLS{$url}->{$f} = [];
228 push(@{$URLS{$url}->{$f}}, $line);