From 3f48486a5cd2e66713430885f8826bd2af39412d Mon Sep 17 00:00:00 2001
From: Kornel Benko <kornel@lyx.org>
Date: Sun, 6 Oct 2024 18:04:00 +0200
Subject: [PATCH] Cmake build tests: check urls optimized

The original test took about ~50 minutes. Making the checks in parallel,
now it takes about 150 seconds.
---
 development/checkurls/CheckURL.pm         | 173 ++++++------
 development/checkurls/knownInvalidURLS    |   1 +
 development/checkurls/knownToRegisterURLS |   1 +
 development/checkurls/search_url.pl       | 326 ++++++++++++++--------
 4 files changed, 304 insertions(+), 197 deletions(-)
diff --git a/development/checkurls/CheckURL.pm b/development/checkurls/CheckURL.pm
index 4ab70abfcb..330f7fe3f7 100755
--- a/development/checkurls/CheckURL.pm
+++ b/development/checkurls/CheckURL.pm
@@ -1,5 +1,6 @@
 # -*- mode: perl; -*-
 package CheckURL;
+
 # file CheckURL.pm
 #
 # This file is part of LyX, the document processor.
@@ -13,9 +14,10 @@ package CheckURL;
 #
 use strict;
 
-our(@EXPORT, @ISA);
+our (@EXPORT, @ISA);
+
 BEGIN {
-  use Exporter   ();
+  use Exporter ();
   @ISA    = qw(Exporter);
   @EXPORT = qw(check_url);
 }
@@ -25,16 +27,18 @@ sub check_http_url($$$$);
 sub check_ftp_dir_entry($$);
 sub check_ftp_url($$$$);
 sub check_unknown_url($$$$);
-sub check_url($$);
+sub check_url($$$$);
 ################
 
-sub check_http_url($$$$)
-{
+my $fe;
+my $fs;
+
+sub check_http_url($$$$) {
   require LWP::UserAgent;
 
   my ($protocol, $host, $path, $file) = @_;
 
-  my $ua = LWP::UserAgent->new;
+  my $ua   = LWP::UserAgent->new(timeout => 20);
   my $getp = "/";
   if ($path ne "") {
     $getp .= $path;
@@ -54,11 +58,11 @@ sub check_http_url($$$$)
     $buf = $response->decoded_content;
   }
   else {
-    print " " . $response->status_line . ": ";
+    print $fe " " . $response->status_line . ": ";
     return 3;
   }
   my @title = ();
-  my $res = 0;
+  my $res   = 0;
   while ($buf =~ s/\<title\>([^\<]*)\<\/title\>//i) {
     my $title = $1;
     $title =~ s/[\r\n]/ /g;
@@ -66,9 +70,9 @@ sub check_http_url($$$$)
     $title =~ s/^ //;
     $title =~ s/ $//;
     push(@title, $title);
-    print "title = \"$title\": ";
+    print $fe "title = \"$title\": ";
     if ($title =~ /Error 404|Not Found/) {
-      print " Page reports 'Not Found' from \"$protocol://$host$getp\": ";
+      print $fe " Page reports 'Not Found' from \"$protocol://$host$getp\": ";
       $res = 3;
     }
   }
@@ -79,39 +83,39 @@ sub check_http_url($$$$)
 # returns 0, x if file does not match entry
 #         1, x everything OK
 #         2, x if not accesible (permission)
-sub check_ftp_dir_entry($$)
-{
+sub check_ftp_dir_entry($$) {
   my ($file, $e) = @_;
   my $other = '---';
   my $isdir = 0;
 
-  #print "Checking '$file' against '$e'\n";
+  #print $fe "Checking '$file' against '$e'\n";
   $file =~ s/^\///;
   $isdir = 1 if ($e =~ /^d/);
-  return(0,$isdir) if ($e !~ /\s$file$/);
+  return (0, $isdir) if ($e !~ /\s$file$/);
   if ($e =~ /^.[-rwx]{6}([-rwx]{3})\s/) {
     $other = $1;
   }
   else {
-    #print "Invalid entry\n";
+    #print $fe "Invalid entry\n";
     # Invalid entry
-    return(0,$isdir);
+    return (0, $isdir);
   }
-  return(2,$isdir) if ($other !~ /^r/); # not readable
+  return (2, $isdir) if ($other !~ /^r/);    # not readable
   if ($isdir) {
+
     #return(2,$isdir) if ($other !~ /x$/); # directory, but not executable
   }
-  return(1,$isdir);
+  return (1, $isdir);
 }
 
-sub check_ftp2_url($$$$)
-{
+sub check_ftp2_url($$$$) {
   my ($protocol, $host, $path, $file) = @_;
 
   my $checkentry = 1;
-  print "\nhost $host\n";
-  print "path $path\n";
-  print "file $file\n";
+
+  #print $fe "\nhost $host\n";
+  #print $fe "path $path\n";
+  #print $fe "file $file\n";
   my $url = "$protocol://$host";
   $path =~ s/\/$//;
   if (defined($file)) {
@@ -120,7 +124,8 @@ sub check_ftp2_url($$$$)
   else {
     $url = "$url/$path/.";
   }
-  print "curl $url, file = $file\n";
+
+  #print $fe "curl $url, file = $file\n";
   my %listfiles = ();
   if (open(FFTP, "curl --anyauth -l $url|")) {
     while (my $l = <FFTP>) {
@@ -130,44 +135,44 @@ sub check_ftp2_url($$$$)
     close(FFTP);
   }
   if (%listfiles) {
-    if (! defined($file)) {
-      return(0, "OK");
+    if (!defined($file)) {
+      return (0, "OK");
     }
     elsif (defined($listfiles{$file})) {
-      return(0, "OK");
+      return (0, "OK");
     }
     elsif (defined($listfiles{"ftpinfo.txt"})) {
-      return(0, "Probably a directory");
+      return (0, "Probably a directory");
     }
     else {
-      return(1, "Not found");
+      return (1, "Not found");
     }
   }
   else {
-    return(1, "Error");
+    return (1, "Error");
   }
 }
 
-sub check_ftp_url($$$$)
-{
+sub check_ftp_url($$$$) {
   use Net::FTP;
 
   my ($protocol, $host, $path, $file) = @_;
-  my $res = 0;
+  my $res     = 0;
   my $message = "";
 
-  my $ftp = Net::FTP->new($host, Debug => 0, Timeout => 120);
-  if(!$ftp) {
-    return(3,"Cannot connect to $host");
+  my $ftp = Net::FTP->new($host, Debug => 0, Timeout => 20);
+  if (!$ftp) {
+    return (3, "Cannot connect to $host");
   }
-  if (! $ftp->login("anonymous",'-anonymous@')) {
+  if (!$ftp->login("anonymous", '-anonymous@')) {
     $message = $ftp->message;
-    $res = 3;
+    $res     = 3;
   }
   else {
     my $rEntries;
     if ($path ne "") {
-      #print "Path = $path\n";
+
+      #print $fe "Path = $path\n";
       #if (!$ftp->cwd($path)) {
       # $message = $ftp->message;
       # $res = 3;
@@ -177,41 +182,43 @@ sub check_ftp_url($$$$)
     else {
       $rEntries = $ftp->dir();
     }
-    if (! $rEntries) {
-      $res = 3;
+    if (!$rEntries) {
+      $res     = 3;
       $message = "Could not read directory \"$path\"";
     }
     elsif (defined($file)) {
-      my $found = 0;
+      my $found  = 0;
       my $found2 = 0;
-      for my $f ( @{$rEntries}) {
-	#print "Entry: $path $f\n";
-	my ($res1,$isdir) = check_ftp_dir_entry($file,$f);
-	if ($res1 == 1) {
-	  $found = 1;
-	  last;
-	}
-	elsif ($res1 == 2) {
-	  # found, but not accessible
-	  $found2 = 1;
-	  $message = "Permission denied for '$file'";
-	}
+      for my $f (@{$rEntries}) {
+
+        #print $fe "Entry: $path $f\n";
+        my ($res1, $isdir) = check_ftp_dir_entry($file, $f);
+        if ($res1 == 1) {
+          $found = 1;
+          last;
+        }
+        elsif ($res1 == 2) {
+
+          # found, but not accessible
+          $found2  = 1;
+          $message = "Permission denied for '$file'";
+        }
       }
-      if (! $found) {
-	$res = 4;
-	if (! $found2) {
-	  $message = "File or directory '$file' not found";
-	}
+      if (!$found) {
+        $res = 4;
+        if (!$found2) {
+          $message = "File or directory '$file' not found";
+        }
       }
     }
   }
   $ftp->quit;
-  #print "returning ($res,$message)\n";
-  return($res, $message);
+
+  #print $fe "returning ($res,$message)\n";
+  return ($res, $message);
 }
 
-sub check_unknown_url($$$$)
-{
+sub check_unknown_url($$$$) {
   use LWP::Simple;
 
   my ($protocol, $host, $path, $file) = @_;
@@ -226,49 +233,53 @@ sub check_unknown_url($$$$)
       $url .= "/$path";
     }
   }
-  if(defined($file)) {
-    #print "Trying $url$file\n";
+  if (defined($file)) {
+
+    #print $fe "Trying $url$file\n";
     $res = head("$url/$file");
-    if(! $res) {
+    if (!$res) {
+
       # try to check for directory '/';
-      #print "Trying $url$file/\n";
+      #print $fe "Trying $url$file/\n";
       $res = head("$url/$file/");
     }
   }
   else {
-    #print "Trying $url\n";
+    #print $fe "Trying $url\n";
     $res = head($url);
   }
-  return(! $res);
+  return (!$res);
 }
 
 #
 # Main entry
-sub check_url($$)
-{
-  my($url,$use_curl) = @_;
+sub check_url($$$$) {
+  my ($url, $use_curl, $fex, $fsx) = @_;
+  $fe = $fex;
+  $fs = $fsx;
   my $file = undef;
-  my ($protocol,$host,$path);
+  my ($protocol, $host, $path);
 
   my $res = 0;
 
   # Split the url to protocol,host,path
   if ($url =~ /^([a-z]+):\/\/([^\/]+)(.*)$/) {
     $protocol = $1;
-    $host = $2;
-    $path = $3;
+    $host     = $2;
+    $path     = $3;
     $path =~ s/^\///;
-    if($path =~ s/\/([^\/]+)$//) {
+    if ($path =~ s/\/([^\/]+)$//) {
       $file = $1;
-      if($file =~ / /) {
-	# Filename contains ' ', maybe invalid. Don't check
-	$file = undef;
+      if ($file =~ / /) {
+
+        # Filename contains ' ', maybe invalid. Don't check
+        $file = undef;
       }
       $path .= "/";
     }
   }
   else {
-    print " Invalid url '$url'";
+    print $fe " Invalid url '$url'";
     return 2;
   }
   if ($protocol =~ /^https?$/) {
@@ -286,7 +297,7 @@ sub check_url($$)
   }
   else {
     # it never should reach this point
-    print " What protocol is '$protocol'?";
+    print $fe " What protocol is '$protocol'?";
     $res = check_unknown_url($protocol, $host, $path, $file);
     return $res;
   }
diff --git a/development/checkurls/knownInvalidURLS b/development/checkurls/knownInvalidURLS
index 02509713e3..4157c712ef 100644
--- a/development/checkurls/knownInvalidURLS
+++ b/development/checkurls/knownInvalidURLS
@@ -2,3 +2,4 @@ http://www.uon.edu/doe
 ftp://www.test.test
 http://www.test.test
 #proto://host.xx.ab/abcd
+http://example.com/%20foo
diff --git a/development/checkurls/knownToRegisterURLS b/development/checkurls/knownToRegisterURLS
index 335deae1d8..b6e0681024 100644
--- a/development/checkurls/knownToRegisterURLS
+++ b/development/checkurls/knownToRegisterURLS
@@ -10,6 +10,7 @@ http://jasa.peerx-press.org/html/jasa/Using_LaTeX
 http://spie.org/app/Publications/index.cfm?fuseaction=authinfo&type=proceedings
 http://www.jstatsoft.org/downloads/JSSstyle.zip
 http://www.photogrammetry.ethz.ch/tarasp_workshop/isprs.cls
+https://journals.aps.org/revtex
 
 # The following ftp url is correct, but
 # ftp commands like 'dir', 'get' do not work.
diff --git a/development/checkurls/search_url.pl b/development/checkurls/search_url.pl
index 8bba11c1fc..04b11bfb1c 100755
--- a/development/checkurls/search_url.pl
+++ b/development/checkurls/search_url.pl
@@ -31,47 +31,55 @@
 
 use strict;
 
-BEGIN  {
+BEGIN {
   use File::Spec;
   my $p = File::Spec->rel2abs(__FILE__);
   $p =~ s/[\/\\]?[^\/\\]+$//;
   unshift(@INC, "$p");
 }
 
+use warnings;
+use Cwd qw(abs_path);
 use CheckURL;
 use Try::Tiny;
 use locale;
 use POSIX qw(locale_h);
 
-setlocale(LC_CTYPE, "");
+setlocale(LC_CTYPE,    "");
 setlocale(LC_MESSAGES, "en_US.UTF-8");
 
+use File::Temp qw/ tempfile tempdir /;
+use File::Spec;
+use Fcntl qw(:flock SEEK_END);
+
 # Prototypes
 sub printNotUsedURLS($\%);
 sub replaceSpecialChar($);
 sub readUrls($\%);
 sub parse_file($ );
 sub handle_url($$$ );
+sub printx($$$$);
 ##########
 
-my %URLS = ();
-my %ignoredURLS = ();
-my %revertedURLS = ();
-my %extraURLS = ();
-my %selectedURLS = ();
+my %URLS                = ();
+my %ignoredURLS         = ();
+my %revertedURLS        = ();
+my %extraURLS           = ();
+my %selectedURLS        = ();
 my %knownToRegisterURLS = ();
-my $summaryFile = undef;
+my $summaryFile         = undef;
 
 my $checkSelectedOnly = 0;
 for my $arg (@ARGV) {
   die("Bad argument \"$arg\"") if ($arg !~ /=/);
-  my ($type,$val) = split("=", $arg);
+  my ($type, $val) = split("=", $arg);
   if ($type eq "filesToScan") {
+
     #The file should be a list of files to search in
     if (open(FLIST, $val)) {
       while (my $l = <FLIST>) {
-	chomp($l);
-	parse_file($l);
+        chomp($l);
+        parse_file($l);
       }
       close(FLIST);
     }
@@ -102,7 +110,9 @@ for my $arg (@ARGV) {
   }
 }
 
-my @urls = sort keys %URLS, keys %extraURLS;
+my @urls     = sort keys %URLS, keys %extraURLS;
+my @testvals = ();
+
 # Tests
 #my @urls = ("ftp://ftp.edpsciences.org/pub/aa/readme.html", "ftp://ftp.springer.de/pub/tex/latex/compsc/proc/author");
 my $errorcount = 0;
@@ -126,57 +136,146 @@ for my $u (@urls) {
   if (defined($selectedURLS{$u})) {
     ${selectedURLS}{$u}->{count} += 1;
   }
-  next if ($checkSelectedOnly && ! defined($selectedURLS{$u}));
+  next if ($checkSelectedOnly && !defined($selectedURLS{$u}));
   $URLScount++;
-  print "Checking '$u': ";
-  my ($res, $prnt, $outSum);
-  try {
-    $res = check_url($u, $use_curl);
-    if ($res) {
-      print "Failed\n";
-      $prnt = "";
-      $outSum = 1;
-    }
-    else {
-      $prnt = "OK\n";
-      $outSum = 0;
+  push(@testvals, {u => $u, use_curl => $use_curl,});
+}
+
+# Ready to go multitasking
+my ($vol, $dir, $file) = File::Spec->splitpath($summaryFile);
+my $tempdir   = tempdir("$dir/CounterXXXXXXX", CLEANUP => 1);
+my $countfile = "$tempdir/counter";
+my $counter   = 0;
+if (open(my $FO, '>', $countfile)) {
+  print {$FO} $counter;
+  close($FO);
+}
+else {
+  unlink($countfile);
+  die("Could not write to $countfile");
+}
+
+print "Using tempdir \"" . abs_path($tempdir) . "\"\n";
+
+my @wait = ();
+for (my $i = 0; $i < 10; $i++) {    # Number of subprocesses
+  my $pid = fork();
+  if ($pid == 0) {
+
+    # I am child
+    open(my $fe, '>:encoding(UTF-8)', "$tempdir/xxxError$i");
+    open(my $fs, '>:encoding(UTF-8)', "$tempdir/xxxSum$i");
+    while (1) {
+      open(my $fh, '+<', $countfile) or die("cannot open $countfile");
+      flock($fh, LOCK_EX)            or die "$i: Cannot lock $countfile - $!\n";
+      my $l    = <$fh>;    # get actual count number
+      my $diff = undef;
+      if (defined($testvals[$l + 150])) {
+        $diff = 5;
+      }
+      elsif (defined($testvals[$l + 50])) {
+        $diff = 3;
+      }
+      elsif (defined($testvals[$l + 20])) {
+        $diff = 2;
+      }
+      elsif (defined($testvals[$l])) {
+        $diff = 1;
+      }
+      else {
+        close($fs);
+        print $fe "NumberOfErrors $errorcount\n";
+        close($fe);
+        exit(0);
+      }
+      my $next = $l + $diff;
+      seek($fh, 0, 0);
+      truncate($fh, 0);
+      print $fh $next;
+      close($fh);
+      for (my $i = 0; $i < $diff; $i++) {
+        my $entryidx = $l + $i;
+        my $rentry   = $testvals[$entryidx];
+        next if (!defined($rentry));
+        my $u        = $rentry->{u};
+        my $use_curl = $rentry->{use_curl};
+
+        print $fe "Checking($entryidx) '$u': ";
+        my ($res, $prnt, $outSum);
+        try {
+          $res = check_url($u, $use_curl, $fe, $fs);
+          if ($res) {
+            print $fe "Failed\n";
+            $prnt   = "";
+            $outSum = 1;
+          }
+          else {
+            $prnt   = "OK\n";
+            $outSum = 0;
+          }
+        }
+        catch {
+          $prnt   = "Failed, caught error: $_\n";
+          $outSum = 1;
+          $res    = 700;
+        };
+        printx("$prnt", $outSum, $fe, $fs);
+        my $printSourceFiles = 0;
+        my $err_txt          = "Error url:";
+
+        if ($res || $checkSelectedOnly) {
+          $printSourceFiles = 1;
+        }
+        if ($res && defined($revertedURLS{$u})) {
+          $err_txt = "Failed url:";
+        }
+        $res = !$res if (defined($revertedURLS{$u}));
+        if ($res || $checkSelectedOnly) {
+          printx("$err_txt \"$u\"\n", $outSum, $fe, $fs);
+        }
+        if ($printSourceFiles) {
+          if (defined($URLS{$u})) {
+            for my $f (sort keys %{$URLS{$u}}) {
+              my $lines = ":" . join(',', @{$URLS{$u}->{$f}});
+              printx("  $f$lines\n", $outSum, $fe, $fs);
+            }
+          }
+          if ($res) {
+            $errorcount++;
+          }
+        }
+      }
     }
   }
-  catch {
-    $prnt = "Failed, caught error: $_\n";
-    $outSum = 1;
-    $res = 700;
-  };
-  printx("$prnt", $outSum);
-  my $printSourceFiles = 0;
-  my $err_txt = "Error url:";
-
-  if ($res || $checkSelectedOnly) {
-    $printSourceFiles = 1;
-  }
-  if ($res && defined($revertedURLS{$u})) {
-    $err_txt = "Failed url:";
-  }
-  $res = ! $res if (defined($revertedURLS{$u}));
-  if ($res || $checkSelectedOnly) {
-    printx("$err_txt \"$u\"\n", $outSum);
-  }
-  if ($printSourceFiles) {
-    if (defined($URLS{$u})) {
-      for my $f(sort keys %{$URLS{$u}}) {
-	my $lines = ":" . join(',', @{$URLS{$u}->{$f}});
-	printx("  $f$lines\n", $outSum);
+  $wait[$i] = $pid;
+}
+
+for (my $i = 0; $i < 10; $i++) {
+  my $p = $wait[$i];
+  if ($p > 0) {
+    waitpid($p, 0);
+    open(my $fe, '<', "$tempdir/xxxError$i");
+    while (my $l = <$fe>) {
+      if ($l =~ /^NumberOfErrors\s(\d+)/) {
+        $errorcount += $1;
+      }
+      else {
+        print $l;
       }
     }
-    if ($res ) {
-      $errorcount++;
+    close($fe);
+    open(my $fs, '<', "$tempdir/xxxSum$i");
+    while (my $l = <$fs>) {
+      print SFO $l;
     }
+    close($fs);
   }
 }
+unlink($countfile);
 
 if (%URLS) {
-  printNotUsedURLS("Ignored", %ignoredURLS);
-  printNotUsedURLS("Selected", %selectedURLS);
+  printNotUsedURLS("Ignored",      %ignoredURLS);
+  printNotUsedURLS("Selected",     %selectedURLS);
   printNotUsedURLS("KnownInvalid", %extraURLS);
 }
 
@@ -187,124 +286,119 @@ if (defined($summaryFile)) {
 exit($errorcount);
 
 ###############################################################################
-sub printx($$)
-{
-  my ($txt, $outSum) = @_;
-  print "$txt";
+sub printx($$$$) {
+  my ($txt, $outSum, $fe, $fs) = @_;
+  print $fe "$txt";
   if ($outSum && defined($summaryFile)) {
-    print SFO "$txt";
+    print $fs "$txt";
   }
 }
 
-sub printNotUsedURLS($\%)
-{
+sub printNotUsedURLS($\%) {
   my ($txt, $rURLS) = @_;
   my @msg = ();
-  for my $u ( sort keys %{$rURLS}) {
+  for my $u (sort keys %{$rURLS}) {
     if ($rURLS->{$u}->{count} < 2) {
       my @submsg = ();
       for my $f (sort keys %{$rURLS->{$u}}) {
-	next if ($f eq "count");
-	push(@submsg, "$f:" . $rURLS->{$u}->{$f});
+        next if ($f eq "count");
+        push(@submsg, "$f:" . $rURLS->{$u}->{$f});
       }
       push(@msg, "\n  $u\n    " . join("\n    ", @submsg) . "\n");
     }
   }
   if (@msg) {
-    print "\n$txt URLs not found in sources: " . join(' ',@msg) . "\n";
+    print "\n$txt URLs not found in sources: " . join(' ', @msg) . "\n";
   }
 }
 
-sub replaceSpecialChar($)
-{
+sub replaceSpecialChar($) {
   my ($l) = @_;
   $l =~ s/\\SpecialChar(NoPassThru)?\s*(TeX|LaTeX|LyX)[\s]?/\2/;
-  return($l);
+  return ($l);
 }
 
-sub readUrls($\%)
-{
+sub readUrls($\%) {
   my ($file, $rUrls) = @_;
 
-  die("Could not read file $file") if (! open(ULIST, $file));
+  die("Could not read file $file") if (!open(ULIST, $file));
   my $line = 0;
   while (my $l = <ULIST>) {
     $line++;
-    $l =~ s/[\r\n]+$//;		# remove eol
-    $l =~ s/\s*\#.*$//;		# remove comment
+    $l =~ s/[\r\n]+$//;    # remove eol
+    $l =~ s/\s*\#.*$//;    # remove comment
     $l = &replaceSpecialChar($l);
     next if ($l eq "");
     my $use_curl = 0;
     if ($l =~ s/^\s*UseCurl\s*//) {
       $use_curl = 1;
     }
-    if (! defined($rUrls->{$l} )) {
+    if (!defined($rUrls->{$l})) {
       $rUrls->{$l} = {$file => $line, count => 1, use_curl => $use_curl};
     }
   }
   close(ULIST);
 }
 
-sub parse_file($)
-{
-  my($f) = @_;
-  my $status = "out";		# outside of URL/href
+sub parse_file($) {
+  my ($f) = @_;
+  my $status = "out";    # outside of URL/href
 
   return if ($f =~ /\/attic\//);
-  if(open(FI, $f)) {
+  if (open(FI, $f)) {
     my $line = 0;
-    while(my $l = <FI>) {
+    while (my $l = <FI>) {
       $line++;
-      $l =~ s/[\r\n]+$//;	#  Simulate chomp
+      $l =~ s/[\r\n]+$//;    #  Simulate chomp
       if ($status eq "out") {
-	# searching for "\begin_inset Flex URL"
-	if($l =~ /^\s*\\begin_inset\s+Flex\s+URL\s*$/) {
-	  $status = "inUrlInset";
-	}
-	elsif ($l =~ /^\s*\\begin_inset\s+CommandInset\s+href\s*$/) {
-	  $status = "inHrefInset";
-	}
-	else {
-	  # Outside of url, check also
-	  if ($l =~ /"((ftp|http|https):\/\/[^ ]+)"/) {
-	    my $url = $1;
-	    handle_url($url, $f, "x$line");
-	  }
-	}
+
+        # searching for "\begin_inset Flex URL"
+        if ($l =~ /^\s*\\begin_inset\s+Flex\s+URL\s*$/) {
+          $status = "inUrlInset";
+        }
+        elsif ($l =~ /^\s*\\begin_inset\s+CommandInset\s+href\s*$/) {
+          $status = "inHrefInset";
+        }
+        else {
+          # Outside of url, check also
+          if ($l =~ /"((ftp|http|https):\/\/[^ ]+)"/) {
+            my $url = $1;
+            handle_url($url, $f, "x$line");
+          }
+        }
       }
       else {
-	if($l =~ /^\s*\\end_(layout|inset)\s*$/) {
-	  $status = "out";
-	}
-	elsif ($status eq "inUrlInset") {
-	  if ($l =~ /\s*([a-z]+:\/\/.+)\s*$/) {
-	    my $url = $1;
-	    $status = "out";
-	    handle_url($url, $f, "u$line");
-	  }
-	}
-	elsif ($status eq "inHrefInset") {
-	  if ($l =~ /^target\s+"([a-z]+:\/\/[^ ]+)"$/) {
-	    my $url = $1;
-	    $status = "out";
-	    handle_url($url, $f, "h$line");
-	  }
-	}
+        if ($l =~ /^\s*\\end_(layout|inset)\s*$/) {
+          $status = "out";
+        }
+        elsif ($status eq "inUrlInset") {
+          if ($l =~ /\s*([a-z]+:\/\/.+)\s*$/) {
+            my $url = $1;
+            $status = "out";
+            handle_url($url, $f, "u$line");
+          }
+        }
+        elsif ($status eq "inHrefInset") {
+          if ($l =~ /^target\s+"([a-z]+:\/\/[^ ]+)"$/) {
+            my $url = $1;
+            $status = "out";
+            handle_url($url, $f, "h$line");
+          }
+        }
       }
     }
     close(FI);
   }
 }
 
-sub handle_url($$$)
-{
-  my($url, $f, $line) = @_;
+sub handle_url($$$) {
+  my ($url, $f, $line) = @_;
 
   $url = &replaceSpecialChar($url);
-  if(!defined($URLS{$url})) {
+  if (!defined($URLS{$url})) {
     $URLS{$url} = {};
   }
-  if(!defined($URLS{$url}->{$f})) {
+  if (!defined($URLS{$url}->{$f})) {
     $URLS{$url}->{$f} = [];
   }
   push(@{$URLS{$url}->{$f}}, $line);
-- 
2.39.5