FindAdv: Fix interpretation of match-results

[features.git] / src / lyxfind.cpp
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index dfa1aa8034c5e23eb3e50bbe8ee35fb124b53481..0b252f3a838cc3b3295428c2e45a8d85f5cbeab1 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -52,7 +52,7 @@
  #include <map>
  #include <regex>
  
-//#define ResultsDebug
+#define ResultsDebug
  #define USE_QT_FOR_SEARCH
  #if defined(USE_QT_FOR_SEARCH)
         #include <QtCore>       // sets QT_VERSION
@@ -71,6 +71,8 @@ using namespace lyx::support;
  
  namespace lyx {
  
+typedef map<string, string> AccentsMap;
+static AccentsMap accents = map<string, string>();
  
  // Helper class for deciding what should be ignored
  class IgnoreFormats {
@@ -638,101 +640,6 @@ namespace {
  
  typedef vector<pair<string, string> > Escapes;
  
-/// A map of symbols and their escaped equivalent needed within a regex.
-/// @note Beware of order
-/*
-Escapes const & get_regexp_escapes()
-{
-       typedef std::pair<std::string, std::string> P;
-
-       static Escapes escape_map;
-       if (escape_map.empty()) {
-               escape_map.push_back(P("$", "_x_$"));
-               escape_map.push_back(P("{", "_x_{"));
-               escape_map.push_back(P("}", "_x_}"));
-               escape_map.push_back(P("[", "_x_["));
-               escape_map.push_back(P("]", "_x_]"));
-               escape_map.push_back(P("(", "_x_("));
-               escape_map.push_back(P(")", "_x_)"));
-               escape_map.push_back(P("+", "_x_+"));
-               escape_map.push_back(P("*", "_x_*"));
-               escape_map.push_back(P(".", "_x_."));
-               escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)"));
-               escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)"));
-               escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)"));
-               escape_map.push_back(P("_x_", "\\"));
-       }
-       return escape_map;
-}
-*/
-
-/// A map of lyx escaped strings and their unescaped equivalent.
-/*
-Escapes const & get_lyx_unescapes()
-{
-       typedef std::pair<std::string, std::string> P;
-
-       static Escapes escape_map;
-       if (escape_map.empty()) {
-               escape_map.push_back(P("\\%", "%"));
-               escape_map.push_back(P("\\{", "{"));
-               escape_map.push_back(P("\\}", "}"));
-               escape_map.push_back(P("\\mathcircumflex ", "^"));
-               escape_map.push_back(P("\\mathcircumflex", "^"));
-               escape_map.push_back(P("\\backslash ", "\\"));
-               escape_map.push_back(P("\\backslash", "\\"));
-               escape_map.push_back(P("\\sim ", "~"));
-               escape_map.push_back(P("\\sim", "~"));
-       }
-       return escape_map;
-}
-*/
-
-/// A map of escapes turning a regexp matching text to one matching latex.
-/*
-Escapes const & get_regexp_latex_escapes()
-{
-       typedef std::pair<std::string, std::string> P;
-
-       static Escapes escape_map;
-       if (escape_map.empty()) {
-               escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)"));
-               escape_map.push_back(P("(<?!\\\\\\\\textbackslash)\\{", "\\\\\\{"));
-               escape_map.push_back(P("(<?!\\\\\\\\textbackslash\\\\\\{)\\}", "\\\\\\}"));
-               escape_map.push_back(P("\\[", "\\{\\[\\}"));
-               escape_map.push_back(P("\\]", "\\{\\]\\}"));
-               escape_map.push_back(P("\\^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)"));
-               escape_map.push_back(P("%", "\\\\\\%"));
-               escape_map.push_back(P("#", "\\\\#"));
-       }
-       return escape_map;
-}
-*/
-
-/** @todo Probably the maps need to be migrated to regexps, in order to distinguish if
- ** the found occurrence were escaped.
- **/
-/*
-string apply_escapes(string s, Escapes const & escape_map)
-{
-       LYXERR(Debug::FIND, "Escaping: '" << s << "'");
-       Escapes::const_iterator it;
-       for (it = escape_map.begin(); it != escape_map.end(); ++it) {
-//             LYXERR(Debug::FIND, "Escaping " << it->first << " as " << it->second);
-               unsigned int pos = 0;
-               while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) {
-                       s.replace(pos, it->first.length(), it->second);
-                       LYXERR(Debug::FIND, "After escape: " << s);
-                       pos += it->second.length();
-//                     LYXERR(Debug::FIND, "pos: " << pos);
-               }
-       }
-       LYXERR(Debug::FIND, "Escaped : '" << s << "'");
-       return s;
-}
-*/
-
-
  string string2regex(string in)
  {
         static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" };
@@ -758,7 +665,7 @@ string string2regex(string in)
         return temp2;
  }
  
-string correctRegex(string t)
+string correctRegex(string t, bool withformat)
  {
         /* Convert \backslash => \
          * and \{, \}, \[, \] => {, }, [, ]
@@ -767,6 +674,7 @@ string correctRegex(string t)
         regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))");
         size_t lastpos = 0;
         smatch sub;
+       bool backslashed = false;
         for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
                 sub = *it;
                 string replace;
@@ -774,10 +682,31 @@ string correctRegex(string t)
                         continue;
                 }
                 else {
-                       if (sub.str(4) == "backslash")
+                       if (sub.str(4) == "backslash") {
                                 replace = "\\";
+                               if (withformat) {
+                                       // transforms '\backslash \{' into '\{'
+                                       // and '\{' into '{'
+                                       string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
+                                       if ((next == "\\{") || (next == "\\}")) {
+                                               replace = "";
+                                               backslashed = true;
+                                       }
+                               }
+                       }
                         else if (sub.str(4) == "mathcircumflex")
                                 replace = "^";
+                       else if (backslashed) {
+                               backslashed = false;
+                               if (withformat && (sub.str(3) == "{"))
+                                       replace = accents["braceleft"];
+                               else if (withformat && (sub.str(3) == "}"))
+                                       replace = accents["braceright"];
+                               else {
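+                                       // should be unreachable: 'backslashed' is only set (with withformat) when '\{' or '\}' follows; NOTE(review): LASSERT(1, ...) presumably never fires -- was LASSERT(0, ...) intended? confirm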
+                                       LASSERT(1, /**/);
+                               }
+                       }
                         else
                                 replace = sub.str(3);
                 }
@@ -796,7 +725,7 @@ string correctRegex(string t)
  /// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string),
  /// while outside apply get_lyx_unescapes()+get_regexp_escapes().
  /// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well.
-string escape_for_regex(string s)
+string escape_for_regex(string s, bool withformat)
  {
         size_t lastpos = 0;
         string result = "";
@@ -812,7 +741,7 @@ string escape_for_regex(string s)
                                 break;
                 }
                 size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8);
-               result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)));
+               result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat);
                 lastpos = end_pos + 13;
         }
         return result;
@@ -833,62 +762,6 @@ bool regex_replace(string const & s, string & t, string const & searchstr,
         return rv;
  }
  
-#if 0
-/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces.
- **
- ** Verify that closed braces exactly match open braces. This avoids that, for example,
- ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'.
- **
- ** @param unmatched
- ** Number of open braces that must remain open at the end for the verification to succeed.
- **/
-#if QTSEARCH
-bool braces_match(QString const & beg,
-                 int unmatched = 0)
-#else
-bool braces_match(string const & beg,
-               int unmatched = 0)
-#endif
-{
-       int open_pars = 0;
-#if QTSEARCH
-       LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'");
-#else
-       LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'");
-#endif
-       int lastidx = beg.size();
-       for (int i=0; i < lastidx; ++i) {
-               // Skip escaped braces in the count
-#if QTSEARCH
-               QChar c = beg.at(i);
-#else
-               char c = beg.at(i);
-#endif
-               if (c == '\\') {
-                       ++i;
-                       if (i >= lastidx)
-                               break;
-               } else if (c == '{') {
-                       ++open_pars;
-               } else if (c == '}') {
-                       if (open_pars == 0) {
-                               LYXERR(Debug::FIND, "Found unmatched closed brace");
-                               return false;
-                       } else
-                               --open_pars;
-               }
-       }
-       if (open_pars != unmatched) {
-               LYXERR(Debug::FIND, "Found " << open_pars
-                      << " instead of " << unmatched
-                      << " unmatched open braces at the end of count");
-               return false;
-       }
-       LYXERR(Debug::FIND, "Braces match as expected");
-       return true;
-}
-#endif
-
  class MatchResult {
  public:
         enum range {
@@ -902,22 +775,21 @@ public:
         int pos;
         int leadsize;
         int pos_len;
+       int searched_size;
         vector <string> result = vector <string>();
-       MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1) {};
+       MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {};
  };
  
  static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
  {
-  int range = oldres.match_len;
-  if (range > 0) range--;
-  if (newres.match2end < oldres.match2end - oldres.match_len)
+  if (newres.match2end < oldres.match2end)
      return MatchResult::newIsTooFar;
    if (newres.match_len < oldres.match_len)
      return MatchResult::newIsTooFar;
-  if ((newres.match_len == oldres.match_len) &&
-      (newres.match2end < oldres.match2end + range) &&
-      (newres.match2end > oldres.match2end - range)) {
-    return MatchResult::newIsBetter;
+
+  if (newres.match_len == oldres.match_len) {
+    if (newres.match2end == oldres.match2end)
+      return MatchResult::newIsBetter;
    }
    return MatchResult::newIsInvalid;
  }
@@ -1382,9 +1254,6 @@ void Intervall::addIntervall(int low, int upper)
    }
  }
  
-typedef map<string, string> AccentsMap;
-static AccentsMap accents = map<string, string>();
-
  static void buildaccent(string n, string param, string values)
  {
    stringstream s(n);
@@ -1483,6 +1352,8 @@ static void buildAccentsMap()
    accents["latexe"]        = getutf8(0xf0013);
    accents["LaTeXe"]        = getutf8(0xf0013);
    accents["lyxarrow"]      = getutf8(0xf0020);
+  accents["braceleft"]     = getutf8(0xf0030);
+  accents["braceright"]    = getutf8(0xf0031);
    accents["backslash lyx"]           = getutf8(0xf0010);       // Used logos inserted with starting \backslash
    accents["backslash LyX"]           = getutf8(0xf0010);
    accents["backslash tex"]           = getutf8(0xf0011);
@@ -1557,7 +1428,7 @@ void Intervall::removeAccents()
      buildAccentsMap();
    static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
           "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
-      "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|guillemot(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
+      "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
    smatch sub;
    for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
      sub = *itacc;
@@ -2573,7 +2444,12 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
      case KeyInfo::doRemove: {
        // Remove the key with all parameters and following spaces
        size_t pos;
-      for (pos = actual._dataEnd+1; pos < interval_.par.length(); pos++) {
+      size_t start;
+      if (interval_.par[actual._dataEnd-1] == ' ')
+        start = actual._dataEnd;
+      else
+        start = actual._dataEnd+1;
+      for (pos = start; pos < interval_.par.length(); pos++) {
          if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%'))
            break;
        }
@@ -2896,7 +2772,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with
                 // can be digested by our search engine
                 LYXERR(Debug::FIND, "input: \"" << par << "\"");
                 result = splitOnKnownMacros(par.substr(0,parlen), isPatternString);
-               LYXERR(Debug::FIND, "After split: \"" << result << "\"");
+               LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\"");
         }
         else
                 result = par.substr(0, parlen);
@@ -2925,6 +2801,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with
                                 return "";
                         }
                 }
+
         }
         else {
                 // LYXERR(Debug::INFO, "No regex formats");
@@ -3030,8 +2907,11 @@ static void modifyRegexForMatchWord(string &t)
                 s += "\\S";
                 lastpos = sub.position(2) + sub.length(2);
         }
-       if (lastpos == 0)
+       if (lastpos == 0) {
+               s = "\\b" + t + "\\b";
+               t = s;
                 return;
+       }
         else if (lastpos < t.length())
                 s += t.substr(lastpos, t.length() - lastpos);
        t = "\\b" + s + "\\b";
@@ -3105,27 +2985,11 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                         LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
                         LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
                 }
-               LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
-               par_as_string = escape_for_regex(par_as_string);
+               // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
+               par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
                 // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them.
-               LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
-               LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
+               // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
                 ++close_wildcards;
-               /*
-               if (
-                       // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex)
-                       regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2")
-                       // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex)
-                       || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])( \\\\\\\\\\\\\\])\\'", "$1(.*?)$2")
-                       // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex)
-                       || regex_replace(par_as_string, par_as_string,
-                                        "(.*[^\\\\])( \\\\\\\\end\\\\\\{[a-zA-Z_]*)(\\\\\\*)?(\\\\\\})\\'", "$1(.*?)$2$3$4")
-                       // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex)
-                       || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2")
-                       ) {
-                       ++close_wildcards;
-               }
-               */
                 size_t lng = par_as_string.size();
                 if (!opt.ignoreformat) {
                         // Remove extra '\}' at end if not part of \{\.\}
@@ -3144,14 +3008,15 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                         if (lng < par_as_string.size())
                                 par_as_string = par_as_string.substr(0,lng);
                 }
+               LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
                 if ((lng > 0) && (par_as_string[0] == '^')) {
                         par_as_string = par_as_string.substr(1);
                         --lng;
                         opt.matchstart = true;
                 }
-               LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
-               LYXERR(Debug::FIND, "Open braces: " << open_braces);
-               LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
+               // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
+               // LYXERR(Debug::FIND, "Open braces: " << open_braces);
+               // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
  
                 // If entered regexp must match at begin of searched string buffer
                 // Kornel: Added parentheses to use $1 for size of the leading string
@@ -3166,13 +3031,6 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                                 string dest = "\\" + std::to_string(i+2);
                                 while (regex_replace(par_as_string, par_as_string, orig, dest));
                         }
-                       /* opt.matchword is ignored if using regex
-                         so expanding par_as_string with "\\b" seems appropriate here
-                         if regex contains for instance '.*' or '.+'
-                         1.) Nothing to do, if 'par_as_string' contains "\\b" already.
-                             (Means, that the user knows how to handle whole words
-                         2.) else replace '.' with "\\S" and wrap the regex with "\\b"
-                       */
                         if (opt.matchword) {
                                 modifyRegexForMatchWord(par_as_string);
                                 opt.matchword = false;
@@ -3186,93 +3044,37 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
         }
  }
  
-#if 0
-// Count number of characters in string
-// {]} ==> 1
-// \&  ==> 1
-// --- ==> 1
-// \\[a-zA-Z]+ ==> 1
-#if QTSEARCH
-static int computeSize(QStringRef s, int len)
-#define isLyxAlpha(arg) arg.isLetter()
-#else
-static int computeSize(string s, int len)
-#define isLyxAlpha(arg) isalpha(arg)
-#endif
-{
-       if (len == 0)
-               return 0;
-       int skip = 1;
-       int count = 0;
-       for (int i = 0; i < len; i += skip, count++) {
-               if (s.at(i) == '\\') {
-                       skip = 2;
-                       if (i + 1 < len && isLyxAlpha(s.at(i+1))) {
-                               for (int j = 2;  i+j < len; j++) {
-                                       if (! isLyxAlpha(s.at(i+j))) {
-                                               if (s.at(i+j) == ' ')
-                                                       skip++;
-                                               else if (s.at(i+j) == '{') {
-                                                       if (i+j+1 < len && s.at(i+j+1) == '}')
-                                                               skip += 2;
-                                                       else if (i + j + 1 >= len)
-                                                               skip++;
-                                               }
-                                               break;
-                                       }
-                                       skip++;
-                               }
-                       }
-               }
-               else if (s.at(i) == '{') {
-                       if (i + 1 < len && s.at(i+1) == '}')
-                               skip = 2;
-                       else
-                               skip = 3;
-               }
-               else if (s.at(i) == '-') {
-                       if (i+1 < len && s.at(i+1) == '-') {
-                               if (i + 2 < len && s.at(i+2) == '-')
-                                       skip = 3;
-                               else
-                                       skip = 2;
-                       }
-                       else
-                               skip = 1;
-               }
-               else {
-                       skip = 1;
-               }
-       }
-       return count;
-}
-#endif
-
  MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
  {
         MatchResult mres;
  
+       mres.searched_size = len;
         if (at_begin &&
                 (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
                 return mres;
  
         docstring docstr = stringifyFromForSearch(opt, cur, len);
         string str;
-       if (use_regexp || opt.casesensitive)
-               str = normalize(docstr);
-       else
-               str = normalize(lowercase(docstr));
+       str = normalize(docstr);
         if (!opt.ignoreformat) {
                 str = correctlanguagesetting(str, false, !opt.ignoreformat);
+               // remove closing '}' and '\n' to allow for use of '$' in regex
+               size_t lng = str.size();
+               while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n')))
+                       lng--;
+               if (lng != str.size()) {
+                       str = str.substr(0, lng);
+               }
         }
         if (str.empty()) {
                 mres.match_len = -1;
                 return mres;
         }
-       LYXERR(Debug::FIND, "Matching against     '" << lyx::to_utf8(docstr) << "'");
-       LYXERR(Debug::FIND, "After normalization: '" << str << "'");
+       LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'");
  
-       if (use_regexp) {
+       LASSERT(use_regexp, /**/);
+       {
+               // use_regexp always true
                 LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
  #if QTSEARCH
                 QString qstr = QString::fromStdString(str);
@@ -3396,34 +3198,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
  #endif
                 return mres;
         }
-
-       // else !use_regexp: but all code paths above return
-       LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='"
-                                << par_as_string << "', str='" << str << "'");
-       LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='"
-                                << lead_as_string << "', par_as_string_nolead='"
-                                << par_as_string_nolead << "'");
-
-       if (at_begin) {
-               LYXERR(Debug::FIND, "size=" << par_as_string.size()
-                                        << ", substr='" << str.substr(0, par_as_string.size()) << "'");
-               if (str.substr(0, par_as_string.size()) == par_as_string) {
-                       mres.match_len = par_as_string.size();
-                       mres.match2end = str.size();
-                       mres.pos = 0;
-                       return mres;
-               }
-       } else {
-               // Start the search _after_ the leading part
-               size_t pos = str.find(par_as_string_nolead, lead_as_string.size());
-               if (pos != string::npos) {
-                       mres.match_len = par_as_string.size();
-                       mres.match2end = str.size() - pos;
-                       mres.pos = pos;
-                       return mres;
-               }
-       }
-       return mres;
  }
  
  
@@ -3444,43 +3218,6 @@ MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at
         }
         else
                 return mres;
-       /* DEAD CODE follows
-       if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted())
-               return mres;
-       if ((len > 0) && (res < len)) {
-         mres.match_len = 0;
-         return mres;
-       }
-       Paragraph const & par = cur.paragraph();
-       bool ws_left = (cur.pos() > 0)
-               ? par.isWordSeparator(cur.pos() - 1)
-               : true;
-       bool ws_right;
-       if (len < 0)
-               ws_right = true;
-       else {
-               ws_right = (cur.pos() + len < par.size())
-               ? par.isWordSeparator(cur.pos() + len)
-               : true;
-       }
-       LYXERR(Debug::FIND,
-              "cur.pos()=" << cur.pos() << ", res=" << res
-              << ", separ: " << ws_left << ", " << ws_right
-              << ", len: " << len
-              << endl);
-       if (ws_left && ws_right) {
-         // Check for word separators inside the found 'word'
-         for (int i = 0; i < len; i++) {
-           if (par.isWordSeparator(cur.pos() + i)) {
-             mres.match_len = 0;
-             return mres;
-           }
-         }
-         return mres;
-       }
-       mres.match_len = 0;
-       return mres;
-       */
  }
  
  #if 0
@@ -3542,7 +3279,7 @@ string MatchStringAdv::normalize(docstring const & s) const
         // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
         // Kornel: Added textsl, textsf, textit, texttt and noun
         // + allow to search for colored text too
-       LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t);
+       LYXERR(Debug::FIND, "Removing stale empty macros from: " << t);
         while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
                 LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
         while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
@@ -3601,9 +3338,11 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
   */
  docstring latexifyFromCursor(DocIterator const & cur, int len)
  {
+       /*
         LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur);
         LYXERR(Debug::FIND, "  with cur.lastpost=" << cur.lastpos() << ", cur.lastrow="
                << cur.lastrow() << ", cur.lastcol=" << cur.lastcol());
+       */
         Buffer const & buf = *cur.buffer();
  
         odocstringstream ods;
@@ -3670,54 +3409,37 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
  
  #if defined(ResultsDebug)
  // Debugging output
-static void displayMResult(MatchResult &mres, int increment, string from)
+static void displayMResult(MatchResult &mres, string from, DocIterator & cur)
  {
-  LYXERR0( "from:\t\t\t" << from);
-  LYXERR0( "pos: " << mres.pos << " increment " << increment);
-  LYXERR0( "leadsize: " << mres.leadsize);
-  LYXERR0( "match_len: " << mres.match_len);
-  LYXERR0( "match_prefix: " << mres.match_prefix);
-  LYXERR0( "match2end: " << mres.match2end);
-  LYXERR0( "pos_len: " << mres.pos_len);       // Set in finalize
-  for (size_t i = 0; i < mres.result.size(); i++)
-    LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\"");
-}
-       #define displayMres(s,i, txt) displayMResult(s,i, xtx);
-#else
-       #define displayMres(s,i, txt)
-#endif
-
-/*
- * Not good, we miss possible matches containing also characters not found in
- * the innermost depth.
-static bool findAdvForwardInnermost(DocIterator & cur)
-{
-       size_t d;
-       DocIterator old_cur = cur;
-       int forwardCount = 0;
-       do {
-               d = cur.depth();
-               old_cur = cur;
-               cur.forwardPos();
-               if (!cur) {
-                       break;
-               }
-               if (cur.depth() > d) {
-                       forwardCount++;
-                       continue;
+       LYXERR0( "from:\t\t\t" << from);
+       string status;
+       if (mres.pos_len > 0) {
+               // Set in finalize
+               status = "FINALSEARCH";
+       }
+       else {
+               if (mres.match_len > 0) {
+                       if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize))
+                               status = "Good Match";
+                       else
+                               status = "Matched in";
                 }
-               if (cur.depth() == d)
-                       break;
-       } while(1);
-       cur = old_cur;
-       if (forwardCount > 0) {
-               LYXERR(Debug::FIND, "Forwarded " << forwardCount << " step(s) (searching for innermost match)");
-               return true;;
+               else
+                       status = "MissedSearch";
         }
-       else
-               return false;
+
+       LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")");
+       if ((mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0))
+               LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")");
+       if ((mres.pos > 0) || (mres.match_prefix > 0))
+               LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")");
+       for (size_t i = 0; i < mres.result.size(); i++)
+               LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\"");
  }
-*/
+       #define displayMres(s, txt, cur) displayMResult(s, txt, cur);
+#else
+       #define displayMres(s, txt, cur)
+#endif
  
  /** Finalize an advanced find operation, advancing the cursor to the innermost
   ** position that matches, plus computing the length of the matching text to
@@ -3738,8 +3460,9 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
         // so the search for "www" gives prefix_len = 7 (== strlen("http://"))
         // and although we search for only 3 chars, we find the whole hyperlink inset
         bool at_begin = (expected.match_prefix == 0);
-       //if (findAdvForwardInnermost(cur)) {
-       if (expected.match_len > 0) {
+       LASSERT(at_begin, /**/);
+       if (expected.match_len > 0 && at_begin) {
+               // Search for deepest match
                 old_cur = cur;
                 max_match = expected;
                 do {
@@ -3749,16 +3472,21 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
                                 break;
                         if (cur.depth() < d)
                                 break;
-                       if ((cur.depth() == d) && at_begin)
+                       if (cur.depth() == d)
                                 break;
-                       mres = match(cur, -1, at_begin);
-                       displayMres(mres, -1, "Checking innermost");
+                       size_t lastd = d;
+                       while (cur && cur.depth() > lastd) {
+                               lastd = cur.depth();
+                               mres = match(cur, -1, at_begin);
+                               displayMres(mres, "Checking innermost", cur);
+                               if (mres.match_len > 0)
+                                       break;
+                               // maybe deeper?
+                               cur.forwardPos();
+                       }
                         if (mres.match_len < expected.match_len)
                                 break;
-                       if (!at_begin && (mres.match_prefix > max_match.match_prefix))
-                               break;
                         max_match = mres;
-                       at_begin = (max_match.match_prefix == 0);
                         old_cur = cur;;
                 } while(1);
                 cur = old_cur;
@@ -3766,7 +3494,7 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
         else {
                 // (expected.match_len <= 0)
                 mres = match(cur);      /* match valid only if not searching whole words */
-               displayMres(mres, 0, "Start with negative match");
+               displayMres(mres, "Start with negative match", cur);
                 max_match = mres;
         }
         if (max_match.match_len <= 0) return fail;
@@ -3776,43 +3504,27 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
         int len = 1;
         if (cur.pos() + len > cur.lastpos())
           return fail;
-       // regexp should use \w+, \S+, or \b(some string)\b
-       // to search for whole words
-       if (match.opt.matchword && !match.use_regexp) {
-         LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
-         while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
-           ++len;
-           LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
-         }
-         // Length of matched text (different from len param)
-         static MatchResult old_match = match(cur, len, at_begin);
-         if (old_match.match_len < 0)
-           old_match = fail;
-         MatchResult new_match;
-         // Greedy behaviour while matching regexps
-         while ((new_match = match(cur, len + 1, at_begin)).match_len > old_match.match_len) {
-           ++len;
-           old_match = new_match;
-           LYXERR(Debug::FIND, "verifying   match with len = " << len);
-         }
-         return old_match;
-       }
-       else {
+
+       LASSERT(match.use_regexp, /**/);
+       {
            int minl = 1;
            int maxl = cur.lastpos() - cur.pos();
            // Greedy behaviour while matching regexps
            while (maxl > minl) {
              MatchResult mres2;
              mres2 = match(cur, len, at_begin);
-            displayMres(mres2, len, "Finalize loop");
-            int actual_match = mres2.match_len;
-            if (actual_match >= max_match.match_len) {
-              // actual_match > max_match _can_ happen,
+            displayMres(mres2, "Finalize loop", cur);
+            int actual_match_len = mres2.match_len;
+            if (actual_match_len >= max_match.match_len) {
+              // actual_match_len > max_match _can_ happen,
                // if the search area splits
                // some following word so that the regex
                // (e.g. 'r.*r\b' matches 'r' from the middle of the
                // splitted word)
                // This means, the len value is too big
+             actual_match_len = max_match.match_len;
+             max_match = mres2;
+             max_match.match_len = actual_match_len;
                maxl = len;
                if (maxl - minl < 4)
                  len = (int)((maxl + minl)/2);
@@ -3820,11 +3532,12 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
                  len = (int)(minl + (maxl - minl + 3)/4);
              }
              else {
-              // (actual_match < max_match)
+              // (actual_match_len < max_match.match_len)
                minl = len + 1;
                len = (int)((maxl + minl)/2);
              }
            }
+         len = minl;
            old_cur = cur;
            // Search for real start of matched characters
            while (len > 1) {
@@ -3844,6 +3557,7 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
                  // Ha, got it! The shorter selection has the same match length
                  len--;
                  old_cur = cur;
+               max_match = actual_match;
                }
                else {
                  // OK, the shorter selection matches less chars, revert to previous value
@@ -3864,6 +3578,7 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
              return fail;
            else {
              max_match.pos_len = len;
+           displayMres(max_match, "SEARCH RESULT", cur)
              return max_match;
            }
          }
@@ -3874,28 +3589,52 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
  {
         if (!cur)
                 return 0;
+       bool repeat = false;
+       DocIterator orig_cur;   // to be used if repeat not successful
+       MatchResult orig_mres;
         while (!theApp()->longOperationCancelled() && cur) {
                 //(void) findAdvForwardInnermost(cur);
                 LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
                 MatchResult mres = match(cur, -1, false);
-               displayMres(mres,-1, "Starting findForwardAdv")
+               string msg = "Starting";
+               if (repeat)
+                       msg = "Repeated";
+               displayMres(mres, msg + " findForwardAdv", cur)
                 int match_len = mres.match_len;
                 if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
                         LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
                         match_len = 0;
                 }
-               if (match_len > 0) {
+               if (match_len <= 0) {
+                       // This should exit nested insets, if any, or otherwise undefine the cursor.
+                       cur.pos() = cur.lastpos();
+                       LYXERR(Debug::FIND, "Advancing pos: cur=" << cur);
+                       cur.forwardPos();
+               }
+               else {  // match_len > 0
                         // Try to find the begin of searched string
                         int increment;
                         int firstInvalid = 100000;
-                       if (mres.match_prefix + mres.pos - mres.leadsize > 1)
-                         increment = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4;
-                       else
-                         increment = 1;
+                       {
+                               int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4;
+                               int incrcur = (cur.lastpos() - cur.pos() + 1 )*3/4;
+                               if (incrcur < incrmatch)
+                                       increment = incrcur;
+                               else
+                                       increment = incrmatch;
+                               if (increment < 1)
+                                       increment = 1;
+                       }
                         LYXERR(Debug::FIND, "Set increment to " << increment);
                         while (increment > 0) {
                                 DocIterator old_cur = cur;
-                               for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
+                               size_t skipping = cur.depth();
+                               for (int i = 0; i < increment && cur; i++) {
+                                       cur.forwardPos();
+                                       while (cur && cur.depth() > skipping) {
+                                               cur.pos() = cur.lastpos();
+                                               cur.forwardPos();
+                                       }
                                 }
                                 if (! cur || (cur.pit() > old_cur.pit())) {
                                         // Are we outside of the paragraph?
@@ -3905,7 +3644,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                 }
                                 else {
                                         MatchResult mres2 = match(cur, -1, false);
-                                       displayMres(mres2,increment, "findForwardAdv loop")
+                                       displayMres(mres2, "findForwardAdv loop", cur)
                                         switch (interpretMatch(mres, mres2)) {
                                         case MatchResult::newIsTooFar:
                                           // behind the expected match
@@ -3914,7 +3653,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                           increment /= 2;
                                           break;
                                         case MatchResult::newIsBetter:
-                                         // not reached ye, but cur.pos()+increment is bettert
+                                         // not reached yet, but cur.pos()+increment is better
                                           mres = mres2;
                                           firstInvalid -= increment;
                                           if (increment > firstInvalid*3/4)
@@ -3927,7 +3666,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                         default:
                                           // Todo@
                                           // Handle not like MatchResult::newIsTooFar
-                                         // LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix);
+                                         LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
                                           firstInvalid--;
                                           increment = increment*3/4;
                                           cur = old_cur;
@@ -3935,81 +3674,34 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                         }
                                 }
                         }
-                       // LYXERR0("Leaving first loop");
-                       {
-                         LYXERR(Debug::FIND, "Finalizing 1");
-                         MatchResult found_match = findAdvFinalize(cur, match, mres);
-                         if (found_match.match_len > 0) {
-                           LASSERT(found_match.pos_len > 0, /**/);
-                           match.FillResults(found_match);
-                           return found_match.pos_len;
-                         }
-                         else {
-                           // try next possible match
-                           cur.forwardPos();
-                           continue;
-                         }
-                       }
-                       // The following code is newer reached
-                       // but parts of it may be needed in future
-                       int match_len_zero_count = 0;
-                       MatchResult mres3;
-                       for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
-                               if (i++ > 3) {
-                                       mres3 = match(cur, -1, false);
-                                       displayMres(mres3, 1, 1 "Prepare finalize in findForwardAdv")
-                                       int remaining_len = mres3.match_len;
-                                       if (remaining_len <= 0) {
-                                               // Apparently the searched string is not in the remaining part
-                                               break;
-                                       }
-                                       else {
-                                               i = 0;
-                                       }
-                               }
-                               LYXERR(Debug::FIND, "Advancing cur: " << cur);
-                               mres3 = match(cur, 1);
-                               displayMres(mres3, 1, "Prepare 2 finalize in findForwardAdv")
-                               int match_len3 = mres3.match_len;
-                               if (match_len3 < 0)
+                       if (mres.match_len > 0) {
+                               if (mres.match_prefix + mres.pos - mres.leadsize > 0) {
+                                       repeat = true;
+                                       orig_cur = cur;
+                                       orig_mres = mres;
+                                       cur.forwardPos();
                                         continue;
-                               mres3 = match(cur);
-                               displayMres(mres3, 1, "Prepare 3 finalize in findForwardAdv")
-                               int match_len2 = mres3.match_len;
-                               LYXERR(Debug::FIND, "match_len2: " << match_len2);
-                               if (match_len2 > 0) {
-                                       // Sometimes in finalize we understand it wasn't a match
-                                       // and we need to continue the outest loop
-                                       LYXERR(Debug::FIND, "Finalizing 2");
-                                       MatchResult mres4 = findAdvFinalize(cur, match, mres.match_len);
-                                       if (mres4.match_len > 0) {
-                                               match.FillResults(mres4);
-                                               LASSERT(mres4.pos_len > 0, /**/);
-                                               return mres4.pos_len;
-                                       }
-                               }
-                               if (match_len2 > 0)
-                                       match_len_zero_count = 0;
-                               else if (match_len2 == 0)
-                                       match_len_zero_count++;
-                               if (match_len2 < 0) {
-                                       if (++match_len_zero_count > 3) {
-                                               LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len);
-                                       }
-                                       break;
                                 }
                         }
-                       if (!cur)
-                               return 0;
-               }
-               if (match_len >= 0 && cur.pit() < cur.lastpit()) {
-                       LYXERR(Debug::FIND, "Advancing par: cur=" << cur);
-                       cur.forwardPar();
-               } else {
-                       // This should exit nested insets, if any, or otherwise undefine the currsor.
-                       cur.pos() = cur.lastpos();
-                       LYXERR(Debug::FIND, "Advancing pos: cur=" << cur);
-                       cur.forwardPos();
+                       else if (repeat) {
+                               // seems to never be reached.
+                               cur = orig_cur;
+                               mres = orig_mres;
+                       }
+                       // LYXERR0("Leaving first loop");
+                       LYXERR(Debug::FIND, "Finalizing 1");
+                       MatchResult found_match = findAdvFinalize(cur, match, mres);
+                       if (found_match.match_len > 0) {
+                         LASSERT(found_match.pos_len > 0, /**/);
+                         match.FillResults(found_match);
+                         return found_match.pos_len;
+                       }
+                       else {
+                         // try next possible match
+                         cur.forwardPos();
+                         repeat = false;
+                         continue;
+                       }
                 }
         }
         return 0;
@@ -4174,7 +3866,6 @@ static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase other
  }
  } // namespace
  
-#if 1
  static bool replaceMatches(string &t, int maxmatchnum, vector <string> const & replacements)
  {
    // Should replace the string "$" + std::to_string(matchnum) with replacement
@@ -4202,7 +3893,6 @@ static bool replaceMatches(string &t, int maxmatchnum, vector <string> const & r
    t = s;
    return true;
  }
-#endif
  
  ///
  static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
@@ -4390,7 +4080,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt
  
  istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
  {
-       LYXERR(Debug::FIND, "parsing");
+       // LYXERR(Debug::FIND, "parsing");
         string s;
         string line;
         getline(is, line);
@@ -4402,7 +4092,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
                         break;
                 getline(is, line);
         }
-       LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
+       // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
         opt.find_buf_name = from_utf8(s);
         is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all;
         is.get();       // Waste space before replace string
@@ -4416,7 +4106,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
                         break;
                 getline(is, line);
         }
-       LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
+       // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
         opt.repl_buf_name = from_utf8(s);
         is >> opt.keep_case;
         int i;
@@ -4425,9 +4115,11 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
         is >> i;
         opt.restr = FindAndReplaceOptions::SearchRestriction(i);
  
+       /*
         LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' '
                << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' '
                << opt.scope << ' ' << opt.restr);
+       */
         return is;
  }