]> git.lyx.org Git - features.git/blobdiff - src/lyxfind.cpp
FindAdv: Fix interpretation of match-results
[features.git] / src / lyxfind.cpp
index ab20921d0e73cc93c6a4ca798f9630f8eccb825f..0b252f3a838cc3b3295428c2e45a8d85f5cbeab1 100644 (file)
@@ -52,7 +52,7 @@
 #include <map>
 #include <regex>
 
-//#define ResultsDebug
+#define ResultsDebug
 #define USE_QT_FOR_SEARCH
 #if defined(USE_QT_FOR_SEARCH)
        #include <QtCore>       // sets QT_VERSION
@@ -640,101 +640,6 @@ namespace {
 
 typedef vector<pair<string, string> > Escapes;
 
-/// A map of symbols and their escaped equivalent needed within a regex.
-/// @note Beware of order
-/*
-Escapes const & get_regexp_escapes()
-{
-       typedef std::pair<std::string, std::string> P;
-
-       static Escapes escape_map;
-       if (escape_map.empty()) {
-               escape_map.push_back(P("$", "_x_$"));
-               escape_map.push_back(P("{", "_x_{"));
-               escape_map.push_back(P("}", "_x_}"));
-               escape_map.push_back(P("[", "_x_["));
-               escape_map.push_back(P("]", "_x_]"));
-               escape_map.push_back(P("(", "_x_("));
-               escape_map.push_back(P(")", "_x_)"));
-               escape_map.push_back(P("+", "_x_+"));
-               escape_map.push_back(P("*", "_x_*"));
-               escape_map.push_back(P(".", "_x_."));
-               escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)"));
-               escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)"));
-               escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)"));
-               escape_map.push_back(P("_x_", "\\"));
-       }
-       return escape_map;
-}
-*/
-
-/// A map of lyx escaped strings and their unescaped equivalent.
-/*
-Escapes const & get_lyx_unescapes()
-{
-       typedef std::pair<std::string, std::string> P;
-
-       static Escapes escape_map;
-       if (escape_map.empty()) {
-               escape_map.push_back(P("\\%", "%"));
-               escape_map.push_back(P("\\{", "{"));
-               escape_map.push_back(P("\\}", "}"));
-               escape_map.push_back(P("\\mathcircumflex ", "^"));
-               escape_map.push_back(P("\\mathcircumflex", "^"));
-               escape_map.push_back(P("\\backslash ", "\\"));
-               escape_map.push_back(P("\\backslash", "\\"));
-               escape_map.push_back(P("\\sim ", "~"));
-               escape_map.push_back(P("\\sim", "~"));
-       }
-       return escape_map;
-}
-*/
-
-/// A map of escapes turning a regexp matching text to one matching latex.
-/*
-Escapes const & get_regexp_latex_escapes()
-{
-       typedef std::pair<std::string, std::string> P;
-
-       static Escapes escape_map;
-       if (escape_map.empty()) {
-               escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)"));
-               escape_map.push_back(P("(<?!\\\\\\\\textbackslash)\\{", "\\\\\\{"));
-               escape_map.push_back(P("(<?!\\\\\\\\textbackslash\\\\\\{)\\}", "\\\\\\}"));
-               escape_map.push_back(P("\\[", "\\{\\[\\}"));
-               escape_map.push_back(P("\\]", "\\{\\]\\}"));
-               escape_map.push_back(P("\\^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)"));
-               escape_map.push_back(P("%", "\\\\\\%"));
-               escape_map.push_back(P("#", "\\\\#"));
-       }
-       return escape_map;
-}
-*/
-
-/** @todo Probably the maps need to be migrated to regexps, in order to distinguish if
- ** the found occurrence were escaped.
- **/
-/*
-string apply_escapes(string s, Escapes const & escape_map)
-{
-       LYXERR(Debug::FIND, "Escaping: '" << s << "'");
-       Escapes::const_iterator it;
-       for (it = escape_map.begin(); it != escape_map.end(); ++it) {
-//             LYXERR(Debug::FIND, "Escaping " << it->first << " as " << it->second);
-               unsigned int pos = 0;
-               while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) {
-                       s.replace(pos, it->first.length(), it->second);
-                       LYXERR(Debug::FIND, "After escape: " << s);
-                       pos += it->second.length();
-//                     LYXERR(Debug::FIND, "pos: " << pos);
-               }
-       }
-       LYXERR(Debug::FIND, "Escaped : '" << s << "'");
-       return s;
-}
-*/
-
-
 string string2regex(string in)
 {
        static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" };
@@ -780,12 +685,10 @@ string correctRegex(string t, bool withformat)
                        if (sub.str(4) == "backslash") {
                                replace = "\\";
                                if (withformat) {
-                                       // tranforms '\backslash \{' into '\{'
+                                       // transforms '\backslash \{' into '\{'
                                        // and '\{' into '{'
-                                       sregex_iterator it2 = it;
-                                       ++it2;
-                                       smatch sub2 = *it2;
-                                       if ((sub2.str(3) == "{") ||  (sub2.str(3) == "}")) {
+                                       string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
+                                       if ((next == "\\{") || (next == "\\}")) {
                                                replace = "";
                                                backslashed = true;
                                        }
@@ -859,62 +762,6 @@ bool regex_replace(string const & s, string & t, string const & searchstr,
        return rv;
 }
 
-#if 0
-/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces.
- **
- ** Verify that closed braces exactly match open braces. This avoids that, for example,
- ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'.
- **
- ** @param unmatched
- ** Number of open braces that must remain open at the end for the verification to succeed.
- **/
-#if QTSEARCH
-bool braces_match(QString const & beg,
-                 int unmatched = 0)
-#else
-bool braces_match(string const & beg,
-               int unmatched = 0)
-#endif
-{
-       int open_pars = 0;
-#if QTSEARCH
-       LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'");
-#else
-       LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'");
-#endif
-       int lastidx = beg.size();
-       for (int i=0; i < lastidx; ++i) {
-               // Skip escaped braces in the count
-#if QTSEARCH
-               QChar c = beg.at(i);
-#else
-               char c = beg.at(i);
-#endif
-               if (c == '\\') {
-                       ++i;
-                       if (i >= lastidx)
-                               break;
-               } else if (c == '{') {
-                       ++open_pars;
-               } else if (c == '}') {
-                       if (open_pars == 0) {
-                               LYXERR(Debug::FIND, "Found unmatched closed brace");
-                               return false;
-                       } else
-                               --open_pars;
-               }
-       }
-       if (open_pars != unmatched) {
-               LYXERR(Debug::FIND, "Found " << open_pars
-                      << " instead of " << unmatched
-                      << " unmatched open braces at the end of count");
-               return false;
-       }
-       LYXERR(Debug::FIND, "Braces match as expected");
-       return true;
-}
-#endif
-
 class MatchResult {
 public:
        enum range {
@@ -935,16 +782,14 @@ public:
 
 static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
 {
-  int range = oldres.match_len;
-  if (range > 0) range--;
-  if (newres.match2end < oldres.match2end - oldres.match_len)
+  if (newres.match2end < oldres.match2end)
     return MatchResult::newIsTooFar;
   if (newres.match_len < oldres.match_len)
     return MatchResult::newIsTooFar;
-  if ((newres.match_len == oldres.match_len) &&
-      (newres.match2end < oldres.match2end + range) &&
-      (newres.match2end > oldres.match2end - range)) {
-    return MatchResult::newIsBetter;
+
+  if (newres.match_len == oldres.match_len) {
+    if (newres.match2end == oldres.match2end)
+      return MatchResult::newIsBetter;
   }
   return MatchResult::newIsInvalid;
 }
@@ -2927,7 +2772,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with
                // can be digested by our search engine
                LYXERR(Debug::FIND, "input: \"" << par << "\"");
                result = splitOnKnownMacros(par.substr(0,parlen), isPatternString);
-               LYXERR(Debug::FIND, "After split\"" << result << "\"");
+               LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\"");
        }
        else
                result = par.substr(0, parlen);
@@ -3140,11 +2985,10 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                        LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
                        LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
                }
-               LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
+               // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
                par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
                // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them.
-               LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
-               LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
+               // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
                ++close_wildcards;
                size_t lng = par_as_string.size();
                if (!opt.ignoreformat) {
@@ -3164,14 +3008,15 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                        if (lng < par_as_string.size())
                                par_as_string = par_as_string.substr(0,lng);
                }
+               LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
                if ((lng > 0) && (par_as_string[0] == '^')) {
                        par_as_string = par_as_string.substr(1);
                        --lng;
                        opt.matchstart = true;
                }
-               LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
-               LYXERR(Debug::FIND, "Open braces: " << open_braces);
-               LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
+               // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
+               // LYXERR(Debug::FIND, "Open braces: " << open_braces);
+               // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
 
                // If entered regexp must match at begin of searched string buffer
                // Kornel: Added parentheses to use $1 for size of the leading string
@@ -3199,68 +3044,6 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
        }
 }
 
-#if 0
-// Count number of characters in string
-// {]} ==> 1
-// \&  ==> 1
-// --- ==> 1
-// \\[a-zA-Z]+ ==> 1
-#if QTSEARCH
-static int computeSize(QStringRef s, int len)
-#define isLyxAlpha(arg) arg.isLetter()
-#else
-static int computeSize(string s, int len)
-#define isLyxAlpha(arg) isalpha(arg)
-#endif
-{
-       if (len == 0)
-               return 0;
-       int skip = 1;
-       int count = 0;
-       for (int i = 0; i < len; i += skip, count++) {
-               if (s.at(i) == '\\') {
-                       skip = 2;
-                       if (i + 1 < len && isLyxAlpha(s.at(i+1))) {
-                               for (int j = 2;  i+j < len; j++) {
-                                       if (! isLyxAlpha(s.at(i+j))) {
-                                               if (s.at(i+j) == ' ')
-                                                       skip++;
-                                               else if (s.at(i+j) == '{') {
-                                                       if (i+j+1 < len && s.at(i+j+1) == '}')
-                                                               skip += 2;
-                                                       else if (i + j + 1 >= len)
-                                                               skip++;
-                                               }
-                                               break;
-                                       }
-                                       skip++;
-                               }
-                       }
-               }
-               else if (s.at(i) == '{') {
-                       if (i + 1 < len && s.at(i+1) == '}')
-                               skip = 2;
-                       else
-                               skip = 3;
-               }
-               else if (s.at(i) == '-') {
-                       if (i+1 < len && s.at(i+1) == '-') {
-                               if (i + 2 < len && s.at(i+2) == '-')
-                                       skip = 3;
-                               else
-                                       skip = 2;
-                       }
-                       else
-                               skip = 1;
-               }
-               else {
-                       skip = 1;
-               }
-       }
-       return count;
-}
-#endif
-
 MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
 {
        MatchResult mres;
@@ -3272,10 +3055,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 
        docstring docstr = stringifyFromForSearch(opt, cur, len);
        string str;
-       if (use_regexp || opt.casesensitive)
-               str = normalize(docstr);
-       else
-               str = normalize(lowercase(docstr));
+       str = normalize(docstr);
        if (!opt.ignoreformat) {
                str = correctlanguagesetting(str, false, !opt.ignoreformat);
                // remove closing '}' and '\n' to allow for use of '$' in regex
@@ -3290,10 +3070,11 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
                mres.match_len = -1;
                return mres;
        }
-       LYXERR(Debug::FIND, "Matching against     '" << lyx::to_utf8(docstr) << "'");
-       LYXERR(Debug::FIND, "After normalization: '" << str << "'");
+       LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'");
 
-       if (use_regexp) {
+       LASSERT(use_regexp, /**/);
+       {
+               // use_regexp always true
                LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
 #if QTSEARCH
                QString qstr = QString::fromStdString(str);
@@ -3417,34 +3198,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 #endif
                return mres;
        }
-
-       // else !use_regexp: but all code paths above return
-       LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='"
-                                << par_as_string << "', str='" << str << "'");
-       LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='"
-                                << lead_as_string << "', par_as_string_nolead='"
-                                << par_as_string_nolead << "'");
-
-       if (at_begin) {
-               LYXERR(Debug::FIND, "size=" << par_as_string.size()
-                                        << ", substr='" << str.substr(0, par_as_string.size()) << "'");
-               if (str.substr(0, par_as_string.size()) == par_as_string) {
-                       mres.match_len = par_as_string.size();
-                       mres.match2end = str.size();
-                       mres.pos = 0;
-                       return mres;
-               }
-       } else {
-               // Start the search _after_ the leading part
-               size_t pos = str.find(par_as_string_nolead, lead_as_string.size());
-               if (pos != string::npos) {
-                       mres.match_len = par_as_string.size();
-                       mres.match2end = str.size() - pos;
-                       mres.pos = pos;
-                       return mres;
-               }
-       }
-       return mres;
 }
 
 
@@ -3526,7 +3279,7 @@ string MatchStringAdv::normalize(docstring const & s) const
        // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
        // Kornel: Added textsl, textsf, textit, texttt and noun
        // + allow to seach for colored text too
-       LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t);
+       LYXERR(Debug::FIND, "Removing stale empty macros from: " << t);
        while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
                LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
        while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
@@ -3585,9 +3338,11 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
  */
 docstring latexifyFromCursor(DocIterator const & cur, int len)
 {
+       /*
        LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur);
        LYXERR(Debug::FIND, "  with cur.lastpost=" << cur.lastpos() << ", cur.lastrow="
               << cur.lastrow() << ", cur.lastcol=" << cur.lastcol());
+       */
        Buffer const & buf = *cur.buffer();
 
        odocstringstream ods;
@@ -3749,29 +3504,9 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma
        int len = 1;
        if (cur.pos() + len > cur.lastpos())
          return fail;
-       // regexp should use \w+, \S+, or \b(some string)\b
-       // to search for whole words
-       if (match.opt.matchword && !match.use_regexp) {
-         LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
-         while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
-           ++len;
-           LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
-         }
-         // Length of matched text (different from len param)
-         static MatchResult old_match = match(cur, len, at_begin);
-         if (old_match.match_len < 0)
-           old_match = fail;
-         MatchResult new_match;
-         // Greedy behaviour while matching regexps
-         while ((new_match = match(cur, len + 1, at_begin)).match_len > old_match.match_len) {
-           ++len;
-           old_match = new_match;
-           LYXERR(Debug::FIND, "verifying   match with len = " << len);
-         }
-         displayMres(old_match, "SEARCH RESULT", cur)
-         return old_match;
-       }
-       else {
+
+       LASSERT(match.use_regexp, /**/);
+       {
           int minl = 1;
           int maxl = cur.lastpos() - cur.pos();
           // Greedy behaviour while matching regexps
@@ -3855,6 +3590,8 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
        if (!cur)
                return 0;
        bool repeat = false;
+       DocIterator orig_cur;   // to be used if repeat not successful
+       MatchResult orig_mres;
        while (!theApp()->longOperationCancelled() && cur) {
                //(void) findAdvForwardInnermost(cur);
                LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
@@ -3929,7 +3666,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                        default:
                                          // Todo@
                                          // Handle not like MatchResult::newIsTooFar
-                                         LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix);
+                                         LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
                                          firstInvalid--;
                                          increment = increment*3/4;
                                          cur = old_cur;
@@ -3937,10 +3674,19 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                        }
                                }
                        }
-                       if (mres.match_len > 0 && mres.match_prefix + mres.pos - mres.leadsize > 0) {
-                               repeat = true;
-                               cur.forwardPos();
-                               continue;
+                       if (mres.match_len > 0) {
+                               if (mres.match_prefix + mres.pos - mres.leadsize > 0) {
+                                       repeat = true;
+                                       orig_cur = cur;
+                                       orig_mres = mres;
+                                       cur.forwardPos();
+                                       continue;
+                               }
+                       }
+                       else if (repeat) {
+                               // seems to never be reached.
+                               cur = orig_cur;
+                               mres = orig_mres;
                        }
                        // LYXERR0("Leaving first loop");
                        LYXERR(Debug::FIND, "Finalizing 1");
@@ -4120,7 +3866,6 @@ static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase other
 }
 } // namespace
 
-#if 1
 static bool replaceMatches(string &t, int maxmatchnum, vector <string> const & replacements)
 {
   // Should replace the string "$" + std::to_string(matchnum) with replacement
@@ -4148,7 +3893,6 @@ static bool replaceMatches(string &t, int maxmatchnum, vector <string> const & r
   t = s;
   return true;
 }
-#endif
 
 ///
 static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
@@ -4336,7 +4080,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt
 
 istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
 {
-       LYXERR(Debug::FIND, "parsing");
+       // LYXERR(Debug::FIND, "parsing");
        string s;
        string line;
        getline(is, line);
@@ -4348,7 +4092,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
                        break;
                getline(is, line);
        }
-       LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
+       // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
        opt.find_buf_name = from_utf8(s);
        is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all;
        is.get();       // Waste space before replace string
@@ -4362,7 +4106,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
                        break;
                getline(is, line);
        }
-       LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
+       // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
        opt.repl_buf_name = from_utf8(s);
        is >> opt.keep_case;
        int i;
@@ -4371,9 +4115,11 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
        is >> i;
        opt.restr = FindAndReplaceOptions::SearchRestriction(i);
 
+       /*
        LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' '
               << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' '
               << opt.scope << ' ' << opt.restr);
+       */
        return is;
 }