FindAdv:

[features.git] / src / lyxfind.cpp
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index 4df0f1e1c5334a17510d2a15559d895401685f55..609f38c7815fce43eed133435c94a27cde1ee14b 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -812,6 +812,17 @@ namespace {
  
  typedef vector<pair<string, string> > Escapes;
  
+// Regex fragment matching exactly `count` whitespace characters:
+// empty for count <= 0, \s for 1, \s{count} otherwise.
+static string getRegexSpaceCount(int count)
+{
+       if (count <= 0)
+               return "";
+       if (count == 1)
+               return "\\s";
+       return "\\s{" + std::to_string(count) + "}";
+}
+
  string string2regex(string in)
  {
         static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\$\\])" };
@@ -833,20 +844,14 @@ string string2regex(string in)
                 }
                 else {
                         if (blanks > 0) {
-                               if (blanks > 1)
-                                       temp += "\\s+";
-                               else
-                                       temp += "\\s";
+                               temp += getRegexSpaceCount(blanks);
                         }
                         temp += tempx[i];
                         blanks = 0;
                 }
         }
         if (blanks > 0) {
-               if (blanks > 1)
-                       temp += "\\s+";
-               else
-                       temp += "\\s";
+               temp += getRegexSpaceCount(blanks);
         }
  
         string temp2("");
@@ -887,6 +892,7 @@ string correctRegex(string t, bool withformat)
                 buildAccentsMap();
  
         //LYXERR0("correctRegex input '" << t << "'");
+       int skip = 0;
         for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
                 sub = *it;
                 string replace;
@@ -903,6 +909,10 @@ string correctRegex(string t, bool withformat)
                                                 replace = "";
                                                 backslashed = true;
                                         }
+                                       else if (withformat && next[0] == '$') {
+                                               replace = accents["lyxdollar"];
+                                               skip = 1;       // Skip following '$'
+                                       }
                                 }
                         }
                         else if (sub.str(4) == "mathcircumflex")
@@ -955,7 +965,8 @@ string correctRegex(string t, bool withformat)
                 if (lastpos < (size_t) sub.position(2))
                         s += std::regex_replace(t.substr(lastpos, sub.position(2) - lastpos), protectedSpace, R"( )");
                 s += replace;
-               lastpos = sub.position(2) + sub.length(2);
+               lastpos = sub.position(2) + sub.length(2) + skip;
+               skip = 0;
         }
         if (lastpos == 0)
                 s = std::regex_replace(t, protectedSpace, R"( )");
@@ -1153,11 +1164,11 @@ static docstring buffer_to_latex(Buffer & buffer)
         runparams.nice = true;
         setFindParams(runparams);
         if (ignoreFormats.getDeleted())
-               runparams.for_search = OutputParams::SearchWithoutDeleted;
+               runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
         else
-               runparams.for_search = OutputParams::SearchWithDeleted;
+               runparams.find_set_feature(OutputParams::SearchWithDeleted);
         if (ignoreFormats.getNonContent()) {
-               runparams.for_search |= OutputParams::SearchNonOutput;
+               runparams.find_add_feature(OutputParams::SearchNonOutput);
         }
         pit_type const endpit = buffer.paragraphs().size();
         for (pit_type pit = 0; pit != endpit; ++pit) {
@@ -1231,13 +1242,13 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
                 int option = AS_STR_INSETS |AS_STR_PLAINTEXT;
                 if (ignoreFormats.getDeleted()) {
                         option |= AS_STR_SKIPDELETE;
-                       runparams.for_search = OutputParams::SearchWithoutDeleted;
+                       runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
                 }
                 else {
-                       runparams.for_search = OutputParams::SearchWithDeleted;
+                       runparams.find_set_feature(OutputParams::SearchWithDeleted);
                 }
                 if (ignoreFormats.getNonContent()) {
-                       runparams.for_search |= OutputParams::SearchNonOutput;
+                       runparams.find_add_feature(OutputParams::SearchNonOutput);
                 }
                 string t("");
                 for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
@@ -1417,8 +1428,8 @@ public:
         string par;
         int ignoreidx;
         static vector<Border> borders;
-       int depts[MAXOPENED];
-       int closes[MAXOPENED];
+       static vector<int> depts;
+       static vector<int> closes;
         int actualdeptindex;
         int previousNotIgnored(int) const;
         int nextNotIgnored(int) const;
@@ -1430,6 +1441,7 @@ public:
         void removeAccents();
         void setForDefaultLang(KeyInfo const & defLang) const;
         int findclosing(int start, int end, char up, char down, int repeat);
+       void removeInvalidClosings(void);
         void handleParentheses(int lastpos, bool closingAllowed);
         bool hasTitle;
         // Number of disabled language specs up
@@ -1442,6 +1454,8 @@ public:
  };
  
  vector<Border> Intervall::borders = vector<Border>(30);
+vector<int> Intervall::depts = vector<int>(30);
+vector<int> Intervall::closes = vector<int>(30);
  
  int Intervall::isOpeningPar(int pos) const
  {
@@ -1475,6 +1489,8 @@ void Intervall::setForDefaultLang(KeyInfo const & defLang) const
         }
  }
  
+#if 0
+// Not needed, because depts and closes are now dynamically expanded
  static void checkDepthIndex(int val)
  {
         static int maxdepthidx = MAXOPENED-2;
@@ -1488,6 +1504,7 @@ static void checkDepthIndex(int val)
                 LYXERR(Debug::INFO, "maxdepthidx now " << val);
         }
  }
+#endif
  
  #if 0
  // Not needed, because borders are now dynamically expanded
@@ -1961,6 +1978,7 @@ static void buildAccentsMap()
         accents["braceright"]    = getutf8(0xf0031);
         accents["lyxtilde"]      = getutf8(0xf0032);
         accents["sim"]           = getutf8(0xf0032);
+       accents["lyxdollar"]     = getutf8(0xf0033);
         accents["backslash lyx"]           = getutf8(0xf0010);  // Used logos inserted with starting \backslash
         accents["backslash LyX"]           = getutf8(0xf0010);
         accents["backslash tex"]           = getutf8(0xf0011);
@@ -2036,7 +2054,8 @@ void Intervall::removeAccents()
         if (accents.empty())
                 buildAccentsMap();
         static regex const accre("\\\\("
-                                "([\\S]|[A-Za-z]+)\\{[^\\{\\}]+\\}"
+                                "([\\S]|[A-Za-z]+)\\{[^\\\\\\{\\}]+\\}"
+                                "|([\\S]|[A-Za-z]+)\\{\\\\[ij](math)?\\}"
                                  "|("
                                  "(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))"
                                  "|[A-Za-z]+"
@@ -2072,9 +2091,13 @@ void Intervall::removeAccents()
  void Intervall::handleOpenP(int i)
  {
         actualdeptindex++;
+       if ((size_t) actualdeptindex >= depts.size()) {        // new index lies beyond the current size
+               depts.resize(actualdeptindex + 30);     // closes is resized in step with depts
+               closes.resize(actualdeptindex + 30);
+       }
         depts[actualdeptindex] = i+1;
         closes[actualdeptindex] = -1;
-       checkDepthIndex(actualdeptindex);
+       // checkDepthIndex(actualdeptindex);    (superseded by the resize above)
  }
  
  void Intervall::handleCloseP(int i, bool closingAllowed)
@@ -2236,6 +2259,27 @@ int Intervall::findclosing(int start, int end, char up = '{', char down = '}', i
         return end;
  }
  
+// Call addIntervall(i, i+1) for each '}' in par with no '{' open before it
+// (this can happen if there are deleted parts).
+void Intervall::removeInvalidClosings(void)
+{
+       int open = 0;   // '{' seen so far that are not yet closed
+       for (unsigned i = 0; i < par.size(); ++i) {
+               char const ch = par[i];
+               if (ch == '\\') {
+                       ++i;    // also step over the char after the backslash
+                       continue;
+               }
+               if (ch == '{')
+                       ++open;
+               else if (ch == '}' && open > 0)
+                       --open;
+               else if (ch == '}') {
+                       addIntervall(i, i+1);
+                       LYXERR(Debug::FINDVERBOSE, "removed invalid closing '}' at " << i);
+               }
+       }
+}
  class MathInfo {
         class MathEntry {
         public:
@@ -2323,6 +2367,7 @@ void LatexInfo::buildEntries(bool isPatternString)
         static bool removeMathHull = false;
  
         interval_.removeAccents();
+       interval_.removeInvalidClosings();
  
         for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) {
                 submath = *itmath;
@@ -2728,7 +2773,8 @@ void LatexInfo::buildKeys(bool isPatternString)
         if (keysBuilt && !isPatternString) return;
  
         // Keys to ignore in any case
-       makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+       makeKey("text|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+       makeKey("nonumber|notag", KeyInfo(KeyInfo::headRemove, 0, true), true);
         // Known standard keys with 1 parameter.
         // Split is done, if not at start of region
         makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
@@ -3606,7 +3652,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                 CreateRegexp(opt, "", "", "");
                 return;
         }
-       use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
+       use_regexp = ds.find(from_utf8("\\regexp{")) != std::string::npos;
         if (opt.replace_all && previous_single_replace) {
                 previous_single_replace = false;
                 num_replaced = 0;
@@ -3695,7 +3741,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                                         break;
                         }
                         if (lng < par_as_string.size())
-                               par_as_string = par_as_string.substr(0,lng);
+                               par_as_string.resize(lng);
                 }
                 LYXERR(Debug::FINDVERBOSE, "par_as_string after correctRegex is '" << par_as_string << "'");
                 if ((lng > 0) && (par_as_string[0] == '^')) {
@@ -3738,9 +3784,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
         MatchResult mres;
  
         mres.searched_size = len;
-       if (at_begin &&
-                       (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
-               return mres;
  
         docstring docstr = stringifyFromForSearch(opt, cur, len);
         string str;
@@ -3941,8 +3984,79 @@ static bool simple_replace(string &t, string from, string to)
  }
  #endif
  
-string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
+#if 1
+static string convertLF2Space(docstring const &s, bool ignore_format)
  {
+       // Using original docstring to handle '\n'
+       // The pieces of s around each handled '\n' are appended to t via lyx::to_utf8().
+       if (s.size() == 0) return "";
+       stringstream t;
+       size_t pos;
+       size_t start = 0;
+       size_t end = s.size() - 1;      // index of the last character still considered
+       if (!ignore_format) {   // format search: trim '\n' at both ends first
+               while (s[start] == '\n' && start <= end)
+                       start++;
+               while (end >= start && s[end] == '\n')
+                       end--;
+               if (start >= end + 1)   // s consisted of '\n' only
+                       return "";
+       }
+       do {
+               bool dospace = true;    // append a ' ' after the copied piece
+               int skip = -1;  // chars before the '\n' to drop; -1 copies the '\n' itself too
+               pos = s.find('\n', start);
+               if (pos >= end) {       // no '\n' before index end (npos included): copy the rest as is
+                       t << lyx::to_utf8(s.substr(start, end + 1 - start));
+                       break;
+               }
+               if (!ignore_format) {
+                       if ((pos > start + 1) &&
+                            s[pos-1] == '\\' &&
+                            s[pos-2] == '\\') {
+                               skip = 2;       // drop the two backslashes as well
+                               if ((pos > start + 2) &&
+                                   (s[pos+1] == '~' || isSpace(s[pos+1]) ||
+                                    s[pos-3] == '~' || isSpace(s[pos-3]))) {
+                                       // discard "\\\\\n", do not replace with space
+                                       dospace = false;
+                               }
+                       }
+                       else if (pos > start) {
+                               if (s[pos-1] == '%') {
+                                       skip = 1;       // drop the '%' directly before the '\n'
+                                       while ((pos > start+skip) && (s[pos-1-skip] == '%'))
+                                               skip++; // ... and every further '%' in front of it
+                                       if ((pos > start+skip) &&
+                                           (s[pos+1] == '~' || isSpace(s[pos+1]) ||
+                                            s[pos-1-skip] == '~' || isSpace(s[pos-1-skip]))) {
+                                               // discard '%%%%%\n'
+                                               dospace = false;
+                                       }
+                               }
+                               else if (!isAlnumASCII(s[pos+1]) || !isAlnumASCII(s[pos-1])) {
+                                       dospace = false;
+                                       skip = 0;       // remove the '\n' only
+                               }
+                       }
+               }
+               else {
+                       dospace = true;
+                       skip = 0;       // formats ignored: a '\n' handled here is replaced by one space
+               }
+               t << lyx::to_utf8(s.substr(start, pos-skip-start));     // NOTE(review): skip == -1 keeps the '\n' and a space may still follow - confirm intended
+               if (dospace)
+                       t << ' ';
+               start = pos+1;  // continue right behind the '\n'
+       } while (start <= end);
+       return(t.str());
+}
+
+#else
+static string convertLF2Space(docstring const & s, bool ignore_format)
+{
+       // Using utf8-converted string to handle '\n'
+
         string t;
         t = lyx::to_utf8(s);
         // Remove \n at begin
@@ -3957,10 +4071,11 @@ string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
         while ((pos = t.find("\n")) != string::npos) {
                 if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
                         // Handle '\\\n'
-                       if (isAlnumASCII(t[pos+1])) {
+                       if (isPrintableNonspace(t[pos+1]) && ((pos < 3) || isPrintableNonspace(t[pos-3]))) {
                                 t.replace(pos-2, 3, " ");
                         }
                         else {
+                               // Already a space there
                                 t.replace(pos-2, 3, "");
                         }
                 }
@@ -3984,19 +4099,29 @@ string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
                         }
                 }
         }
-       // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
-       // Kornel: Added textsl, textsf, textit, texttt and noun
-       // + allow to seach for colored text too
-       LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t);
-       while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
-               LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
-       while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
-               LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << t);
-       while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
+       return(t);
  
-       return t;
  }
+#endif
  
+// Normalize s for matching: run it through convertLF2Space() and, unless
+// formats are ignored, strip the empty macro shells left over by latexify.
+string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
+{
+       string result = convertLF2Space(s, ignore_format);
+       // The replacements below are not appropriate in non-format-search mode
+       if (ignore_format)
+               return result;
+       // Empty font/emphasis/underline/strike-out macros (e.g. \emph{}, \textbf{})
+       LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << result);
+       while (regex_replace(result, result, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
+               LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << result);
+       // Empty sectioning macros, then empty \foreignlanguage, \textcolor, \item
+       while (regex_replace(result, result, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
+               LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << result);
+       while (regex_replace(result, result, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
+       return result;
+}
  
  docstring stringifyFromCursor(DocIterator const & cur, int len)
  {
@@ -4014,13 +4139,13 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                 int option = AS_STR_INSETS | AS_STR_PLAINTEXT;
                 if (ignoreFormats.getDeleted()) {
                         option |= AS_STR_SKIPDELETE;
-                       runparams.for_search = OutputParams::SearchWithoutDeleted;
+                       runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
                 }
                 else {
-                       runparams.for_search = OutputParams::SearchWithDeleted;
+                       runparams.find_set_feature(OutputParams::SearchWithDeleted);
                 }
                 if (ignoreFormats.getNonContent()) {
-                       runparams.for_search |= OutputParams::SearchNonOutput;
+                       runparams.find_add_feature(OutputParams::SearchNonOutput);
                 }
                 LYXERR(Debug::FINDVERBOSE, "Stringifying with cur: "
                        << cur << ", from pos: " << cur.pos() << ", end: " << end);
@@ -4067,13 +4192,13 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
         runparams.nice = false;
         setFindParams(runparams);
         if (ignoreFormats.getDeleted()) {
-               runparams.for_search = OutputParams::SearchWithoutDeleted;
+               runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
         }
         else {
-               runparams.for_search = OutputParams::SearchWithDeleted;
+               runparams.find_set_feature(OutputParams::SearchWithDeleted);
         }
         if (ignoreFormats.getNonContent()) {
-               runparams.for_search |= OutputParams::SearchNonOutput;
+               runparams.find_add_feature(OutputParams::SearchNonOutput);
         }
  
         if (cur.inTexted()) {
@@ -4220,7 +4345,10 @@ MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Mat
                 displayMres(mres, "Start with negative match", cur);
                 max_match = mres;
         }
-       if (max_match.match_len <= 0) return fail;
+       // Only now we are really at_begin
+       if ((max_match.match_len <= 0) ||
+           (match.opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()))
+               return fail;
         LYXERR(Debug::FINDVERBOSE, "Ok");
  
         // Compute the match length