git.lyx.org Git - features.git/blobdiff - src/lyxfind.cpp
FindAdv:
[features.git] / src / lyxfind.cpp
index 4df0f1e1c5334a17510d2a15559d895401685f55..609f38c7815fce43eed133435c94a27cde1ee14b 100644 (file)
@@ -812,6 +812,17 @@ namespace {
 
 typedef vector<pair<string, string> > Escapes;
 
+static string getRegexSpaceCount(int count)
+{
+       if (count > 0) {
+               if (count > 1)
+                       return "\\s{" + std::to_string(count) + "}";
+               else
+                       return "\\s";
+       }
+       return "";
+}
+
 string string2regex(string in)
 {
        static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\$\\])" };
@@ -833,20 +844,14 @@ string string2regex(string in)
                }
                else {
                        if (blanks > 0) {
-                               if (blanks > 1)
-                                       temp += "\\s+";
-                               else
-                                       temp += "\\s";
+                               temp += getRegexSpaceCount(blanks);
                        }
                        temp += tempx[i];
                        blanks = 0;
                }
        }
        if (blanks > 0) {
-               if (blanks > 1)
-                       temp += "\\s+";
-               else
-                       temp += "\\s";
+               temp += getRegexSpaceCount(blanks);
        }
 
        string temp2("");
@@ -887,6 +892,7 @@ string correctRegex(string t, bool withformat)
                buildAccentsMap();
 
        //LYXERR0("correctRegex input '" << t << "'");
+       int skip = 0;
        for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
                sub = *it;
                string replace;
@@ -903,6 +909,10 @@ string correctRegex(string t, bool withformat)
                                                replace = "";
                                                backslashed = true;
                                        }
+                                       else if (withformat && next[0] == '$') {
+                                               replace = accents["lyxdollar"];
+                                               skip = 1;       // Skip following '$'
+                                       }
                                }
                        }
                        else if (sub.str(4) == "mathcircumflex")
@@ -955,7 +965,8 @@ string correctRegex(string t, bool withformat)
                if (lastpos < (size_t) sub.position(2))
                        s += std::regex_replace(t.substr(lastpos, sub.position(2) - lastpos), protectedSpace, R"( )");
                s += replace;
-               lastpos = sub.position(2) + sub.length(2);
+               lastpos = sub.position(2) + sub.length(2) + skip;
+               skip = 0;
        }
        if (lastpos == 0)
                s = std::regex_replace(t, protectedSpace, R"( )");
@@ -1153,11 +1164,11 @@ static docstring buffer_to_latex(Buffer & buffer)
        runparams.nice = true;
        setFindParams(runparams);
        if (ignoreFormats.getDeleted())
-               runparams.for_search = OutputParams::SearchWithoutDeleted;
+               runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
        else
-               runparams.for_search = OutputParams::SearchWithDeleted;
+               runparams.find_set_feature(OutputParams::SearchWithDeleted);
        if (ignoreFormats.getNonContent()) {
-               runparams.for_search |= OutputParams::SearchNonOutput;
+               runparams.find_add_feature(OutputParams::SearchNonOutput);
        }
        pit_type const endpit = buffer.paragraphs().size();
        for (pit_type pit = 0; pit != endpit; ++pit) {
@@ -1231,13 +1242,13 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
                int option = AS_STR_INSETS |AS_STR_PLAINTEXT;
                if (ignoreFormats.getDeleted()) {
                        option |= AS_STR_SKIPDELETE;
-                       runparams.for_search = OutputParams::SearchWithoutDeleted;
+                       runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
                }
                else {
-                       runparams.for_search = OutputParams::SearchWithDeleted;
+                       runparams.find_set_feature(OutputParams::SearchWithDeleted);
                }
                if (ignoreFormats.getNonContent()) {
-                       runparams.for_search |= OutputParams::SearchNonOutput;
+                       runparams.find_add_feature(OutputParams::SearchNonOutput);
                }
                string t("");
                for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
@@ -1417,8 +1428,8 @@ public:
        string par;
        int ignoreidx;
        static vector<Border> borders;
-       int depts[MAXOPENED];
-       int closes[MAXOPENED];
+       static vector<int> depts;
+       static vector<int> closes;
        int actualdeptindex;
        int previousNotIgnored(int) const;
        int nextNotIgnored(int) const;
@@ -1430,6 +1441,7 @@ public:
        void removeAccents();
        void setForDefaultLang(KeyInfo const & defLang) const;
        int findclosing(int start, int end, char up, char down, int repeat);
+       void removeInvalidClosings(void);
        void handleParentheses(int lastpos, bool closingAllowed);
        bool hasTitle;
        // Number of disabled language specs up
@@ -1442,6 +1454,8 @@ public:
 };
 
 vector<Border> Intervall::borders = vector<Border>(30);
+vector<int> Intervall::depts = vector<int>(30);
+vector<int> Intervall::closes = vector<int>(30);
 
 int Intervall::isOpeningPar(int pos) const
 {
@@ -1475,6 +1489,8 @@ void Intervall::setForDefaultLang(KeyInfo const & defLang) const
        }
 }
 
+#if 0
+// Not needed, because depts and closes are now dynamically expanded
 static void checkDepthIndex(int val)
 {
        static int maxdepthidx = MAXOPENED-2;
@@ -1488,6 +1504,7 @@ static void checkDepthIndex(int val)
                LYXERR(Debug::INFO, "maxdepthidx now " << val);
        }
 }
+#endif
 
 #if 0
 // Not needed, because borders are now dynamically expanded
@@ -1961,6 +1978,7 @@ static void buildAccentsMap()
        accents["braceright"]    = getutf8(0xf0031);
        accents["lyxtilde"]      = getutf8(0xf0032);
        accents["sim"]           = getutf8(0xf0032);
+       accents["lyxdollar"]     = getutf8(0xf0033);
        accents["backslash lyx"]           = getutf8(0xf0010);  // Used logos inserted with starting \backslash
        accents["backslash LyX"]           = getutf8(0xf0010);
        accents["backslash tex"]           = getutf8(0xf0011);
@@ -2036,7 +2054,8 @@ void Intervall::removeAccents()
        if (accents.empty())
                buildAccentsMap();
        static regex const accre("\\\\("
-                                "([\\S]|[A-Za-z]+)\\{[^\\{\\}]+\\}"
+                                "([\\S]|[A-Za-z]+)\\{[^\\\\\\{\\}]+\\}"
+                                "|([\\S]|[A-Za-z]+)\\{\\\\[ij](math)?\\}"
                                 "|("
                                 "(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))"
                                 "|[A-Za-z]+"
@@ -2072,9 +2091,13 @@ void Intervall::removeAccents()
 void Intervall::handleOpenP(int i)
 {
        actualdeptindex++;
+       if ((size_t) actualdeptindex >= depts.size()) {
+               depts.resize(actualdeptindex + 30);
+               closes.resize(actualdeptindex + 30);
+       }
        depts[actualdeptindex] = i+1;
        closes[actualdeptindex] = -1;
-       checkDepthIndex(actualdeptindex);
+       // checkDepthIndex(actualdeptindex);
 }
 
 void Intervall::handleCloseP(int i, bool closingAllowed)
@@ -2236,6 +2259,27 @@ int Intervall::findclosing(int start, int end, char up = '{', char down = '}', i
        return end;
 }
 
+void Intervall::removeInvalidClosings(void)
+{
+       // Unmatched closing braces can occur if there are deleted parts
+       int skip = 0;
+       int depth = 0;
+       for (unsigned i = 0; i < par.size(); i += 1 + skip) {
+               char c = par[i];
+               skip = 0;
+               if (c == '\\') skip = 1;
+               else if (c == '{')
+                       depth++;
+               else if (c == '}') {
+                       if (depth == 0) {
+                               addIntervall(i, i+1);
+                               LYXERR(Debug::FINDVERBOSE, "removed invalid closing '}' at " << i);
+                       }
+                       else
+                               --depth;
+               }
+       }
+}
 class MathInfo {
        class MathEntry {
        public:
@@ -2323,6 +2367,7 @@ void LatexInfo::buildEntries(bool isPatternString)
        static bool removeMathHull = false;
 
        interval_.removeAccents();
+       interval_.removeInvalidClosings();
 
        for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) {
                submath = *itmath;
@@ -2728,7 +2773,8 @@ void LatexInfo::buildKeys(bool isPatternString)
        if (keysBuilt && !isPatternString) return;
 
        // Keys to ignore in any case
-       makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+       makeKey("text|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+       makeKey("nonumber|notag", KeyInfo(KeyInfo::headRemove, 0, true), true);
        // Known standard keys with 1 parameter.
        // Split is done, if not at start of region
        makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
@@ -3606,7 +3652,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                CreateRegexp(opt, "", "", "");
                return;
        }
-       use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
+       use_regexp = ds.find(from_utf8("\\regexp{")) != std::string::npos;
        if (opt.replace_all && previous_single_replace) {
                previous_single_replace = false;
                num_replaced = 0;
@@ -3695,7 +3741,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                                        break;
                        }
                        if (lng < par_as_string.size())
-                               par_as_string = par_as_string.substr(0,lng);
+                               par_as_string.resize(lng);
                }
                LYXERR(Debug::FINDVERBOSE, "par_as_string after correctRegex is '" << par_as_string << "'");
                if ((lng > 0) && (par_as_string[0] == '^')) {
@@ -3738,9 +3784,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
        MatchResult mres;
 
        mres.searched_size = len;
-       if (at_begin &&
-                       (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
-               return mres;
 
        docstring docstr = stringifyFromForSearch(opt, cur, len);
        string str;
@@ -3941,8 +3984,79 @@ static bool simple_replace(string &t, string from, string to)
 }
 #endif
 
-string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
+#if 1
+static string convertLF2Space(docstring const &s, bool ignore_format)
 {
+       // Using original docstring to handle '\n'
+
+       if (s.size() == 0) return "";
+       stringstream t;
+       size_t pos;
+       size_t start = 0;
+       size_t end = s.size() - 1;
+       if (!ignore_format) {
+               while (s[start] == '\n' && start <= end)
+                       start++;
+               while (end >= start && s[end] == '\n')
+                       end--;
+               if (start >= end + 1)
+                       return "";
+       }
+       do {
+               bool dospace = true;
+               int skip = -1;
+               pos = s.find('\n', start);
+               if (pos >= end) {
+                       t << lyx::to_utf8(s.substr(start, end + 1 - start));
+                       break;
+               }
+               if (!ignore_format) {
+                       if ((pos > start + 1) &&
+                            s[pos-1] == '\\' &&
+                            s[pos-2] == '\\') {
+                               skip = 2;
+                               if ((pos > start + 2) &&
+                                   (s[pos+1] == '~' || isSpace(s[pos+1]) ||
+                                    s[pos-3] == '~' || isSpace(s[pos-3]))) {
+                                       // discard "\\\\\n", do not replace with space
+                                       dospace = false;
+                               }
+                       }
+                       else if (pos > start) {
+                               if (s[pos-1] == '%') {
+                                       skip = 1;
+                                       while ((pos > start+skip) && (s[pos-1-skip] == '%'))
+                                               skip++;
+                                       if ((pos > start+skip) &&
+                                           (s[pos+1] == '~' || isSpace(s[pos+1]) ||
+                                            s[pos-1-skip] == '~' || isSpace(s[pos-1-skip]))) {
+                                               // discard '%%%%%\n'
+                                               dospace = false;
+                                       }
+                               }
+                               else if (!isAlnumASCII(s[pos+1]) || !isAlnumASCII(s[pos-1])) {
+                                       dospace = false;
+                                       skip = 0;       // remove the '\n' only
+                               }
+                       }
+               }
+               else {
+                       dospace = true;
+                       skip = 0;
+               }
+               t << lyx::to_utf8(s.substr(start, pos-skip-start));
+               if (dospace)
+                       t << ' ';
+               start = pos+1;
+       } while (start <= end);
+       return(t.str());
+}
+
+#else
+static string convertLF2Space(docstring const & s, bool ignore_format)
+{
+       // Using utf8-converted string to handle '\n'
+
        string t;
        t = lyx::to_utf8(s);
        // Remove \n at begin
@@ -3957,10 +4071,11 @@ string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
        while ((pos = t.find("\n")) != string::npos) {
                if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
                        // Handle '\\\n'
-                       if (isAlnumASCII(t[pos+1])) {
+                       if (isPrintableNonspace(t[pos+1]) && ((pos < 3) || isPrintableNonspace(t[pos-3]))) {
                                t.replace(pos-2, 3, " ");
                        }
                        else {
+                               // Already a space there
                                t.replace(pos-2, 3, "");
                        }
                }
@@ -3984,19 +4099,29 @@ string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
                        }
                }
        }
-       // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
-       // Kornel: Added textsl, textsf, textit, texttt and noun
-       // + allow to seach for colored text too
-       LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t);
-       while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
-               LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
-       while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
-               LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << t);
-       while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
+       return(t);
 
-       return t;
 }
+#endif
 
+string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
+{
+       string t = convertLF2Space(s, ignore_format);
+
+       // The following replacements are not appropriate in non-format-search mode
+       if (!ignore_format) {
+               // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
+               // Kornel: Added textsl, textsf, textit, texttt and noun
+               // + also allow searching for colored text
+               LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t);
+               while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
+                       LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
+               while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
+                       LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << t);
+               while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
+       }
+       return t;
+}
 
 docstring stringifyFromCursor(DocIterator const & cur, int len)
 {
@@ -4014,13 +4139,13 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                int option = AS_STR_INSETS | AS_STR_PLAINTEXT;
                if (ignoreFormats.getDeleted()) {
                        option |= AS_STR_SKIPDELETE;
-                       runparams.for_search = OutputParams::SearchWithoutDeleted;
+                       runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
                }
                else {
-                       runparams.for_search = OutputParams::SearchWithDeleted;
+                       runparams.find_set_feature(OutputParams::SearchWithDeleted);
                }
                if (ignoreFormats.getNonContent()) {
-                       runparams.for_search |= OutputParams::SearchNonOutput;
+                       runparams.find_add_feature(OutputParams::SearchNonOutput);
                }
                LYXERR(Debug::FINDVERBOSE, "Stringifying with cur: "
                       << cur << ", from pos: " << cur.pos() << ", end: " << end);
@@ -4067,13 +4192,13 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
        runparams.nice = false;
        setFindParams(runparams);
        if (ignoreFormats.getDeleted()) {
-               runparams.for_search = OutputParams::SearchWithoutDeleted;
+               runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
        }
        else {
-               runparams.for_search = OutputParams::SearchWithDeleted;
+               runparams.find_set_feature(OutputParams::SearchWithDeleted);
        }
        if (ignoreFormats.getNonContent()) {
-               runparams.for_search |= OutputParams::SearchNonOutput;
+               runparams.find_add_feature(OutputParams::SearchNonOutput);
        }
 
        if (cur.inTexted()) {
@@ -4220,7 +4345,10 @@ MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Mat
                displayMres(mres, "Start with negative match", cur);
                max_match = mres;
        }
-       if (max_match.match_len <= 0) return fail;
+       // Only now we are really at_begin
+       if ((max_match.match_len <= 0) ||
+           (match.opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()))
+               return fail;
        LYXERR(Debug::FINDVERBOSE, "Ok");
 
        // Compute the match length