X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxfind.cpp;h=ac6366c2620e4ad655b03063078a2d32f9247dd8;hb=bf60c6106641db7b756dacfb560eabaf399fe154;hp=4df0f1e1c5334a17510d2a15559d895401685f55;hpb=441b0eab2370b8b6cbe0c0a75a1718405a4b6329;p=features.git diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 4df0f1e1c5..ac6366c262 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -812,6 +812,17 @@ namespace { typedef vector > Escapes; +static string getRegexSpaceCount(int count) +{ + if (count > 0) { + if (count > 1) + return "\\s{" + std::to_string(count) + "}"; + else + return "\\s"; + } + return ""; +} + string string2regex(string in) { static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\$\\])" }; @@ -833,20 +844,14 @@ string string2regex(string in) } else { if (blanks > 0) { - if (blanks > 1) - temp += "\\s+"; - else - temp += "\\s"; + temp += getRegexSpaceCount(blanks); } temp += tempx[i]; blanks = 0; } } if (blanks > 0) { - if (blanks > 1) - temp += "\\s+"; - else - temp += "\\s"; + temp += getRegexSpaceCount(blanks); } string temp2(""); @@ -887,6 +892,7 @@ string correctRegex(string t, bool withformat) buildAccentsMap(); //LYXERR0("correctRegex input '" << t << "'"); + int skip = 0; for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { sub = *it; string replace; @@ -903,6 +909,10 @@ string correctRegex(string t, bool withformat) replace = ""; backslashed = true; } + else if (withformat && next[0] == '$') { + replace = accents["lyxdollar"]; + skip = 1; // Skip following '$' + } } } else if (sub.str(4) == "mathcircumflex") @@ -955,7 +965,8 @@ string correctRegex(string t, bool withformat) if (lastpos < (size_t) sub.position(2)) s += std::regex_replace(t.substr(lastpos, sub.position(2) - lastpos), protectedSpace, R"( )"); s += replace; - lastpos = sub.position(2) + sub.length(2); + lastpos = sub.position(2) + sub.length(2) + skip; + skip = 0; } if (lastpos == 0) s = std::regex_replace(t, protectedSpace, R"( )"); @@ -1055,13 +1066,19 @@ public: ** constructor as opt.search, under the opt.* options settings. ** ** @param at_begin - ** If set, then match is searched only against beginning of text starting at cur. - ** If unset, then match is searched anywhere in text starting at cur. + ** If set to MatchStringAdv::MatchFromStart, + ** then match is searched only against beginning of text starting at cur. + ** Otherwise the match is searched anywhere in text starting at cur. ** ** @return ** The length of the matching text, or zero if no match was found. **/ - MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; + enum matchType { + MatchAnyPlace, + MatchFromStart + }; + string matchTypeAsString(matchType const x) const { return (x == MatchFromStart ? "MatchFromStart" : "MatchAnyPlace"); } + MatchResult operator()(DocIterator const & cur, int len, matchType at_begin) const; #if QTSEARCH bool regexIsValid; string regexError; @@ -1077,7 +1094,7 @@ public: private: /// Auxiliary find method (does not account for opt.matchword) - MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const; + MatchResult findAux(DocIterator const & cur, int len, matchType at_begin) const; void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = ""); /** Normalize a stringified or latexified LyX paragraph. @@ -1153,11 +1170,11 @@ static docstring buffer_to_latex(Buffer & buffer) runparams.nice = true; setFindParams(runparams); if (ignoreFormats.getDeleted()) - runparams.for_search = OutputParams::SearchWithoutDeleted; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); else - runparams.for_search = OutputParams::SearchWithDeleted; + runparams.find_set_feature(OutputParams::SearchWithDeleted); if (ignoreFormats.getNonContent()) { - runparams.for_search |= OutputParams::SearchNonOutput; + runparams.find_add_feature(OutputParams::SearchNonOutput); } pit_type const endpit = buffer.paragraphs().size(); for (pit_type pit = 0; pit != endpit; ++pit) { @@ -1231,13 +1248,13 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co int option = AS_STR_INSETS |AS_STR_PLAINTEXT; if (ignoreFormats.getDeleted()) { option |= AS_STR_SKIPDELETE; - runparams.for_search = OutputParams::SearchWithoutDeleted; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); } else { - runparams.for_search = OutputParams::SearchWithDeleted; + runparams.find_set_feature(OutputParams::SearchWithDeleted); } if (ignoreFormats.getNonContent()) { - runparams.for_search |= OutputParams::SearchNonOutput; + runparams.find_add_feature(OutputParams::SearchNonOutput); } string t(""); for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { @@ -1417,8 +1434,8 @@ public: string par; int ignoreidx; static vector borders; - int depts[MAXOPENED]; - int closes[MAXOPENED]; + static vector depts; + static vector closes; int actualdeptindex; int previousNotIgnored(int) const; int nextNotIgnored(int) const; @@ -1430,6 +1447,7 @@ public: void removeAccents(); void setForDefaultLang(KeyInfo const & defLang) const; int findclosing(int start, int end, char up, char down, int repeat); + void removeInvalidClosings(void); void handleParentheses(int lastpos, bool closingAllowed); bool hasTitle; // Number of disabled language specs up @@ -1442,6 +1460,8 @@ public: }; vector Intervall::borders = vector(30); +vector Intervall::depts = vector(30); +vector Intervall::closes = vector(30); int Intervall::isOpeningPar(int pos) const { @@ -1475,6 +1495,8 @@ void Intervall::setForDefaultLang(KeyInfo const & defLang) const } } +#if 0 +// Not needed, because dpts and closes are now dynamically expanded static void checkDepthIndex(int val) { static int maxdepthidx = MAXOPENED-2; @@ -1488,6 +1510,7 @@ static void checkDepthIndex(int val) LYXERR(Debug::INFO, "maxdepthidx now " << val); } } +#endif #if 0 // Not needed, because borders are now dynamically expanded @@ -1961,6 +1984,7 @@ static void buildAccentsMap() accents["braceright"] = getutf8(0xf0031); accents["lyxtilde"] = getutf8(0xf0032); accents["sim"] = getutf8(0xf0032); + accents["lyxdollar"] = getutf8(0xf0033); accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash accents["backslash LyX"] = getutf8(0xf0010); accents["backslash tex"] = getutf8(0xf0011); @@ -2036,7 +2060,8 @@ void Intervall::removeAccents() if (accents.empty()) buildAccentsMap(); static regex const accre("\\\\(" - "([\\S]|[A-Za-z]+)\\{[^\\{\\}]+\\}" + "([\\S]|[A-Za-z]+)\\{[^\\\\\\{\\}]+\\}" + "|([\\S]|[A-Za-z]+)\\{\\\\[ij](math)?\\}" "|(" "(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))" "|[A-Za-z]+" @@ -2072,9 +2097,13 @@ void Intervall::removeAccents() void Intervall::handleOpenP(int i) { actualdeptindex++; + if ((size_t) actualdeptindex >= depts.size()) { + depts.resize(actualdeptindex + 30); + closes.resize(actualdeptindex + 30); + } depts[actualdeptindex] = i+1; closes[actualdeptindex] = -1; - checkDepthIndex(actualdeptindex); + // checkDepthIndex(actualdeptindex); } void Intervall::handleCloseP(int i, bool closingAllowed) @@ -2236,6 +2265,27 @@ int Intervall::findclosing(int start, int end, char up = '{', char down = '}', i return end; } +void Intervall::removeInvalidClosings(void) +{ + // this can happen, if there are deleted parts + int skip = 0; + int depth = 0; + for (unsigned i = 0; i < par.size(); i += 1 + skip) { + char c = par[i]; + skip = 0; + if (c == '\\') skip = 1; + else if (c == '{') + depth++; + else if (c == '}') { + if (depth == 0) { + addIntervall(i, i+1); + LYXERR(Debug::FINDVERBOSE, "removed invalid closing '}' at " << i); + } + else + --depth; + } + } +} class MathInfo { class MathEntry { public: @@ -2323,6 +2373,7 @@ void LatexInfo::buildEntries(bool isPatternString) static bool removeMathHull = false; interval_.removeAccents(); + interval_.removeInvalidClosings(); for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) { submath = *itmath; @@ -2728,7 +2779,8 @@ void LatexInfo::buildKeys(bool isPatternString) if (keysBuilt && !isPatternString) return; // Keys to ignore in any case - makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true); + makeKey("text|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true); + makeKey("nonumber|notag", KeyInfo(KeyInfo::headRemove, 0, true), true); // Known standard keys with 1 parameter. // Split is done, if not at start of region makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString); @@ -3606,7 +3658,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) CreateRegexp(opt, "", "", ""); return; } - use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos; + use_regexp = ds.find(from_utf8("\\regexp{")) != std::string::npos; if (opt.replace_all && previous_single_replace) { previous_single_replace = false; num_replaced = 0; @@ -3695,7 +3747,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) break; } if (lng < par_as_string.size()) - par_as_string = par_as_string.substr(0,lng); + par_as_string.resize(lng); } LYXERR(Debug::FINDVERBOSE, "par_as_string after correctRegex is '" << par_as_string << "'"); if ((lng > 0) && (par_as_string[0] == '^')) { @@ -3733,14 +3785,11 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) } } -MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const +MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const { MatchResult mres; mres.searched_size = len; - if (at_begin && - (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) ) - return mres; docstring docstr = stringifyFromForSearch(opt, cur, len); string str; @@ -3767,12 +3816,12 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be LASSERT(use_regexp, /**/); { // use_regexp always true - LYXERR(Debug::FINDVERBOSE, "Searching in regexp mode: at_begin=" << at_begin); + LYXERR(Debug::FINDVERBOSE, "Searching in regexp mode: at_begin=" << matchTypeAsString(at_begin)); #if QTSEARCH QString qstr = QString::fromStdString(str); QRegularExpression const *p_regexp; QRegularExpression::MatchType flags = QRegularExpression::NormalMatch; - if (at_begin) { + if (at_begin == MatchStringAdv::MatchFromStart) { p_regexp = ®exp; } else { p_regexp = ®exp2; @@ -3783,7 +3832,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be #else regex const *p_regexp; regex_constants::match_flag_type flags; - if (at_begin) { + if (at_begin == MatchStringAdv::MatchFromStart) { flags = regex_constants::match_continuous; p_regexp = ®exp; } else { @@ -3897,12 +3946,12 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be } -MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const +MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const { MatchResult mres = findAux(cur, len, at_begin); int res = mres.match_len; LYXERR(Debug::FINDVERBOSE, - "res=" << res << ", at_begin=" << at_begin + "res=" << res << ", at_begin=" << matchTypeAsString(at_begin) << ", matchAtStart=" << opt.matchAtStart << ", inTexted=" << cur.inTexted()); if (opt.matchAtStart) { @@ -3941,8 +3990,79 @@ static bool simple_replace(string &t, string from, string to) } #endif -string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const +#if 1 +static string convertLF2Space(docstring const &s, bool ignore_format) { + // Using original docstring to handle '\n' + + if (s.size() == 0) return ""; + stringstream t; + size_t pos; + size_t start = 0; + size_t end = s.size() - 1; + if (!ignore_format) { + while (s[start] == '\n' && start <= end) + start++; + while (end >= start && s[end] == '\n') + end--; + if (start >= end + 1) + return ""; + } + do { + bool dospace = true; + int skip = -1; + pos = s.find('\n', start); + if (pos >= end) { + t << lyx::to_utf8(s.substr(start, end + 1 - start)); + break; + } + if (!ignore_format) { + if ((pos > start + 1) && + s[pos-1] == '\\' && + s[pos-2] == '\\') { + skip = 2; + if ((pos > start + 2) && + (s[pos+1] == '~' || isSpace(s[pos+1]) || + s[pos-3] == '~' || isSpace(s[pos-3]))) { + // discard "\\\\\n", do not replace with space + dospace = false; + } + } + else if (pos > start) { + if (s[pos-1] == '%') { + skip = 1; + while ((pos > start+skip) && (s[pos-1-skip] == '%')) + skip++; + if ((pos > start+skip) && + (s[pos+1] == '~' || isSpace(s[pos+1]) || + s[pos-1-skip] == '~' || isSpace(s[pos-1-skip]))) { + // discard '%%%%%\n' + dospace = false; + } + } + else if (!isAlnumASCII(s[pos+1]) || !isAlnumASCII(s[pos-1])) { + dospace = false; + skip = 0; // remove the '\n' only + } + } + } + else { + dospace = true; + skip = 0; + } + t << lyx::to_utf8(s.substr(start, pos-skip-start)); + if (dospace) + t << ' '; + start = pos+1; + } while (start <= end); + return(t.str()); +} + +#else +static string convertLF2Space(docstring const & s, bool ignore_format) +{ + // Using utf8-converted string to handle '\n' + string t; t = lyx::to_utf8(s); // Remove \n at begin @@ -3957,10 +4077,11 @@ string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const while ((pos = t.find("\n")) != string::npos) { if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) { // Handle '\\\n' - if (isAlnumASCII(t[pos+1])) { + if (isPrintableNonspace(t[pos+1]) && ((pos < 3) || isPrintableNonspace(t[pos-3]))) { t.replace(pos-2, 3, " "); } else { + // Already a space there t.replace(pos-2, 3, ""); } } @@ -3984,19 +4105,29 @@ string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const } } } - // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify - // Kornel: Added textsl, textsf, textit, texttt and noun - // + allow to seach for colored text too - LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t); - while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) - LYXERR(Debug::FINDVERBOSE, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); - while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) - LYXERR(Debug::FINDVERBOSE, " further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << t); - while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); + return(t); - return t; } +#endif +string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const +{ + string t = convertLF2Space(s, ignore_format); + + // The following replaces are not appropriate in non-format-search mode + if (!ignore_format) { + // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify + // Kornel: Added textsl, textsf, textit, texttt and noun + // + allow to seach for colored text too + LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t); + while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) + LYXERR(Debug::FINDVERBOSE, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); + while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) + LYXERR(Debug::FINDVERBOSE, " further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << t); + while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); + } + return t; +} docstring stringifyFromCursor(DocIterator const & cur, int len) { @@ -4014,13 +4145,13 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) int option = AS_STR_INSETS | AS_STR_PLAINTEXT; if (ignoreFormats.getDeleted()) { option |= AS_STR_SKIPDELETE; - runparams.for_search = OutputParams::SearchWithoutDeleted; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); } else { - runparams.for_search = OutputParams::SearchWithDeleted; + runparams.find_set_feature(OutputParams::SearchWithDeleted); } if (ignoreFormats.getNonContent()) { - runparams.for_search |= OutputParams::SearchNonOutput; + runparams.find_add_feature(OutputParams::SearchNonOutput); } LYXERR(Debug::FINDVERBOSE, "Stringifying with cur: " << cur << ", from pos: " << cur.pos() << ", end: " << end); @@ -4067,13 +4198,13 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) runparams.nice = false; setFindParams(runparams); if (ignoreFormats.getDeleted()) { - runparams.for_search = OutputParams::SearchWithoutDeleted; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); } else { - runparams.for_search = OutputParams::SearchWithDeleted; + runparams.find_set_feature(OutputParams::SearchWithDeleted); } if (ignoreFormats.getNonContent()) { - runparams.for_search |= OutputParams::SearchNonOutput; + runparams.find_add_feature(OutputParams::SearchNonOutput); } if (cur.inTexted()) { @@ -4178,13 +4309,13 @@ MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Mat // either one sees "http://www.bla.bla" or nothing // so the search for "www" gives prefix_len = 7 (== sizeof("http://") // and although we search for only 3 chars, we find the whole hyperlink inset - bool at_begin = (expected.match_prefix == 0); + MatchStringAdv::matchType at_begin = (expected.match_prefix == 0) ? MatchStringAdv::MatchFromStart : MatchStringAdv::MatchAnyPlace; if (!match.opt.forward && match.opt.ignoreformat) { if (expected.pos > 0) return fail; } - LASSERT(at_begin, /**/); - if (expected.match_len > 0 && at_begin) { + LASSERT(at_begin == MatchStringAdv::MatchFromStart, /**/); + if (expected.match_len > 0 && at_begin == MatchStringAdv::MatchFromStart) { // Search for deepest match old_cur = cur; max_match = expected; @@ -4216,11 +4347,14 @@ MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Mat } else { // (expected.match_len <= 0) - mres = match(cur); /* match valid only if not searching whole words */ + mres = match(cur, -1, MatchStringAdv::MatchFromStart); /* match valid only if not searching whole words */ displayMres(mres, "Start with negative match", cur); max_match = mres; } - if (max_match.match_len <= 0) return fail; + // Only now we are really at_begin + if ((max_match.match_len <= 0) || + (match.opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed())) + return fail; LYXERR(Debug::FINDVERBOSE, "Ok"); // Compute the match length @@ -4318,7 +4452,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) while (!theApp()->longOperationCancelled() && cur) { //(void) findAdvForwardInnermost(cur); LYXERR(Debug::FINDVERBOSE, "findForwardAdv() cur: " << cur); - MatchResult mres = match(cur, -1, false); + MatchResult mres = match(cur, -1, MatchStringAdv::MatchAnyPlace); string msg = "Starting"; if (repeat) msg = "Repeated"; @@ -4356,7 +4490,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) continue; } cur.pos() = cur.pos() + increment; - MatchResult mres2 = match(cur, -1, false); + MatchResult mres2 = match(cur, -1, MatchStringAdv::MatchAnyPlace); displayMres(mres2, "findForwardAdv loop", cur) switch (interpretMatch(mres, mres2)) { case MatchResult::newIsTooFar: @@ -4432,7 +4566,8 @@ MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, M LYXERR(Debug::FINDVERBOSE, "findMostBackwards(): cur=" << cur); DocIterator new_cur = cur; new_cur.backwardPos(); - if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len) + if (new_cur == cur || &new_cur.inset() != &inset + || match(new_cur, -1, MatchStringAdv::MatchFromStart).match_len <= 0) break; MatchResult new_mr = findAdvFinalize(new_cur, match, expected); if (new_mr.match_len == mr.match_len) @@ -4458,7 +4593,7 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) bool pit_changed = false; do { cur.pos() = 0; - MatchResult found_match = match(cur, -1, false); + MatchResult found_match = match(cur, -1, MatchStringAdv::MatchAnyPlace); if (found_match.match_len > 0) { if (pit_changed) @@ -4468,7 +4603,7 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) LYXERR(Debug::FINDVERBOSE, "findBackAdv2: cur: " << cur); DocIterator cur_prev_iter; do { - found_match = match(cur); + found_match = match(cur, -1, MatchStringAdv::MatchFromStart); LYXERR(Debug::FINDVERBOSE, "findBackAdv3: found_match=" << (found_match.match_len > 0) << ", cur: " << cur); if (found_match.match_len > 0) { @@ -4630,7 +4765,7 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma return 0; LASSERT(sel_len > 0, return 0); - if (!matchAdv(sel_beg, sel_len).match_len) + if (matchAdv(sel_beg, sel_len, MatchStringAdv::MatchFromStart).match_len <= 0) return 0; // Build a copy of the replace buffer, adapted to the KeepCase option