X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxfind.cpp;h=93e454fcf10d058a9c86a15a97485cb2a8f1b4e7;hb=3a1b19c5c363428f424180270e32bc8b468ea54f;hp=ef4777b11c90c17ee38aafdaee7f5492da7adea6;hpb=e8099942c7b7464895545959b346028a298f8bd7;p=lyx.git diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index ef4777b11c..93e454fcf1 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -31,6 +31,7 @@ #include "Paragraph.h" #include "Text.h" #include "Encoding.h" +#include "Language.h" #include "frontends/Application.h" #include "frontends/alert.h" @@ -49,8 +50,10 @@ #include "support/lstrings.h" #include "support/textutils.h" -#include +#include #include + +//#define ResultsDebug #define USE_QT_FOR_SEARCH #if defined(USE_QT_FOR_SEARCH) #include // sets QT_VERSION @@ -69,6 +72,9 @@ using namespace lyx::support; namespace lyx { +typedef unordered_map AccentsMap; +typedef unordered_map::const_iterator AccentsIterator; +static AccentsMap accents = unordered_map(); // Helper class for deciding what should be ignored class IgnoreFormats { @@ -96,6 +102,10 @@ class IgnoreFormats { /// bool getLanguage() const { return ignoreLanguage_; } /// + bool getDeleted() const { return ignoreDeleted_; } + /// + void setIgnoreDeleted(bool value); + /// void setIgnoreFormat(string const & type, bool value); private: @@ -119,9 +129,10 @@ private: bool ignoreColor_ = false; /// bool ignoreLanguage_ = false; + /// + bool ignoreDeleted_ = true; }; - void IgnoreFormats::setIgnoreFormat(string const & type, bool value) { if (type == "color") { @@ -157,6 +168,9 @@ void IgnoreFormats::setIgnoreFormat(string const & type, bool value) else if (type == "strike") { ignoreStrikeOut_ = value; } + else if (type == "deleted") { + ignoreDeleted_ = value; + } } // The global variable that can be changed from outside @@ -374,7 +388,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, // This causes a minor bug as undo will restore this selection, // which the user did not create (#8986). cur.innerText()->selectWord(cur, WHOLE_WORD); - searchstr = cur.selectionAsString(false); + searchstr = cur.selectionAsString(false, true); } // if we still don't have a search string, report the error @@ -383,7 +397,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, return make_pair(false, 0); bool have_selection = cur.selection(); - docstring const selected = cur.selectionAsString(false); + docstring const selected = cur.selectionAsString(false, true); bool match = case_sens ? searchstr == selected @@ -462,12 +476,11 @@ bool lyxfind(BufferView * bv, FuncRequest const & ev) bool matchword = parse_bool(howto); bool forward = parse_bool(howto); - return findOne(bv, search, casesensitive, matchword, forward, true, true); + return findOne(bv, search, casesensitive, matchword, forward, false, true); } -bool lyxreplace(BufferView * bv, - FuncRequest const & ev, bool has_deleted) +bool lyxreplace(BufferView * bv, FuncRequest const & ev) { if (!bv || ev.action() != LFUN_WORD_REPLACE) return false; @@ -489,40 +502,31 @@ bool lyxreplace(BufferView * bv, bool update = false; - if (!has_deleted) { - int replace_count = 0; - if (all) { - replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); - update = replace_count > 0; - } else { - pair rv = - replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); - update = rv.first; - replace_count = rv.second; - } + int replace_count = 0; + if (all) { + replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); + update = replace_count > 0; + } else { + pair rv = + replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); + update = rv.first; + replace_count = rv.second; + } - Buffer const & buf = bv->buffer(); - if (!update) { - // emit message signal. - buf.message(_("String not found.")); + Buffer const & buf = bv->buffer(); + if (!update) { + // emit message signal. + buf.message(_("String not found.")); + } else { + if (replace_count == 0) { + buf.message(_("String found.")); + } else if (replace_count == 1) { + buf.message(_("String has been replaced.")); } else { - if (replace_count == 0) { - buf.message(_("String found.")); - } else if (replace_count == 1) { - buf.message(_("String has been replaced.")); - } else { - docstring const str = - bformat(_("%1$d strings have been replaced."), replace_count); - buf.message(str); - } + docstring const str = + bformat(_("%1$d strings have been replaced."), replace_count); + buf.message(str); } - } else if (findnext) { - // if we have deleted characters, we do not replace at all, but - // rather search for the next occurence - if (findOne(bv, search, casesensitive, matchword, forward, true, findnext)) - update = true; - else - bv->message(_("String not found.")); } return update; } @@ -646,140 +650,111 @@ namespace { typedef vector > Escapes; -/// A map of symbols and their escaped equivalent needed within a regex. -/// @note Beware of order -Escapes const & get_regexp_escapes() +string string2regex(string in) { - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("$", "_x_$")); - escape_map.push_back(P("{", "_x_{")); - escape_map.push_back(P("}", "_x_}")); - escape_map.push_back(P("[", "_x_[")); - escape_map.push_back(P("]", "_x_]")); - escape_map.push_back(P("(", "_x_(")); - escape_map.push_back(P(")", "_x_)")); - escape_map.push_back(P("+", "_x_+")); - escape_map.push_back(P("*", "_x_*")); - escape_map.push_back(P(".", "_x_.")); - escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)")); - escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)")); - escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)")); - escape_map.push_back(P("_x_", "\\")); - } - return escape_map; -} - -/// A map of lyx escaped strings and their unescaped equivalent. -Escapes const & get_lyx_unescapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\%", "%")); - escape_map.push_back(P("\\{", "{")); - escape_map.push_back(P("\\}", "}")); - escape_map.push_back(P("\\mathcircumflex ", "^")); - escape_map.push_back(P("\\mathcircumflex", "^")); - escape_map.push_back(P("\\backslash ", "\\")); - escape_map.push_back(P("\\backslash", "\\")); - escape_map.push_back(P("\\sim ", "~")); - escape_map.push_back(P("\\sim", "~")); + static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" }; + string temp = std::regex_replace(in, specialChars, R"(\$&)" ); + string temp2(""); + size_t lastpos = 0; + size_t fl_pos = 0; + int offset = 1; + while (fl_pos < temp.size()) { + fl_pos = temp.find("\\\\foreignlanguage", lastpos + offset); + if (fl_pos == string::npos) + break; + offset = 16; + temp2 += temp.substr(lastpos, fl_pos - lastpos); + temp2 += "\\n"; + lastpos = fl_pos; } - return escape_map; -} - -/// A map of escapes turning a regexp matching text to one matching latex. -Escapes const & get_regexp_latex_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)")); - escape_map.push_back(P("(first << " as " << it->second); - unsigned int pos = 0; - while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) { - s.replace(pos, it->first.length(), it->second); - LYXERR(Debug::FIND, "After escape: " << s); - pos += it->second.length(); -// LYXERR(Debug::FIND, "pos: " << pos); + /* Convert \backslash => \ + * and \{, \}, \[, \] => {, }, [, ] + */ + string s(""); + regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))"); + size_t lastpos = 0; + smatch sub; + bool backslashed = false; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + string replace; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else { + if (sub.str(4) == "backslash") { + replace = "\\"; + if (withformat) { + // transforms '\backslash \{' into '\{' + // and '\{' into '{' + string next = t.substr(sub.position(2) + sub.str(2).length(), 2); + if ((next == "\\{") || (next == "\\}")) { + replace = ""; + backslashed = true; + } + } + } + else if (sub.str(4) == "mathcircumflex") + replace = "^"; + else if (backslashed) { + backslashed = false; + if (withformat && (sub.str(3) == "{")) + replace = accents["braceleft"]; + else if (withformat && (sub.str(3) == "}")) + replace = accents["braceright"]; + else { + // else part should not exist + LASSERT(1, /**/); + } + } + else + replace = sub.str(3); } + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += replace; + lastpos = sub.position(2) + sub.length(2); } - LYXERR(Debug::FIND, "Escaped : '" << s << "'"); + if (lastpos == 0) + return t; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); return s; } - /// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string), /// while outside apply get_lyx_unescapes()+get_regexp_escapes(). /// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well. -string escape_for_regex(string s, bool match_latex) +string escape_for_regex(string s, bool withformat) { - size_t pos = 0; - while (pos < s.size()) { - size_t new_pos = s.find("\\regexp{", pos); - if (new_pos == string::npos) - new_pos = s.size(); - string t; - if (new_pos > pos) { - // outside regexp - LYXERR(Debug::FIND, "new_pos: " << new_pos); - t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes()); - LYXERR(Debug::FIND, "t [lyx]: " << t); - t = apply_escapes(t, get_regexp_escapes()); - LYXERR(Debug::FIND, "t [rxp]: " << t); - s.replace(pos, new_pos - pos, t); - new_pos = pos + t.size(); - LYXERR(Debug::FIND, "Regexp after escaping: " << s); - LYXERR(Debug::FIND, "new_pos: " << new_pos); - if (new_pos == s.size()) - break; - } - // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes) - size_t end_pos = s.find("\\endregexp{}}", new_pos + 8); - LYXERR(Debug::FIND, "end_pos: " << end_pos); - t = s.substr(new_pos + 8, end_pos - (new_pos + 8)); - LYXERR(Debug::FIND, "t in regexp : " << t); - t = apply_escapes(t, get_lyx_unescapes()); - LYXERR(Debug::FIND, "t in regexp after unescapes [lyx]: " << t); - if (match_latex) { - t = apply_escapes(t, get_regexp_latex_escapes()); - LYXERR(Debug::FIND, "t in regexp after latex_escapes [ltx]: " << t); + size_t lastpos = 0; + string result = ""; + while (lastpos < s.size()) { + size_t regex_pos = s.find("\\regexp{", lastpos); + if (regex_pos == string::npos) { + regex_pos = s.size(); } - if (end_pos == s.size()) { - s.replace(new_pos, end_pos - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s); - break; + if (regex_pos > lastpos) { + result += string2regex(s.substr(lastpos, regex_pos-lastpos)); + lastpos = regex_pos; + if (lastpos == s.size()) + break; } - s.replace(new_pos, end_pos + 13 - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s); - pos = new_pos + t.size(); - LYXERR(Debug::FIND, "pos: " << pos); + size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8); + result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat); + lastpos = end_pos + 13; } - return s; + return result; } @@ -797,62 +772,6 @@ bool regex_replace(string const & s, string & t, string const & searchstr, return rv; } - -/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces. - ** - ** Verify that closed braces exactly match open braces. This avoids that, for example, - ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'. - ** - ** @param unmatched - ** Number of open braces that must remain open at the end for the verification to succeed. - **/ -#if QTSEARCH -bool braces_match(QString const & beg, - int unmatched = 0) -#else -bool braces_match(string const & beg, - int unmatched = 0) -#endif -{ - int open_pars = 0; -#if QTSEARCH - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'"); -#else - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'"); -#endif - int lastidx = beg.size(); - for (int i=0; i < lastidx; ++i) { - // Skip escaped braces in the count -#if QTSEARCH - QChar c = beg.at(i); -#else - char c = beg.at(i); -#endif - if (c == '\\') { - ++i; - if (i >= lastidx) - break; - } else if (c == '{') { - ++open_pars; - } else if (c == '}') { - if (open_pars == 0) { - LYXERR(Debug::FIND, "Found unmatched closed brace"); - return false; - } else - --open_pars; - } - } - if (open_pars != unmatched) { - LYXERR(Debug::FIND, "Found " << open_pars - << " instead of " << unmatched - << " unmatched open braces at the end of count"); - return false; - } - LYXERR(Debug::FIND, "Braces match as expected"); - return true; -} - - class MatchResult { public: enum range { @@ -865,7 +784,10 @@ public: int match2end; int pos; int leadsize; - MatchResult(): match_len(0),match_prefix(0),match2end(0), pos(0),leadsize(0) {}; + int pos_len; + int searched_size; + vector result = vector (); + MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {}; }; static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres) @@ -874,13 +796,10 @@ static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newre return MatchResult::newIsTooFar; if (newres.match_len < oldres.match_len) return MatchResult::newIsTooFar; - if ((newres.match_len == oldres.match_len) && (newres.match2end == oldres.match2end)) - return MatchResult::newIsBetter; - if ((newres.match_len == oldres.match_len) && (newres.match2end -2 == oldres.match2end)) { - // The string contained for instance "\usepackage...fontenc ..." - // and now after moved 9 char forward contains "ge...{fontenc} ..." - // so we accept it as OK - return MatchResult::newIsBetter; + + if (newres.match_len == oldres.match_len) { + if (newres.match2end == oldres.match2end) + return MatchResult::newIsBetter; } return MatchResult::newIsInvalid; } @@ -890,7 +809,7 @@ static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newre class MatchStringAdv { public: - MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt); + MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt); /** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv ** constructor as opt.search, under the opt.* options settings. @@ -919,6 +838,7 @@ public: private: /// Auxiliary find method (does not account for opt.matchword) MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const; + void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = ""); /** Normalize a stringified or latexified LyX paragraph. ** @@ -933,7 +853,7 @@ private: ** @todo Normalization should also expand macros, if the corresponding ** search option was checked. **/ - string normalize(docstring const & s, bool hack_braces) const; + string normalize(docstring const & s) const; // normalized string to search string par_as_string; // regular expression to use for searching @@ -957,8 +877,24 @@ private: public: // Are we searching with regular expressions ? bool use_regexp; + static int valid_matches; + static vector matches; + void FillResults(MatchResult &found_mr); }; +int MatchStringAdv::valid_matches = 0; +vector MatchStringAdv::matches = vector (10); + +void MatchStringAdv::FillResults(MatchResult &found_mr) +{ + if (found_mr.match_len > 0) { + valid_matches = found_mr.result.size(); + for (size_t i = 0; i < found_mr.result.size(); i++) + matches[i] = found_mr.result[i]; + } + else + valid_matches = 0; +} static docstring buffer_to_latex(Buffer & buffer) { @@ -971,7 +907,10 @@ static docstring buffer_to_latex(Buffer & buffer) runparams.linelen = 10000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + if (ignoreFormats.getDeleted()) + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + else + runparams.for_searchAdv = OutputParams::SearchWithDeleted; pit_type const endpit = buffer.paragraphs().size(); for (pit_type pit = 0; pit != endpit; ++pit) { TeXOnePar(buffer, buffer.text(), pit, os, runparams); @@ -993,16 +932,23 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co runparams.flavor = Flavor::XeTeX; runparams.linelen = 10000; //lyxrc.plaintext_linelen; runparams.dryrun = true; - runparams.for_search = true; + int option = AS_STR_INSETS |AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { Paragraph const & par = buffer.paragraphs().at(pit); LYXERR(Debug::FIND, "Adding to search string: '" << par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams) << "'"); str += par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams); } // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts @@ -1328,9 +1274,6 @@ void Intervall::addIntervall(int low, int upper) } } -typedef map AccentsMap; -static AccentsMap accents = map(); - static void buildaccent(string n, string param, string values) { stringstream s(n); @@ -1408,6 +1351,11 @@ static void buildAccentsMap() accents["i"] = "ı"; accents["jmath"] = "ȷ"; accents["cdot"] = "·"; + accents["textasciicircum"] = "^"; + accents["mathcircumflex"] = "^"; + accents["sim"] = "~"; + accents["guillemotright"] = "»"; + accents["guillemotleft"] = "«"; accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15 accents["thinspace"] = getutf8(0xf0002); // and used _only_ by findadv accents["negthinspace"] = getutf8(0xf0003); // to omit backslashed latex macros @@ -1424,6 +1372,8 @@ static void buildAccentsMap() accents["latexe"] = getutf8(0xf0013); accents["LaTeXe"] = getutf8(0xf0013); accents["lyxarrow"] = getutf8(0xf0020); + accents["braceleft"] = getutf8(0xf0030); + accents["braceright"] = getutf8(0xf0031); accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash accents["backslash LyX"] = getutf8(0xf0010); accents["backslash tex"] = getutf8(0xf0011); @@ -1498,13 +1448,14 @@ void Intervall::removeAccents() buildAccentsMap(); static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|" "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}" - "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow)))(?![a-zA-Z]))"); + "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))"); smatch sub; for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { sub = *itacc; string key = sub.str(1); - if (accents.find(key) != accents.end()) { - string val = accents[key]; + AccentsIterator it_ac = accents.find(key); + if (it_ac != accents.end()) { + string val = it_ac->second; size_t pos = sub.position(size_t(0)); for (size_t i = 0; i < val.size(); i++) { par[pos+i] = val[i]; @@ -1579,9 +1530,10 @@ int Intervall::nextNotIgnored(int start) const return start; } -typedef map KeysMap; +typedef unordered_map KeysMap; +typedef unordered_map::const_iterator KeysIterator; typedef vector< KeyInfo> Entries; -static KeysMap keys = map(); +static KeysMap keys = unordered_map(); class LatexInfo { private: @@ -1741,8 +1693,8 @@ class MathInfo { void LatexInfo::buildEntries(bool isPatternString) { - static regex const rmath("\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\}"); - static regex const rkeys("\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?))"); + static regex const rmath("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\})"); + static regex const rkeys("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?)))"); static bool disableLanguageOverride = false; smatch sub, submath; bool evaluatingRegexp = false; @@ -1763,45 +1715,46 @@ void LatexInfo::buildEntries(bool isPatternString) for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) { submath = *itmath; + if ((submath.position(2) - submath.position(0)) %2 == 1) { + // prefixed by odd count of '\\' + continue; + } if (math_end_waiting) { - size_t pos = submath.position(size_t(0)); + size_t pos = submath.position(size_t(2)); if ((math_end == "$") && - (submath.str(0) == "$") && - (interval_.par[pos-1] != '\\')) { + (submath.str(2) == "$")) { mi.insert("$", math_pos, pos + 1); math_end_waiting = false; } else if ((math_end == "\\]") && - (submath.str(0) == "\\]")) { + (submath.str(2) == "\\]")) { mi.insert("\\]", math_pos, pos + 2); math_end_waiting = false; } - else if ((submath.str(1).compare("end") == 0) && - (submath.str(2).compare(math_end) == 0)) { - mi.insert(math_end, math_pos, pos + submath.str(0).length()); + else if ((submath.str(3).compare("end") == 0) && + (submath.str(4).compare(math_end) == 0)) { + mi.insert(math_end, math_pos, pos + submath.str(2).length()); math_end_waiting = false; } else continue; } else { - if (submath.str(1).compare("begin") == 0) { + if (submath.str(3).compare("begin") == 0) { math_end_waiting = true; - math_end = submath.str(2); - math_pos = submath.position(size_t(0)); + math_end = submath.str(4); + math_pos = submath.position(size_t(2)); } - else if (submath.str(0).compare("\\[") == 0) { + else if (submath.str(2).compare("\\[") == 0) { math_end_waiting = true; math_end = "\\]"; - math_pos = submath.position(size_t(0)); + math_pos = submath.position(size_t(2)); } - else if (submath.str(0) == "$") { - size_t pos = submath.position(size_t(0)); - if ((pos == 0) || (interval_.par[pos-1] != '\\')) { - math_end_waiting = true; - math_end = "$"; - math_pos = pos; - } + else if (submath.str(2) == "$") { + size_t pos = submath.position(size_t(2)); + math_end_waiting = true; + math_end = "$"; + math_pos = pos; } } } @@ -1828,27 +1781,25 @@ void LatexInfo::buildEntries(bool isPatternString) math_pos = mi.getFirstPos(); for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) { sub = *it; - string key = sub.str(3); + if ((sub.position(2) - sub.position(0)) %2 == 1) { + // prefixed by odd count of '\\' + continue; + } + string key = sub.str(5); if (key == "") { - if (sub.str(0)[0] == '\\') - key = sub.str(0)[1]; + if (sub.str(2)[0] == '\\') + key = sub.str(2)[1]; else { - key = sub.str(0); - if (key == "$") { - size_t k_pos = sub.position(size_t(0)); - if ((k_pos > 0) && (interval_.par[k_pos - 1] == '\\')) { - // Escaped '$', ignoring - continue; - } - } + key = sub.str(2); } - }; - if (keys.find(key) != keys.end()) { - if (keys[key].keytype == KeyInfo::headRemove) { - KeyInfo found1 = keys[key]; + } + KeysIterator it_key = keys.find(key); + if (it_key != keys.end()) { + if (it_key->second.keytype == KeyInfo::headRemove) { + KeyInfo found1 = it_key->second; found1.disabled = true; found1.head = "\\" + key + "{"; - found1._tokenstart = sub.position(size_t(0)); + found1._tokenstart = sub.position(size_t(2)); found1._tokensize = found1.head.length(); found1._dataStart = found1._tokenstart + found1.head.length(); int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1); @@ -1858,10 +1809,10 @@ void LatexInfo::buildEntries(bool isPatternString) } } if (evaluatingRegexp) { - if (sub.str(1).compare("endregexp") == 0) { + if (sub.str(3).compare("endregexp") == 0) { evaluatingRegexp = false; // found._tokenstart already set - found._dataEnd = sub.position(size_t(0)) + 13; + found._dataEnd = sub.position(size_t(2)) + 13; found._dataStart = found._dataEnd; found._tokensize = found._dataEnd - found._tokenstart; found.parenthesiscount = 0; @@ -1873,13 +1824,13 @@ void LatexInfo::buildEntries(bool isPatternString) } else { if (evaluatingMath) { - if (size_t(sub.position(size_t(0))) < mi.getEndPos()) + if (size_t(sub.position(size_t(2))) < mi.getEndPos()) continue; evaluatingMath = false; mi.incrEntry(); math_pos = mi.getStartPos(); } - if (keys.find(key) == keys.end()) { + if (it_key == keys.end()) { found = KeyInfo(KeyInfo::isStandard, 0, true); LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text"); found = KeyInfo(KeyInfo::isText, 0, false); @@ -1895,7 +1846,7 @@ void LatexInfo::buildEntries(bool isPatternString) found = keys[key]; if (key.compare("regexp") == 0) { evaluatingRegexp = true; - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); found._tokensize = 0; continue; } @@ -1904,9 +1855,9 @@ void LatexInfo::buildEntries(bool isPatternString) if (found.keytype == KeyInfo::isIgnored) continue; else if (found.keytype == KeyInfo::isMath) { - if (size_t(sub.position(size_t(0))) == math_pos) { + if (size_t(sub.position(size_t(2))) == math_pos) { found = keys[key]; - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); found._tokensize = mi.getSize(); found._dataEnd = found._tokenstart + found._tokensize; found._dataStart = found._dataEnd; @@ -1921,21 +1872,21 @@ void LatexInfo::buildEntries(bool isPatternString) bool discardComment; found = keys[key]; found.keytype = KeyInfo::doRemove; - if ((sub.str(5).compare("longtable") == 0) || - (sub.str(5).compare("tabular") == 0)) { + if ((sub.str(7).compare("longtable") == 0) || + (sub.str(7).compare("tabular") == 0)) { discardComment = true; /* '%' */ } else { discardComment = false; static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$"); smatch sub2; - string token = sub.str(5); + string token = sub.str(7); if (regex_match(token, sub2, removeArgs)) { found.keytype = KeyInfo::removeWithArg; } } - // discard spaces before pos(0) - int pos = sub.position(size_t(0)); + // discard spaces before pos(2) + int pos = sub.position(size_t(2)); int count; for (count = 0; pos - count > 0; count++) { char c = interval_.par[pos-count-1]; @@ -1947,9 +1898,9 @@ void LatexInfo::buildEntries(bool isPatternString) break; } found._tokenstart = pos - count; - if (sub.str(1).compare(0, 5, "begin") == 0) { - size_t pos1 = pos + sub.str(0).length(); - if (sub.str(5).compare("cjk") == 0) { + if (sub.str(3).compare(0, 5, "begin") == 0) { + size_t pos1 = pos + sub.str(2).length(); + if (sub.str(7).compare("cjk") == 0) { pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1; if ((interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}')) pos1 += 2; @@ -1982,7 +1933,7 @@ void LatexInfo::buildEntries(bool isPatternString) } else { // Handle "\end{...}" - found._dataStart = pos + sub.str(0).length(); + found._dataStart = pos + sub.str(2).length(); found._dataEnd = found._dataStart; found._tokensize = count + found._dataEnd - pos; found.parenthesiscount = 0; @@ -1992,16 +1943,16 @@ void LatexInfo::buildEntries(bool isPatternString) } } else if (found.keytype != KeyInfo::isRegex) { - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); if (found.parenthesiscount == 0) { // Probably to be discarded - size_t following_pos = sub.position(size_t(0)) + sub.str(3).length() + 1; + size_t following_pos = sub.position(size_t(2)) + sub.str(5).length() + 1; char following = interval_.par[following_pos]; if (following == ' ') - found.head = "\\" + sub.str(3) + " "; + found.head = "\\" + sub.str(5) + " "; else if (following == '=') { // like \uldepth=1000pt - found.head = sub.str(0); + found.head = sub.str(2); } else found.head = "\\" + key; @@ -2028,8 +1979,12 @@ void LatexInfo::buildEntries(bool isPatternString) key += interval_.par.substr(params, optend-params); evaluatingOptional = true; optionalEnd = optend; + if (found.keytype == KeyInfo::isSectioning) { + // Remove optional values (but still keep in header) + interval_.addIntervall(params, optend); + } } - string token = sub.str(5); + string token = sub.str(7); int closings; if (interval_.par[optend] != '{') { closings = 0; @@ -2043,7 +1998,7 @@ void LatexInfo::buildEntries(bool isPatternString) } else if (found.parenthesiscount > 1) { if (token != "") { - found.head = sub.str(0) + "{"; + found.head = sub.str(2) + "{"; closings = found.parenthesiscount - 1; } else { @@ -2055,18 +2010,18 @@ void LatexInfo::buildEntries(bool isPatternString) if (found.keytype == KeyInfo::doRemove) { if (closings > 0) { size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); - if (endpar >= interval_.par.length()) - found._dataStart = interval_.par.length(); - else - found._dataStart = endpar; + if (endpar >= interval_.par.length()) + found._dataStart = interval_.par.length(); + else + found._dataStart = endpar; found._tokensize = found._dataStart - found._tokenstart; } else { found._dataStart = found._tokenstart + found._tokensize; - } + } closings = 0; } - if (interval_.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) { + if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) { found._dataStart += 15; } size_t endpos; @@ -2179,7 +2134,6 @@ void LatexInfo::buildKeys(bool isPatternString) makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("guillemotright|guillemotleft", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); // Spaces makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); @@ -2513,7 +2467,12 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) case KeyInfo::doRemove: { // Remove the key with all parameters and following spaces size_t pos; - for (pos = actual._dataEnd+1; pos < interval_.par.length(); pos++) { + size_t start; + if (interval_.par[actual._dataEnd-1] == ' ') + start = actual._dataEnd; + else + start = actual._dataEnd+1; + for (pos = start; pos < interval_.par.length(); pos++) { if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%')) break; } @@ -2815,7 +2774,7 @@ string splitOnKnownMacros(string par, bool isPatternString) * Resulting modified string is set to "", if * the searched tex does not contain all the features in the search pattern */ -static string correctlanguagesetting(string par, bool isPatternString, bool withformat) +static string correctlanguagesetting(string par, bool isPatternString, bool withformat, lyx::Buffer *pbuf = nullptr) { static Features regex_f; static int missed = 0; @@ -2835,8 +2794,24 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with // Split the latex input into pieces which // can be digested by our search engine LYXERR(Debug::FIND, "input: \"" << par << "\""); + if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language + // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX + string doclang = pbuf->params().language->polyglossia(); + static regex langre("\\\\(foreignlanguage)\\{([^\\}]+)\\}"); + smatch sub; + bool toIgnoreLang = true; + for (sregex_iterator it(par.begin(), par.end(), langre), end; it != end; ++it) { + sub = *it; + if (sub.str(2) != doclang) { + toIgnoreLang = false; + break; + } + } + setIgnoreFormat("language", toIgnoreLang); + + } result = splitOnKnownMacros(par.substr(0,parlen), isPatternString); - LYXERR(Debug::FIND, "After split: \"" << result << "\""); + LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\""); } else result = par.substr(0, parlen); @@ -2865,6 +2840,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with return ""; } } + } else { // LYXERR(Debug::INFO, "No regex formats"); @@ -2897,7 +2873,90 @@ static int identifyClosing(string & t) static int num_replaced = 0; static bool previous_single_replace = true; -MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt) +void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string) +{ +#if QTSEARCH + // Handle \w properly + QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption; + if (! opt.casesensitive) { + popts |= QRegularExpression::CaseInsensitiveOption; + } + regexp = QRegularExpression(QString::fromStdString(regexp_str), popts); + regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts); + regexError = ""; + if (regexp.isValid() && regexp2.isValid()) { + regexIsValid = true; + // Check '{', '}' pairs inside the regex + int balanced = 0; + int skip = 1; + for (unsigned i = 0; i < par_as_string.size(); i+= skip) { + char c = par_as_string[i]; + if (c == '\\') { + skip = 2; + continue; + } + if (c == '{') + balanced++; + else if (c == '}') { + balanced--; + if (balanced < 0) + break; + } + skip = 1; + } + if (balanced != 0) { + regexIsValid = false; + regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\""; + } + } + else { + regexIsValid = false; + if (!regexp.isValid()) + regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString(); + else + regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString(); + } +#else + if (opt.casesensitive) { + regexp = regex(regexp_str); + regexp2 = regex(regexp2_str); + } + else { + regexp = regex(regexp_str, std::regex_constants::icase); + regexp2 = regex(regexp2_str, std::regex_constants::icase); + } +#endif +} + +static void modifyRegexForMatchWord(string &t) +{ + string s(""); + regex wordre("(\\\\)*((\\.|\\\\b))"); + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else if (sub.str(2) == "\\\\b") + return; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += "\\S"; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) { + s = "\\b" + t + "\\b"; + t = s; + return; + } + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = "\\b" + s + "\\b"; +} + +MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) : p_buf(&buf), p_first_buf(&buf), opt(opt) { Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true); @@ -2912,62 +2971,67 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & previous_single_replace = true; } // When using regexp, braces are hacked already by escape_for_regex() - par_as_string = normalize(ds, !use_regexp); + par_as_string = normalize(ds); open_braces = 0; close_wildcards = 0; size_t lead_size = 0; // correct the language settings - par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat); - if (opt.ignoreformat) { - if (!use_regexp) { - // if par_as_string_nolead were emty, - // the following call to findAux will always *find* the string - // in the checked data, and thus always using the slow - // examining of the current text part. - par_as_string_nolead = par_as_string; + par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat, &buf); + opt.matchAtStart = false; + if (!use_regexp) { + identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string + if (opt.ignoreformat) { + lead_size = 0; } - } else { + else { + lead_size = identifyLeading(par_as_string); + } + lead_as_string = par_as_string.substr(0, lead_size); + string lead_as_regex_string = string2regex(lead_as_string); + par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); + string par_as_regex_string_nolead = string2regex(par_as_string_nolead); + /* Handle whole words too in this case + */ + if (opt.matchword) { + par_as_regex_string_nolead = "\\b" + par_as_regex_string_nolead + "\\b"; + opt.matchword = false; + } + string regexp_str = "(" + lead_as_regex_string + ")()" + par_as_regex_string_nolead; + string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead; + CreateRegexp(opt, regexp_str, regexp2_str); + use_regexp = true; + LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + return; + } + + if (!opt.ignoreformat) { lead_size = identifyLeading(par_as_string); LYXERR(Debug::FIND, "Lead_size: " << lead_size); lead_as_string = par_as_string.substr(0, lead_size); par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); } - if (!use_regexp) { - open_braces = identifyClosing(par_as_string); - identifyClosing(par_as_string_nolead); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'"); - } else { + // Here we are using regexp + LASSERT(use_regexp, /**/); + { string lead_as_regexp; if (lead_size > 0) { - // @todo No need to search for \regexp{} insets in leading material - lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat); + lead_as_regexp = string2regex(par_as_string.substr(0, lead_size)); + regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", ""); par_as_string = par_as_string_nolead; LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); } - LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); + // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat); // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them. - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - if ( - // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex) - regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2") - // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])( \\\\\\\\\\\\\\])\\'", "$1(.*?)$2") - // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, - "(.*[^\\\\])( \\\\\\\\end\\\\\\{[a-zA-Z_]*)(\\\\\\*)?(\\\\\\})\\'", "$1(.*?)$2$3$4") - // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2") - ) { - ++close_wildcards; - } + // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + ++close_wildcards; + size_t lng = par_as_string.size(); if (!opt.ignoreformat) { // Remove extra '\}' at end if not part of \{\.\} - size_t lng = par_as_string.size(); while(lng > 2) { if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) { if (lng >= 6) { @@ -2982,23 +3046,16 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & } if (lng < par_as_string.size()) par_as_string = par_as_string.substr(0,lng); - /* - // save '\.' - regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_"); - // handle '.' -> '[^]', replace later as '[^\}\{\\]' - regex_replace(par_as_string, par_as_string, "\\.", "[^]"); - // replace '[^...]' with '[^...\}\{\\]' - regex_replace(par_as_string, par_as_string, "\\[\\^([^\\\\\\]]*)\\]", "_xxbrlxx_$1\\}\\{\\\\_xxbrrxx_"); - regex_replace(par_as_string, par_as_string, "_xxbrlxx_", "[^"); - regex_replace(par_as_string, par_as_string, "_xxbrrxx_", "]"); - // restore '\.' - regex_replace(par_as_string, par_as_string, "_xxbdotxx_", "\\."); - */ } - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Close .*? : " << close_wildcards); - LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); + LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'"); + if ((lng > 0) && (par_as_string[0] == '^')) { + par_as_string = par_as_string.substr(1); + --lng; + opt.matchAtStart = true; + } + // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + // LYXERR(Debug::FIND, "Open braces: " << open_braces); + // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); // If entered regexp must match at begin of searched string buffer // Kornel: Added parentheses to use $1 for size of the leading string @@ -3013,152 +3070,50 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & string dest = "\\" + std::to_string(i+2); while (regex_replace(par_as_string, par_as_string, orig, dest)); } + if (opt.matchword) { + modifyRegexForMatchWord(par_as_string); + opt.matchword = false; + } regexp_str = "(" + lead_as_regexp + ")()" + par_as_string; regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string; } LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); -#if QTSEARCH - // Handle \w properly - QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption; - if (! opt.casesensitive) { - popts |= QRegularExpression::CaseInsensitiveOption; - } - regexp = QRegularExpression(QString::fromStdString(regexp_str), popts); - regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts); - regexError = ""; - if (regexp.isValid() && regexp2.isValid()) { - regexIsValid = true; - // Check '{', '}' pairs inside the regex - int balanced = 0; - int skip = 1; - for (unsigned i = 0; i < par_as_string.size(); i+= skip) { - char c = par_as_string[i]; - if (c == '\\') { - skip = 2; - continue; - } - if (c == '{') - balanced++; - else if (c == '}') { - balanced--; - if (balanced < 0) - break; - } - skip = 1; - } - if (balanced != 0) { - regexIsValid = false; - regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\""; - } - } - else { - regexIsValid = false; - if (!regexp.isValid()) - regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString(); - if (!regexp2.isValid()) - regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString(); - } -#else - if (opt.casesensitive) { - regexp = regex(regexp_str); - regexp2 = regex(regexp2_str); - } - else { - regexp = regex(regexp_str, std::regex_constants::icase); - regexp2 = regex(regexp2_str, std::regex_constants::icase); - } -#endif - } -} - -#if 0 -// Count number of characters in string -// {]} ==> 1 -// \& ==> 1 -// --- ==> 1 -// \\[a-zA-Z]+ ==> 1 -#if QTSEARCH -static int computeSize(QStringRef s, int len) -#define isLyxAlpha(arg) arg.isLetter() -#else -static int computeSize(string s, int len) -#define isLyxAlpha(arg) isalpha(arg) -#endif -{ - if (len == 0) - return 0; - int skip = 1; - int count = 0; - for (int i = 0; i < len; i += skip, count++) { - if (s.at(i) == '\\') { - skip = 2; - if (i + 1 < len && isLyxAlpha(s.at(i+1))) { - for (int j = 2; i+j < len; j++) { - if (! isLyxAlpha(s.at(i+j))) { - if (s.at(i+j) == ' ') - skip++; - else if (s.at(i+j) == '{') { - if (i+j+1 < len && s.at(i+j+1) == '}') - skip += 2; - else if (i + j + 1 >= len) - skip++; - } - break; - } - skip++; - } - } - } - else if (s.at(i) == '{') { - if (i + 1 < len && s.at(i+1) == '}') - skip = 2; - else - skip = 3; - } - else if (s.at(i) == '-') { - if (i+1 < len && s.at(i+1) == '-') { - if (i + 2 < len && s.at(i+2) == '-') - skip = 3; - else - skip = 2; - } - else - skip = 1; - } - else { - skip = 1; - } + CreateRegexp(opt, regexp_str, regexp2_str, par_as_string); } - return count; } -#endif MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const { MatchResult mres; + mres.searched_size = len; if (at_begin && (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) ) return mres; docstring docstr = stringifyFromForSearch(opt, cur, len); string str; - if (use_regexp || opt.casesensitive) - str = normalize(docstr, true); - else - str = normalize(lowercase(docstr), true); + str = normalize(docstr); if (!opt.ignoreformat) { str = correctlanguagesetting(str, false, !opt.ignoreformat); + // remove closing '}' and '\n' to allow for use of '$' in regex + size_t lng = str.size(); + while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n'))) + lng--; + if (lng != str.size()) { + str = str.substr(0, lng); + } } if (str.empty()) { mres.match_len = -1; return mres; } - LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); - LYXERR(Debug::FIND, "After normalization: '" << str << "'"); + LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'"); - if (use_regexp) { + LASSERT(use_regexp, /**/); + { + // use_regexp always true LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); #if QTSEARCH QString qstr = QString::fromStdString(str); @@ -3172,11 +3127,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags); if (!match.hasMatch()) return mres; - // Check braces on segments that matched all (.*?) subexpressions, - // except the last "padding" one inserted by lyx. - for (int i = 3; i < match.lastCapturedIndex(); ++i) - if (!braces_match(match.captured(i), open_braces)) - return mres; #else regex const *p_regexp; regex_constants::match_flag_type flags; @@ -3191,49 +3141,25 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be if (re_it == sregex_iterator()) return mres; match_results const & m = *re_it; - // Check braces on segments that matched all (.*?) subexpressions, - // except the last "padding" one inserted by lyx. - for (size_t i = 3; i < m.size() - 1; ++i) - if (!braces_match(m[i], open_braces)) - return mres; #endif - // Exclude from the returned match length any length - // due to close wildcards added at end of regexp - // and also the length of the leading (e.g. '\emph{}') + // Whole found string, including the leading + // std: m[0].second - m[0].first + // Qt: match.capturedEnd(0) - match.capturedStart(0) // - // Whole found string, including the leading: m[0].second - m[0].first - // Size of the leading string: m[1].second - m[1].first + // Size of the leading string + // std: m[1].second - m[1].first + // Qt: match.capturedEnd(1) - match.capturedStart(1) int leadingsize = 0; - int result; #if QTSEARCH if (match.lastCapturedIndex() > 0) { leadingsize = match.capturedEnd(1) - match.capturedStart(1); } - int lastidx = match.lastCapturedIndex(); - for (int i = 0; i <= lastidx; i++) { - LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long"); - } - if (close_wildcards == 0) - result = match.capturedEnd(0) - match.capturedStart(0); - else - result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0); #else if (m.size() > 2) { leadingsize = m[1].second - m[1].first; } - for (size_t i = 0; i < m.size(); i++) { - LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long"); - } - if (close_wildcards == 0) - result = m[0].second - m[0].first; - else - result = m[m.size() - close_wildcards].first - m[0].first; #endif - if (result > leadingsize) - result -= leadingsize; - else - result = 0; #if QTSEARCH mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2); mres.match_len = match.capturedEnd(0) - match.capturedEnd(2); @@ -3241,23 +3167,25 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be // we have to 'unify' the length of the post-match. // Done by ignoring closing parenthesis and linefeeds at string end int matchend = match.capturedEnd(0); - while (mres.match_len > 0) { - QChar c = qstr.at(matchend - 1); - if ((c == '\n') || (c == '}') || (c == '{')) { - mres.match_len--; - matchend--; - } - else - break; - } size_t strsize = qstr.size(); - while (strsize > (size_t) match.capturedEnd(0)) { - QChar c = qstr.at(strsize-1); - if ((c == '\n') || (c == '}')) { - --strsize; + if (!opt.ignoreformat) { + while (mres.match_len > 0) { + QChar c = qstr.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > (size_t) match.capturedEnd(0)) { + QChar c = qstr.at(strsize-1); + if ((c == '\n') || (c == '}')) { + --strsize; + } + else + break; } - else - break; } // LYXERR0(qstr.toStdString()); mres.match2end = strsize - matchend; @@ -3268,22 +3196,24 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be // ignore closing parenthesis and linefeeds at string end size_t strend = m[0].second - m[0].first; int matchend = strend; - while (mres.match_len > 0) { - char c = str.at(matchend - 1); - if ((c == '\n') || (c == '}') || (c == '{')) { - mres.match_len--; - matchend--; - } - else - break; - } size_t strsize = str.size(); - while (strsize > strend) { - if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) { - --strsize; + if (!opt.ignoreformat) { + while (mres.match_len > 0) { + char c = str.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > strend) { + if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) { + --strsize; + } + else + break; } - else - break; } // LYXERR0(str); mres.match2end = strsize - matchend; @@ -3292,36 +3222,25 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be if (mres.match2end < 0) mres.match_len = 0; mres.leadsize = leadingsize; - return mres; - } - - // else !use_regexp: but all code paths above return - LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='" - << par_as_string << "', str='" << str << "'"); - LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='" - << lead_as_string << "', par_as_string_nolead='" - << par_as_string_nolead << "'"); - - if (at_begin) { - LYXERR(Debug::FIND, "size=" << par_as_string.size() - << ", substr='" << str.substr(0, par_as_string.size()) << "'"); - if (str.substr(0, par_as_string.size()) == par_as_string) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size(); - mres.pos = 0; - return mres; +#if QTSEARCH + if (mres.match_len > 0) { + string a0 = match.captured(0).mid(mres.pos + mres.match_prefix, mres.match_len).toStdString(); + mres.result.push_back(a0); + for (int i = 3; i <= match.lastCapturedIndex(); i++) { + mres.result.push_back(match.captured(i).toStdString()); + } } - } else { - // Start the search _after_ the leading part - size_t pos = str.find(par_as_string_nolead, lead_as_string.size()); - if (pos != string::npos) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size() - pos; - mres.pos = pos; - return mres; +#else + if (mres.match_len > 0) { + string a0 = m[0].str().substr(mres.pos + mres.match_prefix, mres.match_len); + mres.result.push_back(a0); + for (size_t i = 3; i < m.size(); i++) { + mres.result.push_back(m[i]); + } } +#endif + return mres; } - return mres; } @@ -3331,47 +3250,45 @@ MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at int res = mres.match_len; LYXERR(Debug::FIND, "res=" << res << ", at_begin=" << at_begin - << ", matchword=" << opt.matchword + << ", matchAtStart=" << opt.matchAtStart << ", inTexted=" << cur.inTexted()); - if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted()) + if (opt.matchAtStart) { + if (cur.pos() != 0) + mres.match_len = 0; + else if (mres.match_prefix > 0) + mres.match_len = 0; return mres; - if ((len > 0) && (res < len)) { - mres.match_len = 0; - return mres; - } - Paragraph const & par = cur.paragraph(); - bool ws_left = (cur.pos() > 0) - ? par.isWordSeparator(cur.pos() - 1) - : true; - bool ws_right; - if (len < 0) - ws_right = true; - else { - ws_right = (cur.pos() + len < par.size()) - ? par.isWordSeparator(cur.pos() + len) - : true; - } - LYXERR(Debug::FIND, - "cur.pos()=" << cur.pos() << ", res=" << res - << ", separ: " << ws_left << ", " << ws_right - << ", len: " << len - << endl); - if (ws_left && ws_right) { - // Check for word separators inside the found 'word' - for (int i = 0; i < len; i++) { - if (par.isWordSeparator(cur.pos() + i)) { - mres.match_len = 0; - return mres; - } - } - return mres; } - mres.match_len = 0; - return mres; + else + return mres; } +#if 0 +static bool simple_replace(string &t, string from, string to) +{ + regex repl("(\\\\)*(" + from + ")"); + string s(""); + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), repl), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) + continue; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += to; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + return false; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = s; + return true; +} +#endif -string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const +string MatchStringAdv::normalize(docstring const & s) const { string t; t = lyx::to_utf8(s); @@ -3405,23 +3322,12 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to seach for colored text too - LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); + LYXERR(Debug::FIND, "Removing stale empty macros from: " << t); while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); - // FIXME - check what preceeds the brace - if (hack_braces) { - if (opt.ignoreformat) - while (regex_replace(t, t, "\\{", "_x_<") - || regex_replace(t, t, "\\}", "_x_>")) - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); - else - while (regex_replace(t, t, "\\\\\\{", "_x_<") - || regex_replace(t, t, "\\\\\\}", "_x_>")) - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); - } return t; } @@ -3443,11 +3349,18 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) runparams.linelen = 10000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + int option = AS_STR_INSETS | AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } LYXERR(Debug::FIND, "Stringifying with cur: " << cur << ", from pos: " << cur.pos() << ", end: " << end); return par.asString(cur.pos(), end, - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams); } else if (cur.inMathed()) { CursorSlice cs = cur.top(); @@ -3475,9 +3388,11 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) */ docstring latexifyFromCursor(DocIterator const & cur, int len) { + /* LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); + */ Buffer const & buf = *cur.buffer(); odocstringstream ods; @@ -3489,7 +3404,12 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) runparams.linelen = 8000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + if (ignoreFormats.getDeleted()) { + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } if (cur.inTexted()) { // @TODO what about searching beyond/across paragraph breaks ? @@ -3542,99 +3462,128 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) return ods.str(); } -#if 0 +#if defined(ResultsDebug) // Debugging output -static void displayMResult(MatchResult &mres, int increment) +static void displayMResult(MatchResult &mres, string from, DocIterator & cur) { - LYXERR0( "pos: " << mres.pos << " increment " << increment); - LYXERR0( "leadsize: " << mres.leadsize); - LYXERR0( "match_len: " << mres.match_len); - LYXERR0( "match_prefix: " << mres.match_prefix); - LYXERR0( "match2end: " << mres.match2end); + LYXERR0( "from:\t\t\t" << from); + string status; + if (mres.pos_len > 0) { + // Set in finalize + status = "FINALSEARCH"; + } + else { + if (mres.match_len > 0) { + if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize)) + status = "Good Match"; + else + status = "Matched in"; + } + else + status = "MissedSearch"; + } + + LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")"); + if ((mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0)) + LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")"); + if ((mres.pos > 0) || (mres.match_prefix > 0)) + LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")"); + for (size_t i = 0; i < mres.result.size(); i++) + LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\""); } - #define displayMres(s,i) displayMResult(s,i); + #define displayMres(s, txt, cur) displayMResult(s, txt, cur); #else - #define displayMres(s,i) + #define displayMres(s, txt, cur) #endif /** Finalize an advanced find operation, advancing the cursor to the innermost ** position that matches, plus computing the length of the matching text to ** be selected + ** Return the cur.pos() difference between start and end of found match **/ -int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expected_len) +MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1)) { // Search the foremost position that matches (avoids find of entire math // inset when match at start of it) - size_t d; DocIterator old_cur(cur.buffer()); MatchResult mres; - do { - LYXERR(Debug::FIND, "Forwarding one step (searching for innermost match)"); - d = cur.depth(); + static MatchResult fail = MatchResult(); + MatchResult max_match; + // If (prefix_len > 0) means that forwarding 1 position will remove the complete entry + // Happens with e.g. hyperlinks + // either one sees "http://www.bla.bla" or nothing + // so the search for "www" gives prefix_len = 7 (== sizeof("http://") + // and although we search for only 3 chars, we find the whole hyperlink inset + bool at_begin = (expected.match_prefix == 0); + if (!match.opt.forward && match.opt.ignoreformat) { + if (expected.pos > 0) + return fail; + } + LASSERT(at_begin, /**/); + if (expected.match_len > 0 && at_begin) { + // Search for deepest match old_cur = cur; - cur.forwardPos(); - if (!cur) - break; - if (cur.depth() > d) - continue; - if (cur.depth() == d) - break; - mres = match(cur); - displayMres(mres, 1); - if (expected_len > 0) { - if (mres.match_len < expected_len) + max_match = expected; + do { + size_t d = cur.depth(); + cur.forwardPos(); + if (!cur) break; - } - else { - if (mres.match_len <= 0) + if (cur.depth() < d) break; - } - } while (1); - cur = old_cur; - mres = match(cur); /* match valid only if not searching whole words */ - int max_match = mres.match_len; - if (max_match <= 0) return 0; + if (cur.depth() == d) + break; + size_t lastd = d; + while (cur && cur.depth() > lastd) { + lastd = cur.depth(); + mres = match(cur, -1, at_begin); + displayMres(mres, "Checking innermost", cur); + if (mres.match_len > 0) + break; + // maybe deeper? + cur.forwardPos(); + } + if (mres.match_len < expected.match_len) + break; + max_match = mres; + old_cur = cur;; + } while(1); + cur = old_cur; + } + else { + // (expected.match_len <= 0) + mres = match(cur); /* match valid only if not searching whole words */ + displayMres(mres, "Start with negative match", cur); + max_match = mres; + } + if (max_match.match_len <= 0) return fail; LYXERR(Debug::FIND, "Ok"); // Compute the match length - int len = 1; + int len = 1; if (cur.pos() + len > cur.lastpos()) - return 0; - // regexp should use \w+, \S+, or \b(some string)\b - // to search for whole words - if (match.opt.matchword && !match.use_regexp) { - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) { - ++len; - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - } - // Length of matched text (different from len param) - int old_match = match(cur, len).match_len; - if (old_match < 0) - old_match = 0; - int new_match; - // Greedy behaviour while matching regexps - while ((new_match = match(cur, len + 1).match_len) > old_match) { - ++len; - old_match = new_match; - LYXERR(Debug::FIND, "verifying match with len = " << len); - } - if (old_match == 0) - len = 0; - } - else { + return fail; + + LASSERT(match.use_regexp, /**/); + { int minl = 1; int maxl = cur.lastpos() - cur.pos(); // Greedy behaviour while matching regexps while (maxl > minl) { - int actual_match = match(cur, len).match_len; - if (actual_match >= max_match) { - // actual_match > max_match _can_ happen, + MatchResult mres2; + mres2 = match(cur, len, at_begin); + displayMres(mres2, "Finalize loop", cur); + int actual_match_len = mres2.match_len; + if (actual_match_len >= max_match.match_len) { + // actual_match_len > max_match _can_ happen, // if the search area splits // some following word so that the regex // (e.g. 'r.*r\b' matches 'r' from the middle of the // splitted word) // This means, the len value is too big + actual_match_len = max_match.match_len; + max_match = mres2; + max_match.match_len = actual_match_len; maxl = len; if (maxl - minl < 4) len = (int)((maxl + minl)/2); @@ -3642,15 +3591,16 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expecte len = (int)(minl + (maxl - minl + 3)/4); } else { - // (actual_match < max_match) + // (actual_match_len < max_match.match_len) minl = len + 1; len = (int)((maxl + minl)/2); } } + len = minl; old_cur = cur; // Search for real start of matched characters while (len > 1) { - int actual_match; + MatchResult actual_match; do { cur.forwardPos(); } while (cur.depth() > old_cur.depth()); /* Skip inner insets */ @@ -3661,11 +3611,12 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expecte } if (cur.pos() != old_cur.pos()) { // OK, forwarded 1 pos in actual inset - actual_match = match(cur, len-1).match_len; - if (actual_match == max_match) { + actual_match = match(cur, len-1, at_begin); + if (actual_match.match_len == max_match.match_len) { // Ha, got it! The shorter selection has the same match length len--; old_cur = cur; + max_match = actual_match; } else { // OK, the shorter selection matches less chars, revert to previous value @@ -3675,168 +3626,131 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expecte } else { LYXERR(Debug::INFO, "cur.pos() == old_cur.pos(), this should never happen"); - actual_match = match(cur, len).match_len; - if (actual_match == max_match) + actual_match = match(cur, len, at_begin); + if (actual_match.match_len == max_match.match_len) { old_cur = cur; + max_match = actual_match; + } } } - } - return len; + if (len == 0) + return fail; + else { + max_match.pos_len = len; + displayMres(max_match, "SEARCH RESULT", cur) + return max_match; + } + } } /// Finds forward -int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) +int findForwardAdv(DocIterator & cur, MatchStringAdv & match) { if (!cur) return 0; + bool repeat = false; + DocIterator orig_cur; // to be used if repeat not successful + MatchResult orig_mres; while (!theApp()->longOperationCancelled() && cur) { - { - // forward to - size_t d; - DocIterator old_cur(cur.buffer()); - do { - d = cur.depth(); - old_cur = cur; - cur.forwardPos(); - if (!cur) - break; - if (cur.depth() > d) - continue; - if (cur.depth() == d) - break; - } while (1); - cur = old_cur; - } - + //(void) findAdvForwardInnermost(cur); LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); MatchResult mres = match(cur, -1, false); - displayMres(mres,-1) + string msg = "Starting"; + if (repeat) + msg = "Repeated"; + displayMres(mres, msg + " findForwardAdv", cur) int match_len = mres.match_len; if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) { LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); match_len = 0; } - if (match_len > 0) { + if (match_len <= 0) { + // This should exit nested insets, if any, or otherwise undefine the currsor. + cur.pos() = cur.lastpos(); + LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); + cur.forwardPos(); + } + else { // match_len > 0 // Try to find the begin of searched string int increment; - int firstInvalid = 100000; - if (mres.match_prefix + mres.pos - mres.leadsize > 1) - increment = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4; - else - increment = 10; + int firstInvalid = cur.lastpos() - cur.pos(); + { + int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4; + int incrcur = (firstInvalid + 1 )*3/4; + if (incrcur < incrmatch) + increment = incrcur; + else + increment = incrmatch; + if (increment < 1) + increment = 1; + } LYXERR(Debug::FIND, "Set increment to " << increment); while (increment > 0) { DocIterator old_cur = cur; - for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { - } - if (! cur || (cur.pit() > old_cur.pit())) { - // Are we outside of the paragraph? - // This can happen if moving past some UTF8-encoded chars - cur = old_cur; + if (cur.pos() + increment >= cur.lastpos()) { increment /= 2; + continue; } - else { - MatchResult mres2 = match(cur, -1, false); - displayMres(mres2,increment) - switch (interpretMatch(mres, mres2)) { + cur.pos() = cur.pos() + increment; + MatchResult mres2 = match(cur, -1, false); + displayMres(mres2, "findForwardAdv loop", cur) + switch (interpretMatch(mres, mres2)) { case MatchResult::newIsTooFar: - // behind the expected match - firstInvalid = increment; - cur = old_cur; - increment /= 2; - break; + // behind the expected match + firstInvalid = increment; + cur = old_cur; + increment /= 2; + break; case MatchResult::newIsBetter: - // not reached ye, but cur.pos()+increment is bettert - mres = mres2; - firstInvalid -= increment; - if (increment > firstInvalid*3/4) - increment = firstInvalid*3/4; - if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) { - if (increment >= mres2.match_prefix) - increment = (mres2.match_prefix+1)*3/4; - } - break; + // not reached yet, but cur.pos()+increment is bettert + mres = mres2; + firstInvalid -= increment; + if (increment > firstInvalid*3/4) + increment = firstInvalid*3/4; + if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) { + if (increment >= mres2.match_prefix) + increment = (mres2.match_prefix+1)*3/4; + } + break; default: - // Todo@ - // Handle not like MatchResult::newIsTooFar - // LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix); - firstInvalid--; - increment = increment*3/4; - cur = old_cur; - break; - } + // Todo@ + // Handle not like MatchResult::newIsTooFar + LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix); + firstInvalid--; + increment = increment*3/4; + cur = old_cur; + break; } } - // LYXERR0("Leaving first loop"); - { - LYXERR(Debug::FIND, "Finalizing 1"); - int len = findAdvFinalize(cur, match, mres.match_len); - if (len > 0) - return len; - else { - // try next possible match - cur.forwardPos(); - continue; - } - } - // The following code is newer reached - // but parts of it may be needed in future - int match_len_zero_count = 0; - MatchResult mres3; - for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { - if (i++ > 3) { - mres3 = match(cur, -1, false); - displayMres(mres3, 1) - int remaining_len = mres3.match_len; - if (remaining_len <= 0) { - // Apparently the searched string is not in the remaining part - break; - } - else { - i = 0; - } - } - LYXERR(Debug::FIND, "Advancing cur: " << cur); - mres3 = match(cur, 1); - displayMres(mres3, 1) - int match_len3 = mres3.match_len; - if (match_len3 < 0) + if (mres.match_len > 0) { + if (mres.match_prefix + mres.pos - mres.leadsize > 0) { + // The match seems to indicate some deeper level + repeat = true; + orig_cur = cur; + orig_mres = mres; + cur.forwardPos(); continue; - mres3 = match(cur); - displayMres(mres3, 1) - int match_len2 = mres3.match_len; - LYXERR(Debug::FIND, "match_len2: " << match_len2); - if (match_len2 > 0) { - // Sometimes in finalize we understand it wasn't a match - // and we need to continue the outest loop - LYXERR(Debug::FIND, "Finalizing 2"); - int len = findAdvFinalize(cur, match, mres.match_len); - if (len > 0) { - return len; - } - } - if (match_len2 > 0) - match_len_zero_count = 0; - else if (match_len2 == 0) - match_len_zero_count++; - if (match_len2 < 0) { - if (++match_len_zero_count > 3) { - LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len); - } - break; } } - if (!cur) - return 0; - } - if (match_len >= 0 && cur.pit() < cur.lastpit()) { - LYXERR(Debug::FIND, "Advancing par: cur=" << cur); - cur.forwardPar(); - } else { - // This should exit nested insets, if any, or otherwise undefine the currsor. - cur.pos() = cur.lastpos(); - LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); - cur.forwardPos(); + else if (repeat) { + // should never be reached. + cur = orig_cur; + mres = orig_mres; + } + // LYXERR0("Leaving first loop"); + LYXERR(Debug::FIND, "Finalizing 1"); + MatchResult found_match = findAdvFinalize(cur, match, mres); + if (found_match.match_len > 0) { + LASSERT(found_match.pos_len > 0, /**/); + match.FillResults(found_match); + return found_match.pos_len; + } + else { + // try next possible match + cur.forwardPos(); + repeat = false; + continue; + } } } return 0; @@ -3844,11 +3758,12 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) /// Find the most backward consecutive match within same paragraph while searching backwards. -int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) +MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, MatchResult &expected) { - DocIterator cur_begin = doc_iterator_begin(cur.buffer()); + DocIterator cur_begin = cur; + cur_begin.pos() = 0; DocIterator tmp_cur = cur; - int len = findAdvFinalize(tmp_cur, match, -1); + MatchResult mr = findAdvFinalize(tmp_cur, match, expected); Inset & inset = cur.inset(); for (; cur != cur_begin; cur.backwardPos()) { LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur); @@ -3856,13 +3771,13 @@ int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) new_cur.backwardPos(); if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len) break; - int new_len = findAdvFinalize(new_cur, match, -1); - if (new_len == len) + MatchResult new_mr = findAdvFinalize(new_cur, match, expected); + if (new_mr.match_len == mr.match_len) break; - len = new_len; + mr = new_mr; } LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur); - return len; + return mr; } @@ -3880,9 +3795,9 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) bool pit_changed = false; do { cur.pos() = 0; - bool found_match = (match(cur, -1, false).match_len > 0); + MatchResult found_match = match(cur, -1, false); - if (found_match) { + if (found_match.match_len > 0) { if (pit_changed) cur.pos() = cur.lastpos(); else @@ -3890,11 +3805,16 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur); DocIterator cur_prev_iter; do { - found_match = (match(cur).match_len > 0); + found_match = match(cur); LYXERR(Debug::FIND, "findBackAdv3: found_match=" - << found_match << ", cur: " << cur); - if (found_match) - return findMostBackwards(cur, match); + << (found_match.match_len > 0) << ", cur: " << cur); + if (found_match.match_len > 0) { + MatchResult found_mr = findMostBackwards(cur, match, found_match); + if (found_mr.pos_len > 0) { + match.FillResults(found_mr); + return found_mr.pos_len; + } + } // Stop if begin of document reached if (cur == cur_begin) @@ -3995,9 +3915,36 @@ static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase other right = pit->size(); pit->changeCase(buffer.params(), pos_type(1), right, others_case); } - } // namespace +static bool replaceMatches(string &t, int maxmatchnum, vector const & replacements) +{ + // Should replace the string "$" + std::to_string(matchnum) with replacement + // if the char '$' is not prefixed with odd number of char '\\' + static regex const rematch("(\\\\)*(\\$\\$([0-9]))"); + string s; + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), rematch), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) + continue; + int num = stoi(sub.str(3), nullptr, 10); + if (num >= maxmatchnum) + continue; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += replacements[num]; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + return false; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = s; + return true; +} + /// static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv) { @@ -4028,6 +3975,9 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma ostringstream oss; repl_buffer_orig.write(oss); string lyx = oss.str(); + if (matchAdv.valid_matches > 0) { + replaceMatches(lyx, matchAdv.valid_matches, matchAdv.matches); + } Buffer repl_buffer("", false); repl_buffer.setUnnamed(true); LASSERT(repl_buffer.readString(lyx), return 0); @@ -4088,10 +4038,10 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma /// Perform a FindAdv operation. -bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) +bool findAdv(BufferView * bv, FindAndReplaceOptions & opt) { DocIterator cur; - int match_len = 0; + int pos_len = 0; // e.g., when invoking word-findadv from mini-buffer wither with // wrong options syntax or before ever opening advanced F&R pane @@ -4112,15 +4062,15 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) num_replaced += findAdvReplace(bv, opt, matchAdv); cur = bv->cursor(); if (opt.forward) - match_len = findForwardAdv(cur, matchAdv); + pos_len = findForwardAdv(cur, matchAdv); else - match_len = findBackwardsAdv(cur, matchAdv); + pos_len = findBackwardsAdv(cur, matchAdv); } catch (exception & ex) { - bv->message(from_ascii(ex.what())); + bv->message(from_utf8(ex.what())); return false; } - if (match_len == 0) { + if (pos_len == 0) { if (num_replaced > 0) { switch (num_replaced) { @@ -4147,8 +4097,13 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) else bv->message(_("Match found.")); - LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len); - bv->putSelectionAt(cur, match_len, !opt.forward); + if (cur.pos() + pos_len > cur.lastpos()) { + // Prevent crash in bv->putSelectionAt() + // Should never happen, maybe LASSERT() here? + pos_len = cur.lastpos() - cur.pos(); + } + LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len); + bv->putSelectionAt(cur, pos_len, !opt.forward); return true; } @@ -4176,7 +4131,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) { - LYXERR(Debug::FIND, "parsing"); + // LYXERR(Debug::FIND, "parsing"); string s; string line; getline(is, line); @@ -4188,7 +4143,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); + // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); opt.find_buf_name = from_utf8(s); is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all; is.get(); // Waste space before replace string @@ -4202,7 +4157,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); + // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); opt.repl_buf_name = from_utf8(s); is >> opt.keep_case; int i; @@ -4211,9 +4166,11 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) is >> i; opt.restr = FindAndReplaceOptions::SearchRestriction(i); + /* LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' ' << opt.scope << ' ' << opt.restr); + */ return is; }