X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxfind.cpp;h=93e454fcf10d058a9c86a15a97485cb2a8f1b4e7;hb=3a1b19c5c363428f424180270e32bc8b468ea54f;hp=677b382b8453a3e880ee84188305703371bb5a6f;hpb=d87511308bdcca7545cef5f9c63a7fbde6ccf9b0;p=lyx.git diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 677b382b84..93e454fcf1 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -31,6 +31,7 @@ #include "Paragraph.h" #include "Text.h" #include "Encoding.h" +#include "Language.h" #include "frontends/Application.h" #include "frontends/alert.h" @@ -49,7 +50,7 @@ #include "support/lstrings.h" #include "support/textutils.h" -#include +#include #include //#define ResultsDebug @@ -71,6 +72,9 @@ using namespace lyx::support; namespace lyx { +typedef unordered_map AccentsMap; +typedef unordered_map::const_iterator AccentsIterator; +static AccentsMap accents = unordered_map(); // Helper class for deciding what should be ignored class IgnoreFormats { @@ -98,6 +102,10 @@ class IgnoreFormats { /// bool getLanguage() const { return ignoreLanguage_; } /// + bool getDeleted() const { return ignoreDeleted_; } + /// + void setIgnoreDeleted(bool value); + /// void setIgnoreFormat(string const & type, bool value); private: @@ -121,9 +129,10 @@ private: bool ignoreColor_ = false; /// bool ignoreLanguage_ = false; + /// + bool ignoreDeleted_ = true; }; - void IgnoreFormats::setIgnoreFormat(string const & type, bool value) { if (type == "color") { @@ -159,6 +168,9 @@ void IgnoreFormats::setIgnoreFormat(string const & type, bool value) else if (type == "strike") { ignoreStrikeOut_ = value; } + else if (type == "deleted") { + ignoreDeleted_ = value; + } } // The global variable that can be changed from outside @@ -376,7 +388,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, // This causes a minor bug as undo will restore this selection, // which the user did not create (#8986). cur.innerText()->selectWord(cur, WHOLE_WORD); - searchstr = cur.selectionAsString(false); + searchstr = cur.selectionAsString(false, true); } // if we still don't have a search string, report the error @@ -385,7 +397,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, return make_pair(false, 0); bool have_selection = cur.selection(); - docstring const selected = cur.selectionAsString(false); + docstring const selected = cur.selectionAsString(false, true); bool match = case_sens ? searchstr == selected @@ -468,8 +480,7 @@ bool lyxfind(BufferView * bv, FuncRequest const & ev) } -bool lyxreplace(BufferView * bv, - FuncRequest const & ev, bool has_deleted) +bool lyxreplace(BufferView * bv, FuncRequest const & ev) { if (!bv || ev.action() != LFUN_WORD_REPLACE) return false; @@ -491,40 +502,31 @@ bool lyxreplace(BufferView * bv, bool update = false; - if (!has_deleted) { - int replace_count = 0; - if (all) { - replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); - update = replace_count > 0; - } else { - pair rv = - replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); - update = rv.first; - replace_count = rv.second; - } + int replace_count = 0; + if (all) { + replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); + update = replace_count > 0; + } else { + pair rv = + replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); + update = rv.first; + replace_count = rv.second; + } - Buffer const & buf = bv->buffer(); - if (!update) { - // emit message signal. - buf.message(_("String not found.")); + Buffer const & buf = bv->buffer(); + if (!update) { + // emit message signal. + buf.message(_("String not found.")); + } else { + if (replace_count == 0) { + buf.message(_("String found.")); + } else if (replace_count == 1) { + buf.message(_("String has been replaced.")); } else { - if (replace_count == 0) { - buf.message(_("String found.")); - } else if (replace_count == 1) { - buf.message(_("String has been replaced.")); - } else { - docstring const str = - bformat(_("%1$d strings have been replaced."), replace_count); - buf.message(str); - } + docstring const str = + bformat(_("%1$d strings have been replaced."), replace_count); + buf.message(str); } - } else if (findnext) { - // if we have deleted characters, we do not replace at all, but - // rather search for the next occurence - if (findOne(bv, search, casesensitive, matchword, forward, true, findnext)) - update = true; - else - bv->message(_("String not found.")); } return update; } @@ -648,140 +650,111 @@ namespace { typedef vector > Escapes; -/// A map of symbols and their escaped equivalent needed within a regex. -/// @note Beware of order -Escapes const & get_regexp_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("$", "_x_$")); - escape_map.push_back(P("{", "_x_{")); - escape_map.push_back(P("}", "_x_}")); - escape_map.push_back(P("[", "_x_[")); - escape_map.push_back(P("]", "_x_]")); - escape_map.push_back(P("(", "_x_(")); - escape_map.push_back(P(")", "_x_)")); - escape_map.push_back(P("+", "_x_+")); - escape_map.push_back(P("*", "_x_*")); - escape_map.push_back(P(".", "_x_.")); - escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)")); - escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)")); - escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)")); - escape_map.push_back(P("_x_", "\\")); - } - return escape_map; -} - -/// A map of lyx escaped strings and their unescaped equivalent. -Escapes const & get_lyx_unescapes() +string string2regex(string in) { - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\%", "%")); - escape_map.push_back(P("\\{", "{")); - escape_map.push_back(P("\\}", "}")); - escape_map.push_back(P("\\mathcircumflex ", "^")); - escape_map.push_back(P("\\mathcircumflex", "^")); - escape_map.push_back(P("\\backslash ", "\\")); - escape_map.push_back(P("\\backslash", "\\")); - escape_map.push_back(P("\\sim ", "~")); - escape_map.push_back(P("\\sim", "~")); + static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" }; + string temp = std::regex_replace(in, specialChars, R"(\$&)" ); + string temp2(""); + size_t lastpos = 0; + size_t fl_pos = 0; + int offset = 1; + while (fl_pos < temp.size()) { + fl_pos = temp.find("\\\\foreignlanguage", lastpos + offset); + if (fl_pos == string::npos) + break; + offset = 16; + temp2 += temp.substr(lastpos, fl_pos - lastpos); + temp2 += "\\n"; + lastpos = fl_pos; } - return escape_map; -} - -/// A map of escapes turning a regexp matching text to one matching latex. -Escapes const & get_regexp_latex_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)")); - escape_map.push_back(P("(first << " as " << it->second); - unsigned int pos = 0; - while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) { - s.replace(pos, it->first.length(), it->second); - LYXERR(Debug::FIND, "After escape: " << s); - pos += it->second.length(); -// LYXERR(Debug::FIND, "pos: " << pos); + /* Convert \backslash => \ + * and \{, \}, \[, \] => {, }, [, ] + */ + string s(""); + regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))"); + size_t lastpos = 0; + smatch sub; + bool backslashed = false; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + string replace; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else { + if (sub.str(4) == "backslash") { + replace = "\\"; + if (withformat) { + // transforms '\backslash \{' into '\{' + // and '\{' into '{' + string next = t.substr(sub.position(2) + sub.str(2).length(), 2); + if ((next == "\\{") || (next == "\\}")) { + replace = ""; + backslashed = true; + } + } + } + else if (sub.str(4) == "mathcircumflex") + replace = "^"; + else if (backslashed) { + backslashed = false; + if (withformat && (sub.str(3) == "{")) + replace = accents["braceleft"]; + else if (withformat && (sub.str(3) == "}")) + replace = accents["braceright"]; + else { + // else part should not exist + LASSERT(1, /**/); + } + } + else + replace = sub.str(3); } + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += replace; + lastpos = sub.position(2) + sub.length(2); } - LYXERR(Debug::FIND, "Escaped : '" << s << "'"); + if (lastpos == 0) + return t; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); return s; } - /// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string), /// while outside apply get_lyx_unescapes()+get_regexp_escapes(). /// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well. -string escape_for_regex(string s, bool match_latex) +string escape_for_regex(string s, bool withformat) { - size_t pos = 0; - while (pos < s.size()) { - size_t new_pos = s.find("\\regexp{", pos); - if (new_pos == string::npos) - new_pos = s.size(); - string t; - if (new_pos > pos) { - // outside regexp - LYXERR(Debug::FIND, "new_pos: " << new_pos); - t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes()); - LYXERR(Debug::FIND, "t [lyx]: " << t); - t = apply_escapes(t, get_regexp_escapes()); - LYXERR(Debug::FIND, "t [rxp]: " << t); - s.replace(pos, new_pos - pos, t); - new_pos = pos + t.size(); - LYXERR(Debug::FIND, "Regexp after escaping: " << s); - LYXERR(Debug::FIND, "new_pos: " << new_pos); - if (new_pos == s.size()) - break; - } - // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes) - size_t end_pos = s.find("\\endregexp{}}", new_pos + 8); - LYXERR(Debug::FIND, "end_pos: " << end_pos); - t = s.substr(new_pos + 8, end_pos - (new_pos + 8)); - LYXERR(Debug::FIND, "t in regexp : " << t); - t = apply_escapes(t, get_lyx_unescapes()); - LYXERR(Debug::FIND, "t in regexp after unescapes [lyx]: " << t); - if (match_latex) { - t = apply_escapes(t, get_regexp_latex_escapes()); - LYXERR(Debug::FIND, "t in regexp after latex_escapes [ltx]: " << t); + size_t lastpos = 0; + string result = ""; + while (lastpos < s.size()) { + size_t regex_pos = s.find("\\regexp{", lastpos); + if (regex_pos == string::npos) { + regex_pos = s.size(); } - if (end_pos == s.size()) { - s.replace(new_pos, end_pos - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s); - break; + if (regex_pos > lastpos) { + result += string2regex(s.substr(lastpos, regex_pos-lastpos)); + lastpos = regex_pos; + if (lastpos == s.size()) + break; } - s.replace(new_pos, end_pos + 13 - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s); - pos = new_pos + t.size(); - LYXERR(Debug::FIND, "pos: " << pos); + size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8); + result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat); + lastpos = end_pos + 13; } - return s; + return result; } @@ -799,62 +772,6 @@ bool regex_replace(string const & s, string & t, string const & searchstr, return rv; } -#if 0 -/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces. - ** - ** Verify that closed braces exactly match open braces. This avoids that, for example, - ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'. - ** - ** @param unmatched - ** Number of open braces that must remain open at the end for the verification to succeed. - **/ -#if QTSEARCH -bool braces_match(QString const & beg, - int unmatched = 0) -#else -bool braces_match(string const & beg, - int unmatched = 0) -#endif -{ - int open_pars = 0; -#if QTSEARCH - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'"); -#else - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'"); -#endif - int lastidx = beg.size(); - for (int i=0; i < lastidx; ++i) { - // Skip escaped braces in the count -#if QTSEARCH - QChar c = beg.at(i); -#else - char c = beg.at(i); -#endif - if (c == '\\') { - ++i; - if (i >= lastidx) - break; - } else if (c == '{') { - ++open_pars; - } else if (c == '}') { - if (open_pars == 0) { - LYXERR(Debug::FIND, "Found unmatched closed brace"); - return false; - } else - --open_pars; - } - } - if (open_pars != unmatched) { - LYXERR(Debug::FIND, "Found " << open_pars - << " instead of " << unmatched - << " unmatched open braces at the end of count"); - return false; - } - LYXERR(Debug::FIND, "Braces match as expected"); - return true; -} -#endif - class MatchResult { public: enum range { @@ -868,22 +785,21 @@ public: int pos; int leadsize; int pos_len; + int searched_size; vector result = vector (); - MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1) {}; + MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {}; }; static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres) { - int range = oldres.match_len; - if (range < 2) range = 2; - if (newres.match2end < oldres.match2end - oldres.match_len) + if (newres.match2end < oldres.match2end) return MatchResult::newIsTooFar; if (newres.match_len < oldres.match_len) return MatchResult::newIsTooFar; - if ((newres.match_len == oldres.match_len) && - (newres.match2end < oldres.match2end + range) && - (newres.match2end > oldres.match2end - range)) { - return MatchResult::newIsBetter; + + if (newres.match_len == oldres.match_len) { + if (newres.match2end == oldres.match2end) + return MatchResult::newIsBetter; } return MatchResult::newIsInvalid; } @@ -991,7 +907,10 @@ static docstring buffer_to_latex(Buffer & buffer) runparams.linelen = 10000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + if (ignoreFormats.getDeleted()) + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + else + runparams.for_searchAdv = OutputParams::SearchWithDeleted; pit_type const endpit = buffer.paragraphs().size(); for (pit_type pit = 0; pit != endpit; ++pit) { TeXOnePar(buffer, buffer.text(), pit, os, runparams); @@ -1013,16 +932,23 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co runparams.flavor = Flavor::XeTeX; runparams.linelen = 10000; //lyxrc.plaintext_linelen; runparams.dryrun = true; - runparams.for_search = true; + int option = AS_STR_INSETS |AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { Paragraph const & par = buffer.paragraphs().at(pit); LYXERR(Debug::FIND, "Adding to search string: '" << par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams) << "'"); str += par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams); } // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts @@ -1348,9 +1274,6 @@ void Intervall::addIntervall(int low, int upper) } } -typedef map AccentsMap; -static AccentsMap accents = map(); - static void buildaccent(string n, string param, string values) { stringstream s(n); @@ -1428,6 +1351,9 @@ static void buildAccentsMap() accents["i"] = "ı"; accents["jmath"] = "ȷ"; accents["cdot"] = "·"; + accents["textasciicircum"] = "^"; + accents["mathcircumflex"] = "^"; + accents["sim"] = "~"; accents["guillemotright"] = "»"; accents["guillemotleft"] = "«"; accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15 @@ -1446,6 +1372,8 @@ static void buildAccentsMap() accents["latexe"] = getutf8(0xf0013); accents["LaTeXe"] = getutf8(0xf0013); accents["lyxarrow"] = getutf8(0xf0020); + accents["braceleft"] = getutf8(0xf0030); + accents["braceright"] = getutf8(0xf0031); accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash accents["backslash LyX"] = getutf8(0xf0010); accents["backslash tex"] = getutf8(0xf0011); @@ -1520,13 +1448,14 @@ void Intervall::removeAccents() buildAccentsMap(); static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|" "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}" - "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|guillemot(left|right))(?![a-zA-Z]))"); + "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))"); smatch sub; for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { sub = *itacc; string key = sub.str(1); - if (accents.find(key) != accents.end()) { - string val = accents[key]; + AccentsIterator it_ac = accents.find(key); + if (it_ac != accents.end()) { + string val = it_ac->second; size_t pos = sub.position(size_t(0)); for (size_t i = 0; i < val.size(); i++) { par[pos+i] = val[i]; @@ -1601,9 +1530,10 @@ int Intervall::nextNotIgnored(int start) const return start; } -typedef map KeysMap; +typedef unordered_map KeysMap; +typedef unordered_map::const_iterator KeysIterator; typedef vector< KeyInfo> Entries; -static KeysMap keys = map(); +static KeysMap keys = unordered_map(); class LatexInfo { private: @@ -1863,9 +1793,10 @@ void LatexInfo::buildEntries(bool isPatternString) key = sub.str(2); } } - if (keys.find(key) != keys.end()) { - if (keys[key].keytype == KeyInfo::headRemove) { - KeyInfo found1 = keys[key]; + KeysIterator it_key = keys.find(key); + if (it_key != keys.end()) { + if (it_key->second.keytype == KeyInfo::headRemove) { + KeyInfo found1 = it_key->second; found1.disabled = true; found1.head = "\\" + key + "{"; found1._tokenstart = sub.position(size_t(2)); @@ -1899,7 +1830,7 @@ void LatexInfo::buildEntries(bool isPatternString) mi.incrEntry(); math_pos = mi.getStartPos(); } - if (keys.find(key) == keys.end()) { + if (it_key == keys.end()) { found = KeyInfo(KeyInfo::isStandard, 0, true); LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text"); found = KeyInfo(KeyInfo::isText, 0, false); @@ -2048,6 +1979,10 @@ void LatexInfo::buildEntries(bool isPatternString) key += interval_.par.substr(params, optend-params); evaluatingOptional = true; optionalEnd = optend; + if (found.keytype == KeyInfo::isSectioning) { + // Remove optional values (but still keep in header) + interval_.addIntervall(params, optend); + } } string token = sub.str(7); int closings; @@ -2083,10 +2018,10 @@ void LatexInfo::buildEntries(bool isPatternString) } else { found._dataStart = found._tokenstart + found._tokensize; - } + } closings = 0; } - if (interval_.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) { + if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) { found._dataStart += 15; } size_t endpos; @@ -2532,7 +2467,12 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) case KeyInfo::doRemove: { // Remove the key with all parameters and following spaces size_t pos; - for (pos = actual._dataEnd+1; pos < interval_.par.length(); pos++) { + size_t start; + if (interval_.par[actual._dataEnd-1] == ' ') + start = actual._dataEnd; + else + start = actual._dataEnd+1; + for (pos = start; pos < interval_.par.length(); pos++) { if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%')) break; } @@ -2834,7 +2774,7 @@ string splitOnKnownMacros(string par, bool isPatternString) * Resulting modified string is set to "", if * the searched tex does not contain all the features in the search pattern */ -static string correctlanguagesetting(string par, bool isPatternString, bool withformat) +static string correctlanguagesetting(string par, bool isPatternString, bool withformat, lyx::Buffer *pbuf = nullptr) { static Features regex_f; static int missed = 0; @@ -2854,8 +2794,24 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with // Split the latex input into pieces which // can be digested by our search engine LYXERR(Debug::FIND, "input: \"" << par << "\""); + if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language + // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX + string doclang = pbuf->params().language->polyglossia(); + static regex langre("\\\\(foreignlanguage)\\{([^\\}]+)\\}"); + smatch sub; + bool toIgnoreLang = true; + for (sregex_iterator it(par.begin(), par.end(), langre), end; it != end; ++it) { + sub = *it; + if (sub.str(2) != doclang) { + toIgnoreLang = false; + break; + } + } + setIgnoreFormat("language", toIgnoreLang); + + } result = splitOnKnownMacros(par.substr(0,parlen), isPatternString); - LYXERR(Debug::FIND, "After split: \"" << result << "\""); + LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\""); } else result = par.substr(0, parlen); @@ -2884,6 +2840,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with return ""; } } + } else { // LYXERR(Debug::INFO, "No regex formats"); @@ -2989,8 +2946,11 @@ static void modifyRegexForMatchWord(string &t) s += "\\S"; lastpos = sub.position(2) + sub.length(2); } - if (lastpos == 0) + if (lastpos == 0) { + s = "\\b" + t + "\\b"; + t = s; return; + } else if (lastpos < t.length()) s += t.substr(lastpos, t.length() - lastpos); t = "\\b" + s + "\\b"; @@ -2999,7 +2959,6 @@ static void modifyRegexForMatchWord(string &t) MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) : p_buf(&buf), p_first_buf(&buf), opt(opt) { - static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\\])" }; Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true); docstring const & ds = stringifySearchBuffer(find_buf, opt); use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos; @@ -3018,7 +2977,8 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) size_t lead_size = 0; // correct the language settings - par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat); + par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat, &buf); + opt.matchAtStart = false; if (!use_regexp) { identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string if (opt.ignoreformat) { @@ -3028,9 +2988,9 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) lead_size = identifyLeading(par_as_string); } lead_as_string = par_as_string.substr(0, lead_size); - string lead_as_regex_string = std::regex_replace(lead_as_string, specialChars, R"(\$&)" ); + string lead_as_regex_string = string2regex(lead_as_string); par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); - string par_as_regex_string_nolead = std::regex_replace(par_as_string_nolead, specialChars, R"(\$&)" ); + string par_as_regex_string_nolead = string2regex(par_as_string_nolead); /* Handle whole words too in this case */ if (opt.matchword) { @@ -3041,6 +3001,8 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead; CreateRegexp(opt, regexp_str, regexp2_str); use_regexp = true; + LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); return; } @@ -3056,31 +3018,20 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) { string lead_as_regexp; if (lead_size > 0) { - lead_as_regexp = std::regex_replace(par_as_string.substr(0, lead_size), specialChars, R"(\$&)" ); + lead_as_regexp = string2regex(par_as_string.substr(0, lead_size)); + regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", ""); par_as_string = par_as_string_nolead; LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); } - LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); + // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat); // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them. - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - if ( - // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex) - regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2") - // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])( \\\\\\\\\\\\\\])\\'", "$1(.*?)$2") - // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, - "(.*[^\\\\])( \\\\\\\\end\\\\\\{[a-zA-Z_]*)(\\\\\\*)?(\\\\\\})\\'", "$1(.*?)$2$3$4") - // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2") - ) { - ++close_wildcards; - } + // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + ++close_wildcards; + size_t lng = par_as_string.size(); if (!opt.ignoreformat) { // Remove extra '\}' at end if not part of \{\.\} - size_t lng = par_as_string.size(); while(lng > 2) { if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) { if (lng >= 6) { @@ -3095,16 +3046,16 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) } if (lng < par_as_string.size()) par_as_string = par_as_string.substr(0,lng); - if ((lng > 0) && (par_as_string[0] == '^')) { - par_as_string = par_as_string.substr(1); - --lng; - opt.matchstart = true; - } } - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Close .*? : " << close_wildcards); - LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); + LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'"); + if ((lng > 0) && (par_as_string[0] == '^')) { + par_as_string = par_as_string.substr(1); + --lng; + opt.matchAtStart = true; + } + // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + // LYXERR(Debug::FIND, "Open braces: " << open_braces); + // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); // If entered regexp must match at begin of searched string buffer // Kornel: Added parentheses to use $1 for size of the leading string @@ -3119,13 +3070,6 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) string dest = "\\" + std::to_string(i+2); while (regex_replace(par_as_string, par_as_string, orig, dest)); } - /* opt.matchword is ignored if using regex - so expanding par_as_string with "\\b" seems appropriate here - if regex contains for instance '.*' or '.+' - 1.) Nothing to do, if 'par_as_string' contains "\\b" already. - (Means, that the user knows how to handle whole words - 2.) else replace '.' with "\\S" and wrap the regex with "\\b" - */ if (opt.matchword) { modifyRegexForMatchWord(par_as_string); opt.matchword = false; @@ -3139,93 +3083,37 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) } } -#if 0 -// Count number of characters in string -// {]} ==> 1 -// \& ==> 1 -// --- ==> 1 -// \\[a-zA-Z]+ ==> 1 -#if QTSEARCH -static int computeSize(QStringRef s, int len) -#define isLyxAlpha(arg) arg.isLetter() -#else -static int computeSize(string s, int len) -#define isLyxAlpha(arg) isalpha(arg) -#endif -{ - if (len == 0) - return 0; - int skip = 1; - int count = 0; - for (int i = 0; i < len; i += skip, count++) { - if (s.at(i) == '\\') { - skip = 2; - if (i + 1 < len && isLyxAlpha(s.at(i+1))) { - for (int j = 2; i+j < len; j++) { - if (! isLyxAlpha(s.at(i+j))) { - if (s.at(i+j) == ' ') - skip++; - else if (s.at(i+j) == '{') { - if (i+j+1 < len && s.at(i+j+1) == '}') - skip += 2; - else if (i + j + 1 >= len) - skip++; - } - break; - } - skip++; - } - } - } - else if (s.at(i) == '{') { - if (i + 1 < len && s.at(i+1) == '}') - skip = 2; - else - skip = 3; - } - else if (s.at(i) == '-') { - if (i+1 < len && s.at(i+1) == '-') { - if (i + 2 < len && s.at(i+2) == '-') - skip = 3; - else - skip = 2; - } - else - skip = 1; - } - else { - skip = 1; - } - } - return count; -} -#endif - MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const { MatchResult mres; + mres.searched_size = len; if (at_begin && (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) ) return mres; docstring docstr = stringifyFromForSearch(opt, cur, len); string str; - if (use_regexp || opt.casesensitive) - str = normalize(docstr); - else - str = normalize(lowercase(docstr)); + str = normalize(docstr); if (!opt.ignoreformat) { str = correctlanguagesetting(str, false, !opt.ignoreformat); + // remove closing '}' and '\n' to allow for use of '$' in regex + size_t lng = str.size(); + while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n'))) + lng--; + if (lng != str.size()) { + str = str.substr(0, lng); + } } if (str.empty()) { mres.match_len = -1; return mres; } - LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); - LYXERR(Debug::FIND, "After normalization: '" << str << "'"); + LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'"); - if (use_regexp) { + LASSERT(use_regexp, /**/); + { + // use_regexp always true LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); #if QTSEARCH QString qstr = QString::fromStdString(str); @@ -3279,23 +3167,25 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be // we have to 'unify' the length of the post-match. // Done by ignoring closing parenthesis and linefeeds at string end int matchend = match.capturedEnd(0); - while (mres.match_len > 0) { - QChar c = qstr.at(matchend - 1); - if ((c == '\n') || (c == '}') || (c == '{')) { - mres.match_len--; - matchend--; - } - else - break; - } size_t strsize = qstr.size(); - while (strsize > (size_t) match.capturedEnd(0)) { - QChar c = qstr.at(strsize-1); - if ((c == '\n') || (c == '}')) { - --strsize; + if (!opt.ignoreformat) { + while (mres.match_len > 0) { + QChar c = qstr.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > (size_t) match.capturedEnd(0)) { + QChar c = qstr.at(strsize-1); + if ((c == '\n') || (c == '}')) { + --strsize; + } + else + break; } - else - break; } // LYXERR0(qstr.toStdString()); mres.match2end = strsize - matchend; @@ -3306,22 +3196,24 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be // ignore closing parenthesis and linefeeds at string end size_t strend = m[0].second - m[0].first; int matchend = strend; - while (mres.match_len > 0) { - char c = str.at(matchend - 1); - if ((c == '\n') || (c == '}') || (c == '{')) { - mres.match_len--; - matchend--; - } - else - break; - } size_t strsize = str.size(); - while (strsize > strend) { - if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) { - --strsize; + if (!opt.ignoreformat) { + while (mres.match_len > 0) { + char c = str.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > strend) { + if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) { + --strsize; + } + else + break; } - else - break; } // LYXERR0(str); mres.match2end = strsize - matchend; @@ -3349,34 +3241,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be #endif return mres; } - - // else !use_regexp: but all code paths above return - LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='" - << par_as_string << "', str='" << str << "'"); - LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='" - << lead_as_string << "', par_as_string_nolead='" - << par_as_string_nolead << "'"); - - if (at_begin) { - LYXERR(Debug::FIND, "size=" << par_as_string.size() - << ", substr='" << str.substr(0, par_as_string.size()) << "'"); - if (str.substr(0, par_as_string.size()) == par_as_string) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size(); - mres.pos = 0; - return mres; - } - } else { - // Start the search _after_ the leading part - size_t pos = str.find(par_as_string_nolead, lead_as_string.size()); - if (pos != string::npos) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size() - pos; - mres.pos = pos; - return mres; - } - } - return mres; } @@ -3386,9 +3250,9 @@ MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at int res = mres.match_len; LYXERR(Debug::FIND, "res=" << res << ", at_begin=" << at_begin - << ", matchstart=" << opt.matchstart + << ", matchAtStart=" << opt.matchAtStart << ", inTexted=" << cur.inTexted()); - if (opt.matchstart) { + if (opt.matchAtStart) { if (cur.pos() != 0) mres.match_len = 0; else if (mres.match_prefix > 0) @@ -3397,43 +3261,6 @@ MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at } else return mres; - /* DEAD CODE follows - if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted()) - return mres; - if ((len > 0) && (res < len)) { - mres.match_len = 0; - return mres; - } - Paragraph const & par = cur.paragraph(); - bool ws_left = (cur.pos() > 0) - ? par.isWordSeparator(cur.pos() - 1) - : true; - bool ws_right; - if (len < 0) - ws_right = true; - else { - ws_right = (cur.pos() + len < par.size()) - ? par.isWordSeparator(cur.pos() + len) - : true; - } - LYXERR(Debug::FIND, - "cur.pos()=" << cur.pos() << ", res=" << res - << ", separ: " << ws_left << ", " << ws_right - << ", len: " << len - << endl); - if (ws_left && ws_right) { - // Check for word separators inside the found 'word' - for (int i = 0; i < len; i++) { - if (par.isWordSeparator(cur.pos() + i)) { - mres.match_len = 0; - return mres; - } - } - return mres; - } - mres.match_len = 0; - return mres; - */ } #if 0 @@ -3495,7 +3322,7 @@ string MatchStringAdv::normalize(docstring const & s) const // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to seach for colored text too - LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); + LYXERR(Debug::FIND, "Removing stale empty macros from: " << t); while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) @@ -3522,11 +3349,18 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) runparams.linelen = 10000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + int option = AS_STR_INSETS | AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } LYXERR(Debug::FIND, "Stringifying with cur: " << cur << ", from pos: " << cur.pos() << ", end: " << end); return par.asString(cur.pos(), end, - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams); } else if (cur.inMathed()) { CursorSlice cs = cur.top(); @@ -3554,9 +3388,11 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) */ docstring latexifyFromCursor(DocIterator const & cur, int len) { + /* LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); + */ Buffer const & buf = *cur.buffer(); odocstringstream ods; @@ -3568,7 +3404,12 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) runparams.linelen = 8000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + if (ignoreFormats.getDeleted()) { + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } if (cur.inTexted()) { // @TODO what about searching beyond/across paragraph breaks ? @@ -3623,90 +3464,98 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) #if defined(ResultsDebug) // Debugging output -static void displayMResult(MatchResult &mres, int increment) +static void displayMResult(MatchResult &mres, string from, DocIterator & cur) { - LYXERR0( "pos: " << mres.pos << " increment " << increment); - LYXERR0( "leadsize: " << mres.leadsize); - LYXERR0( "match_len: " << mres.match_len); - LYXERR0( "match_prefix: " << mres.match_prefix); - LYXERR0( "match2end: " << mres.match2end); - LYXERR0( "pos_len: " << mres.pos_len); // Set in finalize - for (size_t i = 0; i < mres.result.size(); i++) - LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\""); -} - #define displayMres(s,i) displayMResult(s,i); -#else - #define displayMres(s,i) -#endif - -static bool findAdvForwardInnermost(DocIterator & cur) -{ - size_t d; - DocIterator old_cur(cur.buffer()); - int forwardCount = 0; - do { - d = cur.depth(); - old_cur = cur; - cur.forwardPos(); - if (!cur) { - break; - } - if (cur.depth() > d) { - forwardCount++; - continue; + LYXERR0( "from:\t\t\t" << from); + string status; + if (mres.pos_len > 0) { + // Set in finalize + status = "FINALSEARCH"; + } + else { + if (mres.match_len > 0) { + if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize)) + status = "Good Match"; + else + status = "Matched in"; } - if (cur.depth() == d) - break; - } while(1); - cur = old_cur; - if (forwardCount > 0) { - LYXERR(Debug::FIND, "Forwarded " << forwardCount << " step(s) (searching for innermost match)"); - return true;; + else + status = "MissedSearch"; } - else - return false; + + LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")"); + if ((mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0)) + LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")"); + if ((mres.pos > 0) || (mres.match_prefix > 0)) + LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")"); + for (size_t i = 0; i < mres.result.size(); i++) + LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\""); } + #define displayMres(s, txt, cur) displayMResult(s, txt, cur); +#else + #define displayMres(s, txt, cur) +#endif /** Finalize an advanced find operation, advancing the cursor to the innermost ** position that matches, plus computing the length of the matching text to ** be selected ** Return the cur.pos() difference between start and end of found match **/ -MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1)) +MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1)) { // Search the foremost position that matches (avoids find of entire math // inset when match at start of it) DocIterator old_cur(cur.buffer()); MatchResult mres; static MatchResult fail = MatchResult(); - static MatchResult max_match; + MatchResult max_match; // If (prefix_len > 0) means that forwarding 1 position will remove the complete entry // Happens with e.g. hyperlinks // either one sees "http://www.bla.bla" or nothing // so the search for "www" gives prefix_len = 7 (== sizeof("http://") // and although we search for only 3 chars, we find the whole hyperlink inset bool at_begin = (expected.match_prefix == 0); - if (findAdvForwardInnermost(cur)) { - mres = match(cur, -1, at_begin); - displayMres(mres, 0); - if (expected.match_len > 0) { + if (!match.opt.forward && match.opt.ignoreformat) { + if (expected.pos > 0) + return fail; + } + LASSERT(at_begin, /**/); + if (expected.match_len > 0 && at_begin) { + // Search for deepest match + old_cur = cur; + max_match = expected; + do { + size_t d = cur.depth(); + cur.forwardPos(); + if (!cur) + break; + if (cur.depth() < d) + break; + if (cur.depth() == d) + break; + size_t lastd = d; + while (cur && cur.depth() > lastd) { + lastd = cur.depth(); + mres = match(cur, -1, at_begin); + displayMres(mres, "Checking innermost", cur); + if (mres.match_len > 0) + break; + // maybe deeper? + cur.forwardPos(); + } if (mres.match_len < expected.match_len) - return fail; - } - else { - if (mres.match_len <= 0) - return fail; - } - max_match = mres.match_len; + break; + max_match = mres; + old_cur = cur;; + } while(1); + cur = old_cur; } - else if (expected.match_len < 0) { + else { + // (expected.match_len <= 0) mres = match(cur); /* match valid only if not searching whole words */ - displayMres(mres, 0); + displayMres(mres, "Start with negative match", cur); max_match = mres; } - else { - max_match = expected; - } if (max_match.match_len <= 0) return fail; LYXERR(Debug::FIND, "Ok"); @@ -3714,43 +3563,27 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma int len = 1; if (cur.pos() + len > cur.lastpos()) return fail; - // regexp should use \w+, \S+, or \b(some string)\b - // to search for whole words - if (match.opt.matchword && !match.use_regexp) { - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) { - ++len; - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - } - // Length of matched text (different from len param) - static MatchResult old_match = match(cur, len, at_begin); - if (old_match.match_len < 0) - old_match = fail; - MatchResult new_match; - // Greedy behaviour while matching regexps - while ((new_match = match(cur, len + 1, at_begin)).match_len > old_match.match_len) { - ++len; - old_match = new_match; - LYXERR(Debug::FIND, "verifying match with len = " << len); - } - return old_match; - } - else { + + LASSERT(match.use_regexp, /**/); + { int minl = 1; int maxl = cur.lastpos() - cur.pos(); // Greedy behaviour while matching regexps while (maxl > minl) { MatchResult mres2; mres2 = match(cur, len, at_begin); - displayMres(mres2, len); - int actual_match = mres2.match_len; - if (actual_match >= max_match.match_len) { - // actual_match > max_match _can_ happen, + displayMres(mres2, "Finalize loop", cur); + int actual_match_len = mres2.match_len; + if (actual_match_len >= max_match.match_len) { + // actual_match_len > max_match _can_ happen, // if the search area splits // some following word so that the regex // (e.g. 'r.*r\b' matches 'r' from the middle of the // splitted word) // This means, the len value is too big + actual_match_len = max_match.match_len; + max_match = mres2; + max_match.match_len = actual_match_len; maxl = len; if (maxl - minl < 4) len = (int)((maxl + minl)/2); @@ -3758,11 +3591,12 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma len = (int)(minl + (maxl - minl + 3)/4); } else { - // (actual_match < max_match) + // (actual_match_len < max_match.match_len) minl = len + 1; len = (int)((maxl + minl)/2); } } + len = minl; old_cur = cur; // Search for real start of matched characters while (len > 1) { @@ -3782,6 +3616,7 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma // Ha, got it! The shorter selection has the same match length len--; old_cur = cur; + max_match = actual_match; } else { // OK, the shorter selection matches less chars, revert to previous value @@ -3802,6 +3637,7 @@ MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Ma return fail; else { max_match.pos_len = len; + displayMres(max_match, "SEARCH RESULT", cur) return max_match; } } @@ -3812,142 +3648,109 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) { if (!cur) return 0; + bool repeat = false; + DocIterator orig_cur; // to be used if repeat not successful + MatchResult orig_mres; while (!theApp()->longOperationCancelled() && cur) { - (void) findAdvForwardInnermost(cur); + //(void) findAdvForwardInnermost(cur); LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); MatchResult mres = match(cur, -1, false); - displayMres(mres,-1) + string msg = "Starting"; + if (repeat) + msg = "Repeated"; + displayMres(mres, msg + " findForwardAdv", cur) int match_len = mres.match_len; if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) { LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); match_len = 0; } - if (match_len > 0) { + if (match_len <= 0) { + // This should exit nested insets, if any, or otherwise undefine the currsor. + cur.pos() = cur.lastpos(); + LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); + cur.forwardPos(); + } + else { // match_len > 0 // Try to find the begin of searched string int increment; - int firstInvalid = 100000; - if (mres.match_prefix + mres.pos - mres.leadsize > 1) - increment = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4; - else - increment = 10; + int firstInvalid = cur.lastpos() - cur.pos(); + { + int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4; + int incrcur = (firstInvalid + 1 )*3/4; + if (incrcur < incrmatch) + increment = incrcur; + else + increment = incrmatch; + if (increment < 1) + increment = 1; + } LYXERR(Debug::FIND, "Set increment to " << increment); while (increment > 0) { DocIterator old_cur = cur; - for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { - } - if (! cur || (cur.pit() > old_cur.pit())) { - // Are we outside of the paragraph? - // This can happen if moving past some UTF8-encoded chars - cur = old_cur; + if (cur.pos() + increment >= cur.lastpos()) { increment /= 2; + continue; } - else { - MatchResult mres2 = match(cur, -1, false); - displayMres(mres2,increment) - switch (interpretMatch(mres, mres2)) { + cur.pos() = cur.pos() + increment; + MatchResult mres2 = match(cur, -1, false); + displayMres(mres2, "findForwardAdv loop", cur) + switch (interpretMatch(mres, mres2)) { case MatchResult::newIsTooFar: - // behind the expected match - firstInvalid = increment; - cur = old_cur; - increment /= 2; - break; + // behind the expected match + firstInvalid = increment; + cur = old_cur; + increment /= 2; + break; case MatchResult::newIsBetter: - // not reached ye, but cur.pos()+increment is bettert - mres = mres2; - firstInvalid -= increment; - if (increment > firstInvalid*3/4) - increment = firstInvalid*3/4; - if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) { - if (increment >= mres2.match_prefix) - increment = (mres2.match_prefix+1)*3/4; - } - break; + // not reached yet, but cur.pos()+increment is bettert + mres = mres2; + firstInvalid -= increment; + if (increment > firstInvalid*3/4) + increment = firstInvalid*3/4; + if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) { + if (increment >= mres2.match_prefix) + increment = (mres2.match_prefix+1)*3/4; + } + break; default: - // Todo@ - // Handle not like MatchResult::newIsTooFar - // LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix); - firstInvalid--; - increment = increment*3/4; - cur = old_cur; - break; - } + // Todo@ + // Handle not like MatchResult::newIsTooFar + LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix); + firstInvalid--; + increment = increment*3/4; + cur = old_cur; + break; } } - // LYXERR0("Leaving first loop"); - { - LYXERR(Debug::FIND, "Finalizing 1"); - MatchResult found_match = findAdvFinalize(cur, match, mres); - if (found_match.match_len > 0) { - LASSERT(found_match.pos_len > 0, /**/); - match.FillResults(found_match); - return found_match.pos_len; - } - else { - // try next possible match - cur.forwardPos(); - continue; - } - } - // The following code is newer reached - // but parts of it may be needed in future - int match_len_zero_count = 0; - MatchResult mres3; - for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { - if (i++ > 3) { - mres3 = match(cur, -1, false); - displayMres(mres3, 1) - int remaining_len = mres3.match_len; - if (remaining_len <= 0) { - // Apparently the searched string is not in the remaining part - break; - } - else { - i = 0; - } - } - LYXERR(Debug::FIND, "Advancing cur: " << cur); - mres3 = match(cur, 1); - displayMres(mres3, 1) - int match_len3 = mres3.match_len; - if (match_len3 < 0) + if (mres.match_len > 0) { + if (mres.match_prefix + mres.pos - mres.leadsize > 0) { + // The match seems to indicate some deeper level + repeat = true; + orig_cur = cur; + orig_mres = mres; + cur.forwardPos(); continue; - mres3 = match(cur); - displayMres(mres3, 1) - int match_len2 = mres3.match_len; - LYXERR(Debug::FIND, "match_len2: " << match_len2); - if (match_len2 > 0) { - // Sometimes in finalize we understand it wasn't a match - // and we need to continue the outest loop - LYXERR(Debug::FIND, "Finalizing 2"); - MatchResult mres4 = findAdvFinalize(cur, match, mres.match_len); - if (mres4.match_len > 0) { - match.FillResults(mres4); - LASSERT(mres4.pos_len > 0, /**/); - return mres4.pos_len; - } - } - if (match_len2 > 0) - match_len_zero_count = 0; - else if (match_len2 == 0) - match_len_zero_count++; - if (match_len2 < 0) { - if (++match_len_zero_count > 3) { - LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len); - } - break; } } - if (!cur) - return 0; - } - if (match_len >= 0 && cur.pit() < cur.lastpit()) { - LYXERR(Debug::FIND, "Advancing par: cur=" << cur); - cur.forwardPar(); - } else { - // This should exit nested insets, if any, or otherwise undefine the currsor. - cur.pos() = cur.lastpos(); - LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); - cur.forwardPos(); + else if (repeat) { + // should never be reached. + cur = orig_cur; + mres = orig_mres; + } + // LYXERR0("Leaving first loop"); + LYXERR(Debug::FIND, "Finalizing 1"); + MatchResult found_match = findAdvFinalize(cur, match, mres); + if (found_match.match_len > 0) { + LASSERT(found_match.pos_len > 0, /**/); + match.FillResults(found_match); + return found_match.pos_len; + } + else { + // try next possible match + cur.forwardPos(); + repeat = false; + continue; + } } } return 0; @@ -3955,11 +3758,12 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) /// Find the most backward consecutive match within same paragraph while searching backwards. -MatchResult &findMostBackwards(DocIterator & cur, MatchStringAdv const & match) +MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, MatchResult &expected) { - DocIterator cur_begin = doc_iterator_begin(cur.buffer()); + DocIterator cur_begin = cur; + cur_begin.pos() = 0; DocIterator tmp_cur = cur; - static MatchResult mr = findAdvFinalize(tmp_cur, match, MatchResult(-1)); + MatchResult mr = findAdvFinalize(tmp_cur, match, expected); Inset & inset = cur.inset(); for (; cur != cur_begin; cur.backwardPos()) { LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur); @@ -3967,7 +3771,7 @@ MatchResult &findMostBackwards(DocIterator & cur, MatchStringAdv const & match) new_cur.backwardPos(); if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len) break; - MatchResult new_mr = findAdvFinalize(new_cur, match, MatchResult(-1)); + MatchResult new_mr = findAdvFinalize(new_cur, match, expected); if (new_mr.match_len == mr.match_len) break; mr = new_mr; @@ -3991,9 +3795,9 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) bool pit_changed = false; do { cur.pos() = 0; - bool found_match = (match(cur, -1, false).match_len > 0); + MatchResult found_match = match(cur, -1, false); - if (found_match) { + if (found_match.match_len > 0) { if (pit_changed) cur.pos() = cur.lastpos(); else @@ -4001,14 +3805,15 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur); DocIterator cur_prev_iter; do { - found_match = (match(cur).match_len > 0); + found_match = match(cur); LYXERR(Debug::FIND, "findBackAdv3: found_match=" - << found_match << ", cur: " << cur); - if (found_match) { - MatchResult found_mr = findMostBackwards(cur, match); - match.FillResults(found_mr); - LASSERT(found_mr.pos_len > 0, /**/); - return found_mr.pos_len; + << (found_match.match_len > 0) << ", cur: " << cur); + if (found_match.match_len > 0) { + MatchResult found_mr = findMostBackwards(cur, match, found_match); + if (found_mr.pos_len > 0) { + match.FillResults(found_mr); + return found_mr.pos_len; + } } // Stop if begin of document reached @@ -4112,7 +3917,6 @@ static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase other } } // namespace -#if 1 static bool replaceMatches(string &t, int maxmatchnum, vector const & replacements) { // Should replace the string "$" + std::to_string(matchnum) with replacement @@ -4140,7 +3944,6 @@ static bool replaceMatches(string &t, int maxmatchnum, vector const & r t = s; return true; } -#endif /// static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv) @@ -4328,7 +4131,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) { - LYXERR(Debug::FIND, "parsing"); + // LYXERR(Debug::FIND, "parsing"); string s; string line; getline(is, line); @@ -4340,7 +4143,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); + // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); opt.find_buf_name = from_utf8(s); is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all; is.get(); // Waste space before replace string @@ -4354,7 +4157,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); + // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); opt.repl_buf_name = from_utf8(s); is >> opt.keep_case; int i; @@ -4363,9 +4166,11 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) is >> i; opt.restr = FindAndReplaceOptions::SearchRestriction(i); + /* LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' ' << opt.scope << ' ' << opt.restr); + */ return is; }