X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxfind.cpp;h=6e3b324bfa66be867f2bee4d4292bfd6b2ee3422;hb=26ba2a65838731ce639a09539f617cb0f0be3b22;hp=64da365ebbac2cf86f2950fff18dc80e08665e1e;hpb=31ac9ed59ff7d4eb22dfc2520fb216baf74115b7;p=lyx.git diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 64da365ebb..6e3b324bfa 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -14,6 +14,7 @@ */ #include +#include #include "lyxfind.h" @@ -56,13 +57,8 @@ //#define ResultsDebug #define USE_QT_FOR_SEARCH #if defined(USE_QT_FOR_SEARCH) - #include // sets QT_VERSION - #if (QT_VERSION >= 0x050000) - #include - #define QTSEARCH 1 - #else - #define QTSEARCH 0 - #endif + #include + #define QTSEARCH 1 #else #define QTSEARCH 0 #endif @@ -88,6 +84,8 @@ class IgnoreFormats { /// bool getShape() const { return ignoreShape_; } /// + bool getSize() const { return ignoreSize_; } + /// bool getUnderline() const { return ignoreUnderline_; } /// bool getMarkUp() const { return ignoreMarkUp_; } @@ -118,6 +116,8 @@ private: /// bool ignoreShape_ = false; /// + bool ignoreSize_ = true; + /// bool ignoreUnderline_ = false; /// bool ignoreMarkUp_ = false; @@ -166,6 +166,9 @@ void IgnoreFormats::setIgnoreFormat(string const & type, bool value, bool fromUs else if (type == "shape") { ignoreShape_ = value; } + else if (type == "size") { + ignoreSize_ = value; + } else if (type == "family") { ignoreFamily_ = value; } @@ -286,9 +289,11 @@ bool findOne(BufferView * bv, docstring const & searchstr, bool find_del, bool check_wrap, bool const auto_wrap, bool instant, bool onlysel) { + bool const had_selection = bv->cursor().selection(); + // Clean up previous selections with empty searchstr on instant if (searchstr.empty() && instant) { - if (bv->cursor().selection()) { + if (had_selection) { bv->setCursor(bv->cursor().selectionBegin()); bv->clearSelection(); } @@ -298,9 +303,10 @@ bool findOne(BufferView * bv, docstring const & searchstr, if (!searchAllowed(searchstr)) return false; - 
DocIterator const endcur = forward ? bv->cursor().selectionEnd() : bv->cursor().selectionBegin(); + DocIterator const startcur = bv->cursor().selectionBegin(); + DocIterator const endcur = bv->cursor().selectionEnd(); - if (onlysel && bv->cursor().selection()) { + if (onlysel && had_selection) { docstring const matchstring = bv->cursor().selectionAsString(false); docstring const lcmatchsting = support::lowercase(matchstring); if (matchstring == searchstr || (!case_sens && lcmatchsting == lowercase(searchstr))) { @@ -326,11 +332,11 @@ bool findOne(BufferView * bv, docstring const & searchstr, int match_len = forward ? findForward(cur, endcur, match, find_del, onlysel) - : findBackwards(cur, endcur, match, find_del, onlysel); + : findBackwards(cur, startcur, match, find_del, onlysel); if (match_len > 0) bv->putSelectionAt(cur, match_len, !forward); - else if (onlysel) { + else if (onlysel && had_selection) { docstring q = _("The search string was not found within the selection.\n" "Continue search outside?"); int search_answer = frontend::Alert::prompt(_("Search outside selection?"), @@ -344,7 +350,6 @@ bool findOne(BufferView * bv, docstring const & searchstr, return false; } else if (check_wrap) { - DocIterator cur_orig(bv->cursor()); bool wrap = auto_wrap; if (!auto_wrap) { docstring q; @@ -377,7 +382,13 @@ bool findOne(BufferView * bv, docstring const & searchstr, find_del, false, false, false, false)) return true; } - bv->cursor().setCursor(cur_orig); + bv->setCursor(startcur); + + // restore original selection + if (had_selection) { + bv->cursor().resetAnchor(); + bv->setSelection(startcur, endcur); + } return false; } @@ -453,7 +464,7 @@ int replaceAll(BufferView * bv, if (had_selection) { endcur.fixIfBroken(); bv->cursor().resetAnchor(); - bv->setCursorSelectionTo(endcur); + bv->setSelection(startcur, endcur); } return num; @@ -805,10 +816,82 @@ namespace { typedef vector > Escapes; +static string getRegexSpaceCount(int count) +{ + if (count > 0) { + if 
(count > 1) + return "\\s{" + std::to_string(count) + "}"; + else + return "\\s"; + } + return ""; +} + string string2regex(string in) { - static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" }; - string temp = std::regex_replace(in, specialChars, R"(\$&)" ); + static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\$\\])" }; + string tempx = std::regex_replace(in, specialChars, R"(\$&)" ); + // Special handling for ' ' + string temp(""); + int blanks = 0; + for (unsigned i = 0; i < tempx.size(); i++) { + if (tempx[i] == ' ' || tempx[i] == '~' ) { + // normal blanks + blanks++; + } + else if (tempx[i] == '\302' && tempx[i+1] == '\240') { + // Normal Space + blanks++; + i++; + } + else if (tempx[i] == '\342') { + if (tempx[i+1] == '\200') { + if ((tempx[i+2] == '\257') + || (tempx[i+2] == '\203') + || (tempx[i+2] == '\202')) { + // Non-breaking Thin (1/6 em) + // Quad(1 em), (Double quad counts as 2 blanks) + // Half Quad + blanks++; + i += 2; + } + else if (tempx[i+2] == '\213') { + // Ignoring parts of Medium and Thick + i += 2; + continue; + } + else if ((tempx[i+2] == '\204') || (tempx[i+2] == '\205')) { + // Thick + // Medium + blanks++; + i += 2; + } + } + else if (tempx[i+1] == '\201') { + if (tempx[i+2] == '\240') { + // Ignoring parts of half quad + i += 2; + continue; + } + } + else if ((tempx[i+1] == '\220') && (tempx[i+2] == '\243')) { + // Visible space + blanks++; + i += 2; + } + } + else { + if (blanks > 0) { + temp += getRegexSpaceCount(blanks); + } + temp += tempx[i]; + blanks = 0; + } + } + if (blanks > 0) { + temp += getRegexSpaceCount(blanks); + } + string temp2(""); size_t lastpos = 0; size_t fl_pos = 0; @@ -838,7 +921,8 @@ string correctRegex(string t, bool withformat) * and \{, \}, \[, \] => {, }, [, ] */ string s(""); - regex wordre("(\\\\)*(\\\\(([A-Za-z]+|[\\{\\}])( |\\{\\})?|[\\[\\]\\{\\}]))"); + static std::regex wordre("(\\\\)*(\\\\(( |[A-Za-z]+|[\\{\\}%])( |\\{\\})?|[\\[\\]\\{\\}]))"); + static std::regex protectedSpace { 
R"(~)" }; size_t lastpos = 0; smatch sub; bool backslashed = false; @@ -846,6 +930,7 @@ string correctRegex(string t, bool withformat) buildAccentsMap(); //LYXERR0("correctRegex input '" << t << "'"); + int skip = 0; for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { sub = *it; string replace; @@ -854,18 +939,25 @@ string correctRegex(string t, bool withformat) } else { if (sub.str(4) == "backslash") { - replace = "\\"; + replace = string("\\"); { // transforms '\backslash \{' into '\{' string next = t.substr(sub.position(2) + sub.str(2).length(), 2); - if ((next == "\\{") || (next == "\\}")) { + if ((next == "\\{") || (next == "\\}") || (next == "\\ ")) { replace = ""; backslashed = true; } + else if (withformat && next[0] == '$') { + replace = accents["lyxdollar"]; + skip = 1; // Skip following '$' + } } } else if (sub.str(4) == "mathcircumflex") replace = "^"; + else if ((sub.str(4) == "negthinspace") || (sub.str(4) == "negmedspace") || (sub.str(4) == "negthickspace")) { + replace = accents[sub.str(4)+"{}"]; + } else if (backslashed) { backslashed = false; if (withformat) { @@ -873,9 +965,11 @@ string correctRegex(string t, bool withformat) replace = accents["braceleft"]; else if (sub.str(3) == "}") replace = accents["braceright"]; + else if (sub.str(3) == " ") + replace = "\\ "; else { // else part should not exist - LASSERT(1, /**/); + LASSERT(0, /**/); } } else { @@ -883,9 +977,11 @@ string correctRegex(string t, bool withformat) replace = "\\{"; else if (sub.str(3) == "}") replace = "\\}"; + else if (sub.str(3) == " ") + replace = "\\ "; else { // else part should not exist - LASSERT(1, /**/); + LASSERT(0, /**/); } } } @@ -893,6 +989,10 @@ string correctRegex(string t, bool withformat) replace = "{"; else if (sub.str(4) == "}") replace = "}"; + else if (sub.str(4) == "%") + replace = "%"; + else if (sub.str(4) == " ") + replace = " "; else { AccentsIterator it_ac = accents.find(sub.str(4)); if (it_ac == accents.end()) { @@ -904,14 
+1004,22 @@ string correctRegex(string t, bool withformat) } } if (lastpos < (size_t) sub.position(2)) - s += t.substr(lastpos, sub.position(2) - lastpos); + s += std::regex_replace(t.substr(lastpos, sub.position(2) - lastpos), protectedSpace, R"( )"); s += replace; - lastpos = sub.position(2) + sub.length(2); + lastpos = sub.position(2) + sub.length(2) + skip; + skip = 0; } if (lastpos == 0) - return t; + s = std::regex_replace(t, protectedSpace, R"( )"); else if (lastpos < t.length()) - s += t.substr(lastpos, t.length() - lastpos); + s += std::regex_replace(t.substr(lastpos, t.length() - lastpos), protectedSpace, R"( )"); + // Handle quotes in regex + // substitute all '„', '“', '»', '«' with '"' + // and all '‚', '‘', '›', '‹' with "\'" + static std::regex plainquotes { R"(„|“|»|«)" }; + static std::regex innerquotes { R"(‚|‘|›|‹)" }; + t = std::regex_replace(s, plainquotes, R"(")"); + s = std::regex_replace(t, innerquotes, R"(')"); //LYXERR0("correctRegex output '" << s << "'"); return s; } @@ -999,13 +1107,19 @@ public: ** constructor as opt.search, under the opt.* options settings. ** ** @param at_begin - ** If set, then match is searched only against beginning of text starting at cur. - ** If unset, then match is searched anywhere in text starting at cur. + ** If set to MatchStringAdv::MatchFromStart, + ** then match is searched only against beginning of text starting at cur. + ** Otherwise the match is searched anywhere in text starting at cur. ** ** @return ** The length of the matching text, or zero if no match was found. **/ - MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; + enum matchType { + MatchAnyPlace, + MatchFromStart + }; + string matchTypeAsString(matchType const x) const { return (x == MatchFromStart ? 
"MatchFromStart" : "MatchAnyPlace"); } + MatchResult operator()(DocIterator const & cur, int len, matchType at_begin) const; #if QTSEARCH bool regexIsValid; string regexError; @@ -1021,7 +1135,7 @@ public: private: /// Auxiliary find method (does not account for opt.matchword) - MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const; + MatchResult findAux(DocIterator const & cur, int len, matchType at_begin) const; void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = ""); /** Normalize a stringified or latexified LyX paragraph. @@ -1037,7 +1151,7 @@ private: ** @todo Normalization should also expand macros, if the corresponding ** search option was checked. **/ - string normalize(docstring const & s, bool ignore_fomat) const; + string convertLF2Space(docstring const & s, bool ignore_fomat) const; // normalized string to search string par_as_string; // regular expression to use for searching @@ -1054,10 +1168,10 @@ private: // par_as_string after removal of lead_as_string string par_as_string_nolead; // unmatched open braces in the search string/regexp - int open_braces; + int open_braces = 0; // number of (.*?) subexpressions added at end of search regexp for closing // environments, math mode, styles, etc... - int close_wildcards; + int close_wildcards = 0; public: // Are we searching with regular expressions ? 
bool use_regexp = false; @@ -1079,6 +1193,15 @@ void MatchStringAdv::FillResults(MatchResult &found_mr) valid_matches = 0; } +static void setFindParams(OutputParams &runparams) +{ + runparams.flavor = Flavor::XeTeX; + //runparams.use_polyglossia = true; + runparams.linelen = 10000; //lyxrc.plaintext_linelen; + // No side effect of file copying and image conversion + runparams.dryrun = true; +} + static docstring buffer_to_latex(Buffer & buffer) { //OutputParams runparams(&buffer.params().encoding()); @@ -1086,26 +1209,23 @@ static docstring buffer_to_latex(Buffer & buffer) odocstringstream ods; otexstream os(ods); runparams.nice = true; - runparams.flavor = Flavor::XeTeX; - runparams.linelen = 10000; //lyxrc.plaintext_linelen; - // No side effect of file copying and image conversion - runparams.dryrun = true; + setFindParams(runparams); if (ignoreFormats.getDeleted()) - runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); else - runparams.for_searchAdv = OutputParams::SearchWithDeleted; + runparams.find_set_feature(OutputParams::SearchWithDeleted); if (ignoreFormats.getNonContent()) { - runparams.for_searchAdv |= OutputParams::SearchNonOutput; + runparams.find_add_feature(OutputParams::SearchNonOutput); } pit_type const endpit = buffer.paragraphs().size(); for (pit_type pit = 0; pit != endpit; ++pit) { - TeXOnePar(buffer, buffer.text(), pit, os, runparams); - LYXERR(Debug::FIND, "searchString up to here: " << ods.str()); + TeXOnePar(buffer, buffer.text(), pit, os, runparams, string(), -1, -1, true); + LYXERR(Debug::FINDVERBOSE, "searchString up to here: " << ods.str()); } return ods.str(); } -static string latexNamesToUtf8(docstring strIn) +static string latexNamesToUtf8(docstring strIn, bool withformat) { string addtmp = to_utf8(strIn); static regex const rmAcc("(\\\\)*(" @@ -1147,7 +1267,11 @@ static string latexNamesToUtf8(docstring strIn) add = addtmp; else if (addtmp.length() > lastpos) 
add += addtmp.substr(lastpos, addtmp.length() - lastpos); - LYXERR(Debug::FIND, "Adding to search string: '" + if (!withformat) { + static std::regex repltilde { R"(~)" }; + add = std::regex_replace(add, repltilde, accents["lyxtilde"]); + } + LYXERR(Debug::FINDVERBOSE, "Adding to search string: '" << add << "'"); return add; } @@ -1161,27 +1285,25 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co // OutputParams runparams(&buffer.params().encoding()); OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = true; - runparams.flavor = Flavor::XeTeX; - runparams.linelen = 10000; //lyxrc.plaintext_linelen; - runparams.dryrun = true; + setFindParams(runparams); int option = AS_STR_INSETS |AS_STR_PLAINTEXT; if (ignoreFormats.getDeleted()) { option |= AS_STR_SKIPDELETE; - runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); } else { - runparams.for_searchAdv = OutputParams::SearchWithDeleted; + runparams.find_set_feature(OutputParams::SearchWithDeleted); } if (ignoreFormats.getNonContent()) { - runparams.for_searchAdv |= OutputParams::SearchNonOutput; + runparams.find_add_feature(OutputParams::SearchNonOutput); } string t(""); for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { Paragraph const & par = buffer.paragraphs().at(pit); string add = latexNamesToUtf8(par.asString(pos_type(0), par.size(), option, - &runparams)); - LYXERR(Debug::FIND, "Adding to search string: '" + &runparams), !opt.ignoreformat); + LYXERR(Debug::FINDVERBOSE, "Adding to search string: '" << add << "'"); t += add; } @@ -1212,8 +1334,8 @@ static size_t identifyLeading(string const & s) || regex_replace(t, t, "^ ?\\\\item\\{[a-z]+\\}", "") || regex_replace(t, t, "^\\\\begin\\{[a-zA-Z_]*\\*?\\}", "")) ; - LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); - return s.find(t); + 
LYXERR(Debug::FINDVERBOSE, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); + return s.size() - t.size(); } /* @@ -1353,8 +1475,8 @@ public: string par; int ignoreidx; static vector borders; - int depts[MAXOPENED]; - int closes[MAXOPENED]; + static vector depts; + static vector closes; int actualdeptindex; int previousNotIgnored(int) const; int nextNotIgnored(int) const; @@ -1366,6 +1488,7 @@ public: void removeAccents(); void setForDefaultLang(KeyInfo const & defLang) const; int findclosing(int start, int end, char up, char down, int repeat); + void removeInvalidClosings(void); void handleParentheses(int lastpos, bool closingAllowed); bool hasTitle; // Number of disabled language specs up @@ -1378,6 +1501,8 @@ public: }; vector Intervall::borders = vector(30); +vector Intervall::depts = vector(30); +vector Intervall::closes = vector(30); int Intervall::isOpeningPar(int pos) const { @@ -1411,6 +1536,8 @@ void Intervall::setForDefaultLang(KeyInfo const & defLang) const } } +#if 0 +// Not needed, because dpts and closes are now dynamically expanded static void checkDepthIndex(int val) { static int maxdepthidx = MAXOPENED-2; @@ -1424,6 +1551,7 @@ static void checkDepthIndex(int val) LYXERR(Debug::INFO, "maxdepthidx now " << val); } } +#endif #if 0 // Not needed, because borders are now dynamically expanded @@ -1875,16 +2003,15 @@ static void buildAccentsMap() accents["cdot"] = "·"; accents["textasciicircum"] = "^"; accents["mathcircumflex"] = "^"; - accents["sim"] = "~"; accents["guillemotright"] = "»"; accents["guillemotleft"] = "«"; accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15 accents["thinspace"] = getutf8(0xf0002); // and used _only_ by findadv - accents["negthinspace"] = getutf8(0xf0003); // to omit backslashed latex macros + accents["negthinspace{}"]= getutf8(0xf0003); // to omit backslashed latex macros accents["medspace"] = getutf8(0xf0004); // See https://en.wikipedia.org/wiki/Private_Use_Areas - 
accents["negmedspace"] = getutf8(0xf0005); + accents["negmedspace{}"] = getutf8(0xf0005); accents["thickspace"] = getutf8(0xf0006); - accents["negthickspace"] = getutf8(0xf0007); + accents["negthickspace{}"]= getutf8(0xf0007); accents["lyx"] = getutf8(0xf0010); // Used logos accents["LyX"] = getutf8(0xf0010); accents["tex"] = getutf8(0xf0011); @@ -1896,6 +2023,9 @@ static void buildAccentsMap() accents["lyxarrow"] = getutf8(0xf0020); accents["braceleft"] = getutf8(0xf0030); accents["braceright"] = getutf8(0xf0031); + accents["lyxtilde"] = getutf8(0xf0032); + accents["sim"] = getutf8(0xf0032); + accents["lyxdollar"] = getutf8(0xf0033); accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash accents["backslash LyX"] = getutf8(0xf0010); accents["backslash tex"] = getutf8(0xf0011); @@ -1971,7 +2101,8 @@ void Intervall::removeAccents() if (accents.empty()) buildAccentsMap(); static regex const accre("\\\\(" - "([\\S]|[A-Za-z]+)\\{[^\\{\\}]+\\}" + "([\\S]|[A-Za-z]+)\\{[^\\\\\\{\\}]*\\}" + "|([\\S]|[A-Za-z]+)\\{\\\\[ij](math)?\\}" "|(" "(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))" "|[A-Za-z]+" @@ -2007,16 +2138,20 @@ void Intervall::removeAccents() void Intervall::handleOpenP(int i) { actualdeptindex++; + if ((size_t) actualdeptindex >= depts.size()) { + depts.resize(actualdeptindex + 30); + closes.resize(actualdeptindex + 30); + } depts[actualdeptindex] = i+1; closes[actualdeptindex] = -1; - checkDepthIndex(actualdeptindex); + // checkDepthIndex(actualdeptindex); } void Intervall::handleCloseP(int i, bool closingAllowed) { if (actualdeptindex <= 0) { if (! 
closingAllowed) - LYXERR(Debug::FIND, "Bad closing parenthesis in latex"); /* should not happen, but the latex input may be wrong */ + LYXERR(Debug::FINDVERBOSE, "Bad closing parenthesis in latex"); /* should not happen, but the latex input may be wrong */ // if we are at the very end addIntervall(i, i+1); } @@ -2171,6 +2306,27 @@ int Intervall::findclosing(int start, int end, char up = '{', char down = '}', i return end; } +void Intervall::removeInvalidClosings(void) +{ + // this can happen, if there are deleted parts + int skip = 0; + int depth = 0; + for (unsigned i = 0; i < par.size(); i += 1 + skip) { + char c = par[i]; + skip = 0; + if (c == '\\') skip = 1; + else if (c == '{') + depth++; + else if (c == '}') { + if (depth == 0) { + addIntervall(i, i+1); + LYXERR(Debug::FINDVERBOSE, "removed invalid closing '}' at " << i); + } + else + --depth; + } + } +} class MathInfo { class MathEntry { public: @@ -2258,6 +2414,7 @@ void LatexInfo::buildEntries(bool isPatternString) static bool removeMathHull = false; interval_.removeAccents(); + interval_.removeInvalidClosings(); for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) { submath = *itmath; @@ -2307,7 +2464,7 @@ void LatexInfo::buildEntries(bool isPatternString) } // Ignore language if there is math somewhere in pattern-string if (isPatternString) { - for (auto s: usedText) { + for (auto const & s: usedText) { // Remove entries created in previous search runs keys.erase(s); } @@ -2663,7 +2820,8 @@ void LatexInfo::buildKeys(bool isPatternString) if (keysBuilt && !isPatternString) return; // Keys to ignore in any case - makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true); + makeKey("text|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true); + makeKey("nonumber|notag", KeyInfo(KeyInfo::headRemove, 0, true), true); // Known standard keys with 1 parameter. 
// Split is done, if not at start of region makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString); @@ -2716,9 +2874,14 @@ void LatexInfo::buildKeys(bool isPatternString) // handle like standard keys with 1 parameter. makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString); - // Ignore deleted text - makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString); - // but preserve added text + if (ignoreFormats.getDeleted()) { + // Ignore deleted text + makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString); + } + else { + // but preserve added text + makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString); + } makeKey("lyxadded", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString); // Macros to remove, but let the parameter survive @@ -2741,9 +2904,10 @@ void LatexInfo::buildKeys(bool isPatternString) makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); makeKey("triangleleftpar|shapepar|dropuppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); makeKey("hphantom|vphantom|note|footnote|shortcut|include|includegraphics", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("textgreek|textcyrillic", KeyInfo(KeyInfo::isStandard, 1, true), false); makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString); // like ('tiny{}' or '\tiny ' ... 
) - makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString); + makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, ignoreFormats.getSize()), isPatternString); // Survives, like known character // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); @@ -2800,6 +2964,8 @@ string Intervall::show(int lastpos) int idx = 0; /* int intervalls */ string s; int i = 0; + if ((unsigned) lastpos > par.size()) + lastpos = par.size(); for (idx = 0; idx <= ignoreidx; idx++) { while (i < lastpos) { int printsize; @@ -3346,15 +3512,17 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with while ((parlen > 0) && (par[parlen-1] == '\n')) { parlen--; } +#if 0 if (isPatternString && (parlen > 0) && (par[parlen-1] == '~')) { // Happens to be there in case of description or labeling environment parlen--; } +#endif string result; if (withformat) { // Split the latex input into pieces which // can be digested by our search engine - LYXERR(Debug::FIND, "input: \"" << par << "\""); + LYXERR(Debug::FINDVERBOSE, "input: \"" << par << "\""); if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX string doclang = pbuf->params().language->polyglossia(); @@ -3372,7 +3540,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with } result = splitOnKnownMacros(par.substr(0,parlen), isPatternString); - LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\""); + LYXERR(Debug::FINDVERBOSE, "After splitOnKnownMacros:\n\"" << result << "\""); } else result = par.substr(0, parlen); @@ -3387,7 +3555,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with features += " " + a; // LYXERR(Debug::INFO, "Identified regex 
format:" << a); } - LYXERR(Debug::FIND, "Identified Features" << features); + LYXERR(Debug::FINDVERBOSE, "Identified Features" << features); } } else if (regex_with_format) { @@ -3397,7 +3565,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with bool b = it->second; if (b && ! info[a]) { missed++; - LYXERR(Debug::FIND, "Missed(" << missed << " " << a <<", srclen = " << parlen ); + LYXERR(Debug::FINDVERBOSE, "Missed(" << missed << " " << a <<", srclen = " << parlen ); return ""; } } @@ -3411,24 +3579,23 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with // Remove trailing closure of math, macros and environments, so to catch parts of them. -static int identifyClosing(string & t) +static void identifyClosing(string & t, bool ignoreformat) { - int open_braces = 0; do { - LYXERR(Debug::FIND, "identifyClosing(): t now is '" << t << "'"); + LYXERR(Debug::FINDVERBOSE, "identifyClosing(): t now is '" << t << "'"); if (regex_replace(t, t, "(.*[^\\\\])\\$$", "$1")) continue; if (regex_replace(t, t, "(.*[^\\\\])\\\\\\]$", "$1")) continue; - if (regex_replace(t, t, "(.*[^\\\\])\\\\end\\{[a-zA-Z_]*\\*?\\}$", "$1")) - continue; - if (regex_replace(t, t, "(.*[^\\\\])\\}$", "$1")) { - ++open_braces; + if (regex_replace(t, t, "(.*[^\\\\])\\\\end\\{[a-zA-Z_]+\\*?\\}$", "$1")) continue; + if (! 
ignoreformat) { + if (regex_replace(t, t, "(.*[^\\\\])\\}$", "$1")) + continue; } break; } while (true); - return open_braces; + return; } static int num_replaced = 0; @@ -3532,7 +3699,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) CreateRegexp(opt, "", "", ""); return; } - use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos; + use_regexp = ds.find(from_utf8("\\regexp{")) != std::string::npos; if (opt.replace_all && previous_single_replace) { previous_single_replace = false; num_replaced = 0; @@ -3542,9 +3709,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) previous_single_replace = true; } // When using regexp, braces are hacked already by escape_for_regex() - par_as_string = normalize(ds, opt.ignoreformat); - open_braces = 0; - close_wildcards = 0; + par_as_string = convertLF2Space(ds, opt.ignoreformat); size_t lead_size = 0; // correct the language settings @@ -3555,7 +3720,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) } opt.matchAtStart = false; if (!use_regexp) { - identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string + identifyClosing(par_as_string, opt.ignoreformat); // Removes math closings ($, ], ...) 
at end of string if (opt.ignoreformat) { lead_size = 0; } @@ -3576,14 +3741,14 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead; CreateRegexp(opt, regexp_str, regexp2_str); use_regexp = true; - LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); - LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); return; } if (!opt.ignoreformat) { lead_size = identifyLeading(par_as_string); - LYXERR(Debug::FIND, "Lead_size: " << lead_size); + LYXERR(Debug::FINDVERBOSE, "Lead_size: " << lead_size); lead_as_string = par_as_string.substr(0, lead_size); par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); } @@ -3597,13 +3762,13 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "\\$$", ""); (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", ""); par_as_string = par_as_string_nolead; - LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + LYXERR(Debug::FINDVERBOSE, "lead_as_regexp is '" << lead_as_regexp << "'"); + LYXERR(Debug::FINDVERBOSE, "par_as_string now is '" << par_as_string << "'"); } - // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); + // LYXERR(Debug::FINDVERBOSE, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat); // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them. 
- // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + // LYXERR(Debug::FINDVERBOSE, "par_as_string now is '" << par_as_string << "'"); ++close_wildcards; size_t lng = par_as_string.size(); if (!opt.ignoreformat) { @@ -3621,17 +3786,17 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) break; } if (lng < par_as_string.size()) - par_as_string = par_as_string.substr(0,lng); + par_as_string.resize(lng); } - LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'"); + LYXERR(Debug::FINDVERBOSE, "par_as_string after correctRegex is '" << par_as_string << "'"); if ((lng > 0) && (par_as_string[0] == '^')) { par_as_string = par_as_string.substr(1); --lng; opt.matchAtStart = true; } - // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - // LYXERR(Debug::FIND, "Open braces: " << open_braces); - // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); + // LYXERR(Debug::FINDVERBOSE, "par_as_string now is '" << par_as_string << "'"); + // LYXERR(Debug::FINDVERBOSE, "Open braces: " << open_braces); + // LYXERR(Debug::FINDVERBOSE, "Replaced text (to be used as regex): " << par_as_string); // If entered regexp must match at begin of searched string buffer // Kornel: Added parentheses to use $1 for size of the leading string @@ -3648,29 +3813,26 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) } if (opt.matchword) { modifyRegexForMatchWord(par_as_string); - opt.matchword = false; + // opt.matchword = false; } regexp_str = "(" + lead_as_regexp + ")()" + par_as_string; regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string; } - LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); - LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Setting regexp2 to: '" << 
regexp2_str << "'"); CreateRegexp(opt, regexp_str, regexp2_str, par_as_string); } } -MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const +MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const { MatchResult mres; mres.searched_size = len; - if (at_begin && - (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) ) - return mres; docstring docstr = stringifyFromForSearch(opt, cur, len); string str; - str = normalize(docstr, opt.ignoreformat); + str = convertLF2Space(docstr, opt.ignoreformat); if (!opt.ignoreformat) { str = correctlanguagesetting(str, false, !opt.ignoreformat); // remove closing '}' and '\n' to allow for use of '$' in regex @@ -3680,22 +3842,25 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be if (lng != str.size()) { str = str.substr(0, lng); } + // Replace occurences of '~' to ' ' + static std::regex specialChars { R"(~)" }; + str = std::regex_replace(str, specialChars, R"( )" ); } if (str.empty()) { mres.match_len = -1; return mres; } - LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "After normalization: Matching against:\n'" << str << "'"); LASSERT(use_regexp, /**/); { // use_regexp always true - LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); + LYXERR(Debug::FINDVERBOSE, "Searching in regexp mode: at_begin=" << matchTypeAsString(at_begin)); #if QTSEARCH QString qstr = QString::fromStdString(str); QRegularExpression const *p_regexp; QRegularExpression::MatchType flags = QRegularExpression::NormalMatch; - if (at_begin) { + if (at_begin == MatchStringAdv::MatchFromStart) { p_regexp = ®exp; } else { p_regexp = ®exp2; @@ -3706,7 +3871,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be #else regex const *p_regexp; regex_constants::match_flag_type flags; - if (at_begin) { + if 
(at_begin == MatchStringAdv::MatchFromStart) { flags = regex_constants::match_continuous; p_regexp = &regexp; } else { @@ -3745,7 +3910,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be int matchend = match.capturedEnd(0); size_t strsize = qstr.size(); if (!opt.ignoreformat) { - while (mres.match_len > 0) { + while (mres.match_len > 1) { QChar c = qstr.at(matchend - 1); if ((c == '\n') || (c == '}') || (c == '{')) { mres.match_len--; @@ -3774,7 +3939,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be int matchend = strend; size_t strsize = str.size(); if (!opt.ignoreformat) { - while (mres.match_len > 0) { + while (mres.match_len > 1) { char c = str.at(matchend - 1); if ((c == '\n') || (c == '}') || (c == '{')) { mres.match_len--; @@ -3820,23 +3985,20 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be } -MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const +MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const { MatchResult mres = findAux(cur, len, at_begin); - int res = mres.match_len; - LYXERR(Debug::FIND, - "res=" << res << ", at_begin=" << at_begin + LYXERR(Debug::FINDVERBOSE, + "res=" << mres.match_len << ", at_begin=" << matchTypeAsString(at_begin) << ", matchAtStart=" << opt.matchAtStart << ", inTexted=" << cur.inTexted()); - if (opt.matchAtStart) { - if (cur.pos() != 0) - mres.match_len = 0; - else if (mres.match_prefix > 0) - mres.match_len = 0; - return mres; + if (mres.match_len > 0) { + if (opt.matchAtStart) { + if (cur.pos() > 0 || mres.match_prefix > 0) + mres.match_len = 0; + } } - else - return mres; + return mres; } #if 0 @@ -3864,71 +4026,76 @@ static bool simple_replace(string &t, string from, string to) } #endif -string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const +string MatchStringAdv::convertLF2Space(docstring const 
&s, bool ignore_format) const { - string t; - t = lyx::to_utf8(s); - // Remove \n at begin - while (!t.empty() && t[0] == '\n') - t = t.substr(1); - // Remove [%]*\n at end - while (!t.empty() && t[t.size() - 1] == '\n') { - size_t count = 1; - if (!ignore_format) { - while ((t.size() > 1 + count) && (t[t.size() - 1 - count] == '%')) - count++; - } - t = t.substr(0, t.size() - count); - } + // Using original docstring to handle '\n' + + if (s.size() == 0) return ""; + stringstream t; size_t pos; - // Handle all other '\n' - while ((pos = t.find("\n")) != string::npos) { - if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) { - // Handle '\\\n' - if (isAlnumASCII(t[pos+1])) { - t.replace(pos-2, 3, " "); - } - else { - t.replace(pos-2, 3, ""); - } + size_t start = 0; + size_t end = s.size() - 1; + if (!ignore_format) { + while (s[start] == '\n' && start <= end) + start++; + while (end >= start && s[end] == '\n') + end--; + if (start >= end + 1) + return ""; + } + do { + bool dospace = true; + int skip = -1; + pos = s.find('\n', start); + if (pos >= end) { + t << lyx::to_utf8(s.substr(start, end + 1 - start)); + break; } - else { - if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) { - // '\n' adjacent to non-alpha-numerics, discard - t.replace(pos, 1, ""); - } - else { - // Replace all other \n with spaces - t.replace(pos, 1, " "); + if (!ignore_format) { + if ((pos > start + 1) && + s[pos-1] == '\\' && + s[pos-2] == '\\') { + skip = 2; + if ((pos > start + 2) && + (s[pos+1] == '~' || isSpace(s[pos+1]) || + s[pos-3] == '~' || isSpace(s[pos-3]))) { + // discard "\\\\\n", do not replace with space + dospace = false; + } } - if (!ignore_format) { - size_t count = 0; - while ((pos > count + 1) && (t[pos - 1 -count] == '%')) { - count++; + else if (pos > start) { + if (s[pos-1] == '%') { + skip = 1; + while ((pos > start+skip) && (s[pos-1-skip] == '%')) + skip++; + if ((pos > start+skip) && + (s[pos+1] == '~' || isSpace(s[pos+1]) || + s[pos-1-skip] == '~' || 
isSpace(s[pos-1-skip]))) { + // discard '%%%%%\n' + dospace = false; + } } - if (count > 0) { - t.replace(pos - count, count, ""); + else if (!isAlnumASCII(s[pos+1]) || !isAlnumASCII(s[pos-1])) { + dospace = false; + skip = 0; // remove the '\n' only } } } - } - // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify - // Kornel: Added textsl, textsf, textit, texttt and noun - // + allow to seach for colored text too - LYXERR(Debug::FIND, "Removing stale empty macros from: " << t); - while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) - LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); - while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) - LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); - while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); - - return t; + else { + dospace = true; + skip = 0; + } + t << lyx::to_utf8(s.substr(start, pos-skip-start)); + if (dospace) + t << ' '; + start = pos+1; + } while (start <= end); + return(t.str()); } - docstring stringifyFromCursor(DocIterator const & cur, int len) { - LYXERR(Debug::FIND, "Stringifying with len=" << len << " from cursor at pos: " << cur); + LYXERR(Debug::FINDVERBOSE, "Stringifying with len=" << len << " from cursor at pos: " << cur); if (cur.inTexted()) { Paragraph const & par = cur.paragraph(); // TODO what about searching beyond/across paragraph breaks ? 
@@ -3938,26 +4105,25 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) // OutputParams runparams(&cur.buffer()->params().encoding()); OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = true; - runparams.flavor = Flavor::XeTeX; - runparams.linelen = 10000; //lyxrc.plaintext_linelen; - // No side effect of file copying and image conversion - runparams.dryrun = true; + setFindParams(runparams); int option = AS_STR_INSETS | AS_STR_PLAINTEXT; if (ignoreFormats.getDeleted()) { option |= AS_STR_SKIPDELETE; - runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); } else { - runparams.for_searchAdv = OutputParams::SearchWithDeleted; + runparams.find_set_feature(OutputParams::SearchWithDeleted); } if (ignoreFormats.getNonContent()) { - runparams.for_searchAdv |= OutputParams::SearchNonOutput; + runparams.find_add_feature(OutputParams::SearchNonOutput); } - LYXERR(Debug::FIND, "Stringifying with cur: " + LYXERR(Debug::FINDVERBOSE, "Stringifying with cur: " << cur << ", from pos: " << cur.pos() << ", end: " << end); - return from_utf8(latexNamesToUtf8(par.asString(cur.pos(), end, - option, - &runparams))); + docstring res = from_utf8(latexNamesToUtf8(par.asString(cur.pos(), end, + option, + &runparams), false)); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Stringified text from pos(" << cur.pos() << ") len(" << len << "): " << res); + return res; } else if (cur.inMathed()) { CursorSlice cs = cur.top(); MathData md = cs.cell(); @@ -3968,11 +4134,11 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) MathData md2; for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it) md2.push_back(*it); - docstring s = from_utf8(latexNamesToUtf8(asString(md2))); - LYXERR(Debug::FIND, "Stringified math: '" << s << "'"); - return s; + docstring res = from_utf8(latexNamesToUtf8(asString(md2), false)); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Stringified 
math from pos(" << cur.pos() << ") len(" << len << "): " << res); + return res; } - LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Don't know how to stringify from here: " << cur); return docstring(); } @@ -3983,8 +4149,8 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) docstring latexifyFromCursor(DocIterator const & cur, int len) { /* - LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); - LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" + LYXERR(Debug::FINDVERBOSE, "Latexifying with len=" << len << " from cursor at pos: " << cur); + LYXERR(Debug::FINDVERBOSE, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); */ Buffer const & buf = *cur.buffer(); @@ -3994,18 +4160,15 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) //OutputParams runparams(&buf.params().encoding()); OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = false; - runparams.flavor = Flavor::XeTeX; - runparams.linelen = 8000; //lyxrc.plaintext_linelen; - // No side effect of file copying and image conversion - runparams.dryrun = true; + setFindParams(runparams); if (ignoreFormats.getDeleted()) { - runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); } else { - runparams.for_searchAdv = OutputParams::SearchWithDeleted; + runparams.find_set_feature(OutputParams::SearchWithDeleted); } if (ignoreFormats.getNonContent()) { - runparams.for_searchAdv |= OutputParams::SearchNonOutput; + runparams.find_add_feature(OutputParams::SearchNonOutput); } if (cur.inTexted()) { @@ -4014,10 +4177,9 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) if (len != -1 && endpos > cur.pos() + len) endpos = cur.pos() + len; TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams, - string(), 
cur.pos(), endpos); - string s = lyx::to_utf8(ods.str()); - LYXERR(Debug::FIND, "Latexified +modified text: '" << s << "'"); - return(lyx::from_utf8(s)); + string(), cur.pos(), endpos, true); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Latexified text from pos(" << cur.pos() << ") len(" << len << "): " << ods.str()); + return(ods.str()); } else if (cur.inMathed()) { // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly for (int s = cur.depth() - 1; s >= 0; --s) { @@ -4052,9 +4214,9 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) break; } } - LYXERR(Debug::FIND, "Latexified math: '" << lyx::to_utf8(ods.str()) << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Latexified math from pos(" << cur.pos() << ") len(" << len << "): " << ods.str()); } else { - LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Don't know how to stringify from here: " << cur); } return ods.str(); } @@ -4111,13 +4273,13 @@ MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Mat // either one sees "http://www.bla.bla" or nothing // so the search for "www" gives prefix_len = 7 (== sizeof("http://") // and although we search for only 3 chars, we find the whole hyperlink inset - bool at_begin = (expected.match_prefix == 0); + MatchStringAdv::matchType at_begin = (expected.match_prefix == 0) ? 
MatchStringAdv::MatchFromStart : MatchStringAdv::MatchAnyPlace; if (!match.opt.forward && match.opt.ignoreformat) { if (expected.pos > 0) return fail; } - LASSERT(at_begin, /**/); - if (expected.match_len > 0 && at_begin) { + LASSERT(at_begin == MatchStringAdv::MatchFromStart, /**/); + if (expected.match_len > 0 && at_begin == MatchStringAdv::MatchFromStart) { // Search for deepest match old_cur = cur; max_match = expected; @@ -4149,12 +4311,15 @@ MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Mat } else { // (expected.match_len <= 0) - mres = match(cur); /* match valid only if not searching whole words */ + mres = match(cur, -1, MatchStringAdv::MatchFromStart); /* match valid only if not searching whole words */ displayMres(mres, "Start with negative match", cur); max_match = mres; } - if (max_match.match_len <= 0) return fail; - LYXERR(Debug::FIND, "Ok"); + // Only now we are really at_begin + if ((max_match.match_len <= 0) || + (match.opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed())) + return fail; + LYXERR(Debug::FINDVERBOSE, "Ok"); // Compute the match length int len = 1; @@ -4245,15 +4410,20 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) { if (!cur) return 0; - bool repeat = false; + int repeat = 0; DocIterator orig_cur; // to be used if repeat not successful MatchResult orig_mres; + do { + orig_cur = cur; + cur.forwardPos(); + } while (cur.depth() > orig_cur.depth()); + cur = orig_cur; while (!theApp()->longOperationCancelled() && cur) { //(void) findAdvForwardInnermost(cur); - LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); - MatchResult mres = match(cur, -1, false); + LYXERR(Debug::FINDVERBOSE, "findForwardAdv() cur: " << cur); + MatchResult mres = match(cur, -1, MatchStringAdv::MatchAnyPlace); string msg = "Starting"; - if (repeat) + if (repeat > 0) msg = "Repeated"; displayMres(mres, msg + " findForwardAdv", cur) int match_len = mres.match_len; @@ -4262,9 +4432,14 @@ int 
findForwardAdv(DocIterator & cur, MatchStringAdv & match) match_len = 0; } if (match_len <= 0) { - // This should exit nested insets, if any, or otherwise undefine the currsor. - cur.pos() = cur.lastpos(); - LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); + if (repeat > 0) { + repeat--; + } + else { + // This should exit nested insets, if any, or otherwise undefine the currsor. + cur.pos() = cur.lastpos(); + } + LYXERR(Debug::FINDVERBOSE, "Advancing pos: cur=" << cur); cur.forwardPos(); } else { // match_len > 0 @@ -4281,7 +4456,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) if (increment < 1) increment = 1; } - LYXERR(Debug::FIND, "Set increment to " << increment); + LYXERR(Debug::FINDVERBOSE, "Set increment to " << increment); while (increment > 0) { DocIterator old_cur = cur; if (cur.pos() + increment >= cur.lastpos()) { @@ -4289,9 +4464,9 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) continue; } cur.pos() = cur.pos() + increment; - MatchResult mres2 = match(cur, -1, false); + MatchResult mres2 = match(cur, -1, MatchStringAdv::MatchAnyPlace); displayMres(mres2, "findForwardAdv loop", cur) - switch (interpretMatch(mres, mres2)) { + switch (interpretMatch(mres, mres2)) { case MatchResult::newIsTooFar: // behind the expected match firstInvalid = increment; @@ -4299,7 +4474,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) increment /= 2; break; case MatchResult::newIsBetter: - // not reached yet, but cur.pos()+increment is bettert + // not reached yet, but cur.pos()+increment is better mres = mres2; firstInvalid -= increment; if (increment > firstInvalid*3/4) @@ -4312,7 +4487,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) default: // Todo@ // Handle not like MatchResult::newIsTooFar - LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix); + LYXERR(Debug::FINDVERBOSE, "Probably too far: Increment = " << increment << " match_prefix = " << 
mres.match_prefix); firstInvalid--; increment = increment*3/4; cur = old_cur; @@ -4322,23 +4497,22 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) if (mres.match_len > 0) { if (mres.match_prefix + mres.pos - mres.leadsize > 0) { // The match seems to indicate some deeper level - repeat = true; + repeat = 2; orig_cur = cur; orig_mres = mres; cur.forwardPos(); continue; } } - else if (repeat) { + else if (repeat > 0) { // should never be reached. cur = orig_cur; mres = orig_mres; } // LYXERR0("Leaving first loop"); - LYXERR(Debug::FIND, "Finalizing 1"); + LYXERR(Debug::FINDVERBOSE, "Finalizing 1"); MatchResult found_match = findAdvFinalize(cur, match, mres); if (found_match.match_len > 0) { - LASSERT(found_match.pos_len > 0, /**/); match.FillResults(found_match); return found_match.pos_len; } @@ -4363,17 +4537,18 @@ MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, M MatchResult mr = findAdvFinalize(tmp_cur, match, expected); Inset & inset = cur.inset(); for (; cur != cur_begin; cur.backwardPos()) { - LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur); + LYXERR(Debug::FINDVERBOSE, "findMostBackwards(): cur=" << cur); DocIterator new_cur = cur; new_cur.backwardPos(); - if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len) + if (new_cur == cur || &new_cur.inset() != &inset + || match(new_cur, -1, MatchStringAdv::MatchFromStart).match_len <= 0) break; MatchResult new_mr = findAdvFinalize(new_cur, match, expected); if (new_mr.match_len == mr.match_len) break; mr = new_mr; } - LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur); + LYXERR(Debug::FINDVERBOSE, "findMostBackwards(): exiting with cur=" << cur); return mr; } @@ -4392,18 +4567,18 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) bool pit_changed = false; do { cur.pos() = 0; - MatchResult found_match = match(cur, -1, false); + MatchResult found_match = match(cur, -1, MatchStringAdv::MatchAnyPlace); if 
(found_match.match_len > 0) { if (pit_changed) cur.pos() = cur.lastpos(); else cur.pos() = cur_orig.pos(); - LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur); + LYXERR(Debug::FINDVERBOSE, "findBackAdv2: cur: " << cur); DocIterator cur_prev_iter; do { - found_match = match(cur); - LYXERR(Debug::FIND, "findBackAdv3: found_match=" + found_match = match(cur, -1, MatchStringAdv::MatchFromStart); + LYXERR(Debug::FINDVERBOSE, "findBackAdv3: found_match=" << (found_match.match_len > 0) << ", cur: " << cur); if (found_match.match_len > 0) { MatchResult found_mr = findMostBackwards(cur, match, found_match); @@ -4468,7 +4643,7 @@ static bool allNonLowercase(Cursor const & cur, int len) pos_type beg_pos = cur.selectionBegin().pos(); pos_type end_pos = cur.selectionBegin().pos() + len; if (len > cur.lastpos() + 1 - beg_pos) { - LYXERR(Debug::FIND, "This should not happen, more debug needed"); + LYXERR(Debug::FINDVERBOSE, "This should not happen, more debug needed"); len = cur.lastpos() + 1 - beg_pos; end_pos = beg_pos + len; } @@ -4485,13 +4660,13 @@ static bool firstUppercase(Cursor const & cur) char_type ch1, ch2; pos_type pos = cur.selectionBegin().pos(); if (pos >= cur.lastpos() - 1) { - LYXERR(Debug::FIND, "No upper-case at cur: " << cur); + LYXERR(Debug::FINDVERBOSE, "No upper-case at cur: " << cur); return false; } ch1 = cur.paragraph().getChar(pos); ch2 = cur.paragraph().getChar(pos + 1); bool result = isUpperCase(ch1) && isLowerCase(ch2); - LYXERR(Debug::FIND, "firstUppercase(): " + LYXERR(Debug::FINDVERBOSE, "firstUppercase(): " << "ch1=" << ch1 << "(" << char(ch1) << "), ch2=" << ch2 << "(" << char(ch2) << ")" << ", result=" << result << ", cur=" << cur); @@ -4558,13 +4733,13 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma || sel_beg.idx() != sel_end.idx()) return 0; int sel_len = sel_end.pos() - sel_beg.pos(); - LYXERR(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end + LYXERR(Debug::FINDVERBOSE, "sel_beg: " << 
sel_beg << ", sel_end: " << sel_end << ", sel_len: " << sel_len << endl); if (sel_len == 0) return 0; LASSERT(sel_len > 0, return 0); - if (!matchAdv(sel_beg, sel_len).match_len) + if (matchAdv(sel_beg, sel_len, MatchStringAdv::MatchFromStart).match_len <= 0) return 0; // Build a copy of the replace buffer, adapted to the KeepCase option @@ -4579,7 +4754,7 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma repl_buffer.setUnnamed(true); LASSERT(repl_buffer.readString(lyx), return 0); if (opt.keep_case && sel_len >= 2) { - LYXERR(Debug::FIND, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len); + LYXERR(Debug::FINDVERBOSE, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len); if (cur.inTexted()) { if (firstUppercase(cur)) changeFirstCase(repl_buffer, text_uppercase, text_lowercase); @@ -4592,13 +4767,13 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma repl_buffer.changeLanguage( repl_buffer.language(), cur.getFont().language()); - LYXERR(Debug::FIND, "Replacing by pasteParagraphList()ing repl_buffer"); - LYXERR(Debug::FIND, "Before pasteParagraphList() cur=" << cur << endl); + LYXERR(Debug::FINDVERBOSE, "Replacing by pasteParagraphList()ing repl_buffer"); + LYXERR(Debug::FINDVERBOSE, "Before pasteParagraphList() cur=" << cur << endl); cap::pasteParagraphList(cur, repl_buffer.paragraphs(), repl_buffer.params().documentClassPtr(), repl_buffer.params().authors(), bv->buffer().errorList("Paste")); - LYXERR(Debug::FIND, "After pasteParagraphList() cur=" << cur << endl); + LYXERR(Debug::FINDVERBOSE, "After pasteParagraphList() cur=" << cur << endl); sel_len = repl_buffer.paragraphs().begin()->size(); } else if (cur.inMathed()) { odocstringstream ods; @@ -4606,34 +4781,36 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma // OutputParams runparams(&repl_buffer.params().encoding()); OutputParams runparams(encodings.fromLyXName("utf8")); 
runparams.nice = false; - runparams.flavor = Flavor::XeTeX; - runparams.linelen = 8000; //lyxrc.plaintext_linelen; - runparams.dryrun = true; - TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams); + setFindParams(runparams); + TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams, string(), -1, -1, true); //repl_buffer.getSourceCode(ods, 0, repl_buffer.paragraphs().size(), false); docstring repl_latex = ods.str(); - LYXERR(Debug::FIND, "Latexified replace_buffer: '" << repl_latex << "'"); + LYXERR(Debug::FINDVERBOSE, "Latexified replace_buffer: '" << repl_latex << "'"); string s; (void)regex_replace(to_utf8(repl_latex), s, "\\$(.*)\\$", "$1"); (void)regex_replace(s, s, "\\\\\\[(.*)\\\\\\]", "$1"); repl_latex = from_utf8(s); - LYXERR(Debug::FIND, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth()); + LYXERR(Debug::FINDVERBOSE, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth()); MathData ar(cur.buffer()); asArray(repl_latex, ar, Parse::NORMAL); cur.insert(ar); sel_len = ar.size(); - LYXERR(Debug::FIND, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); + LYXERR(Debug::FINDVERBOSE, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); } if (cur.pos() >= sel_len) cur.pos() -= sel_len; else cur.pos() = 0; - LYXERR(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); + LYXERR(Debug::FINDVERBOSE, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); bv->putSelectionAt(DocIterator(cur), sel_len, !opt.forward); bv->processUpdateFlags(Update::Force); return 1; } +static bool isWordChar(char_type c) +{ + return isLetterChar(c) || isNumberChar(c); +} /// Perform a FindAdv operation. 
bool findAdv(BufferView * bv, FindAndReplaceOptions & opt) @@ -4659,8 +4836,52 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions & opt) bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward); num_replaced += findAdvReplace(bv, opt, matchAdv); cur = bv->cursor(); - if (opt.forward) + if (opt.forward) { + if (opt.matchword && cur.pos() > 0) { // Skip word-characters if we are in the mid of a word + if (cur.inTexted()) { + Paragraph const & par = cur.paragraph(); + int len_limit, new_pos; + if (cur.lastpos() < par.size()) + len_limit = cur.lastpos(); + else + len_limit = par.size(); + for (new_pos = cur.pos() - 1; new_pos < len_limit; new_pos++) { + if (!isWordChar(par.getChar(new_pos))) + break; + } + if (new_pos > cur.pos()) + cur.pos() = new_pos; + } + else if (cur.inMathed()) { + // Check if 'cur.pos()-1' and 'cur.pos()' both point to a letter, + // I am not sure, we should consider the selection + bool sel = bv->cursor().selection(); + if (!sel && cur.pos() < cur.lastpos()) { + CursorSlice const & cs = cur.top(); + MathData md = cs.cell(); + int len = -1; + MathData::const_iterator it_end = md.end(); + MathData md2; + // Start the check with one character before actual cursor position + for (MathData::const_iterator it = md.begin() + cs.pos() - 1; + it != it_end; ++it) + md2.push_back(*it); + docstring inp = asString(md2); + for (len = 0; (unsigned) len < inp.size() && len + cur.pos() <= cur.lastpos(); len++) { + if (!isWordChar(inp[len])) + break; + } + // len == 0 means previous char was a word separator + // len == 1 search starts with a word separator + // len == 2 ... we have to skip len -1 chars + if (len > 1) + cur.pos() = cur.pos() + len - 1; + } + } + opt.matchword = false; + } pos_len = findForwardAdv(cur, matchAdv); + } else pos_len = findBackwardsAdv(cur, matchAdv); } catch (exception & ex) { @@ -4700,7 +4921,7 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions & opt) // Should never happen, maybe LASSERT() here? 
pos_len = cur.lastpos() - cur.pos(); } - LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len); bv->putSelectionAt(cur, pos_len, !opt.forward); return true; @@ -4721,7 +4942,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt << int(opt.scope) << ' ' << int(opt.restr); - LYXERR(Debug::FIND, "built: " << os.str()); + LYXERR(Debug::FINDVERBOSE, "built: " << os.str()); return os; } @@ -4729,7 +4950,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) { - // LYXERR(Debug::FIND, "parsing"); + // LYXERR(Debug::FINDVERBOSE, "parsing"); string s; string line; getline(is, line); @@ -4741,7 +4962,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); + // LYXERR(Debug::FINDVERBOSE, "file_buf_name: '" << s << "'"); opt.find_buf_name = from_utf8(s); is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all; is.get(); // Waste space before replace string @@ -4755,7 +4976,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); + // LYXERR(Debug::FINDVERBOSE, "repl_buf_name: '" << s << "'"); opt.repl_buf_name = from_utf8(s); is >> opt.keep_case; int i; @@ -4765,7 +4986,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) opt.restr = FindAndReplaceOptions::SearchRestriction(i); /* - LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' + LYXERR(Debug::FINDVERBOSE, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' << 
opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' ' << opt.scope << ' ' << opt.restr); */