X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxfind.cpp;h=86cf64d44a55320f73c3d609c563d954e5031de1;hb=bca1b63d89e27b31b089ab48c63368640084b3a6;hp=94a54bc37a203e62fc55a6818add4320a723079f;hpb=649755f48180b0466cdf212178dc9bdcc5d18210;p=lyx.git diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 94a54bc37a..86cf64d44a 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -31,6 +31,7 @@ #include "Paragraph.h" #include "Text.h" #include "Encoding.h" +#include "Language.h" #include "frontends/Application.h" #include "frontends/alert.h" @@ -49,8 +50,10 @@ #include "support/lstrings.h" #include "support/textutils.h" -#include +#include #include + +//#define ResultsDebug #define USE_QT_FOR_SEARCH #if defined(USE_QT_FOR_SEARCH) #include // sets QT_VERSION @@ -69,6 +72,9 @@ using namespace lyx::support; namespace lyx { +typedef unordered_map AccentsMap; +typedef unordered_map::const_iterator AccentsIterator; +static AccentsMap accents = unordered_map(); // Helper class for deciding what should be ignored class IgnoreFormats { @@ -96,7 +102,11 @@ class IgnoreFormats { /// bool getLanguage() const { return ignoreLanguage_; } /// - void setIgnoreFormat(string const & type, bool value); + bool getDeleted() const { return ignoreDeleted_; } + /// + void setIgnoreDeleted(bool value); + /// + void setIgnoreFormat(string const & type, bool value, bool fromUser = true); private: /// @@ -119,16 +129,23 @@ private: bool ignoreColor_ = false; /// bool ignoreLanguage_ = false; + bool userSelectedIgnoreLanguage_ = false; + /// + bool ignoreDeleted_ = true; }; - -void IgnoreFormats::setIgnoreFormat(string const & type, bool value) +void IgnoreFormats::setIgnoreFormat(string const & type, bool value, bool fromUser) { if (type == "color") { ignoreColor_ = value; } else if (type == "language") { - ignoreLanguage_ = value; + if (fromUser) { + userSelectedIgnoreLanguage_ = value; + ignoreLanguage_ = value; + } + else + ignoreLanguage_ = (value || userSelectedIgnoreLanguage_); } else if (type == "sectioning") { ignoreSectioning_ = value; @@ -157,24 +174,27 @@ void IgnoreFormats::setIgnoreFormat(string const & type, bool value) else if (type == "strike") { ignoreStrikeOut_ = value; } + else if (type == "deleted") { + ignoreDeleted_ = value; + } } // The global variable that can be changed from outside IgnoreFormats ignoreFormats; -void setIgnoreFormat(string const & type, bool value) +void setIgnoreFormat(string const & type, bool value, bool fromUser) { - ignoreFormats.setIgnoreFormat(type, value); + ignoreFormats.setIgnoreFormat(type, value, fromUser); } namespace { -bool parse_bool(docstring & howto) +bool parse_bool(docstring & howto, bool const defvalue = false) { if (howto.empty()) - return false; + return defvalue; docstring var; howto = split(howto, var, ' '); return var == "1"; @@ -205,24 +225,33 @@ private: }; -int findForward(DocIterator & cur, MatchString const & match, - bool find_del = true) +int findForward(DocIterator & cur, DocIterator const endcur, + MatchString const & match, + bool find_del = true, bool onlysel = false) { - for (; cur; cur.forwardChar()) + for (; cur; cur.forwardChar()) { + if (onlysel && endcur.pit() == cur.pit() + && endcur.idx() == cur.idx() && endcur.pos() < cur.pos()) + break; if (cur.inTexted()) { int len = match(cur.paragraph(), cur.pos(), find_del); if (len > 0) return len; } + } return 0; } -int findBackwards(DocIterator & cur, MatchString const & match, - bool find_del = true) +int findBackwards(DocIterator & cur, DocIterator const endcur, + MatchString const & match, + bool find_del = true, bool onlysel = false) { while (cur) { cur.backwardChar(); + if (onlysel && endcur.pit() == cur.pit() + && endcur.idx() == cur.idx() && endcur.pos() > cur.pos()) + break; if (cur.inTexted()) { int len = match(cur.paragraph(), cur.pos(), find_del); if (len > 0) @@ -242,38 +271,87 @@ bool searchAllowed(docstring const & str) return true; } +} // namespace + bool findOne(BufferView * bv, docstring const & searchstr, bool case_sens, bool whole, bool forward, - bool find_del = true, bool check_wrap = false) + bool find_del, bool check_wrap, bool const auto_wrap, + bool instant, bool onlysel) { + // Clean up previous selections with empty searchstr on instant + if (searchstr.empty() && instant) { + if (bv->cursor().selection()) { + bv->setCursor(bv->cursor().selectionBegin()); + bv->clearSelection(); + } + return true; + } + if (!searchAllowed(searchstr)) return false; + DocIterator const endcur = forward ? bv->cursor().selectionEnd() : bv->cursor().selectionBegin(); + + if (onlysel && bv->cursor().selection()) { + docstring const matchstring = bv->cursor().selectionAsString(false); + docstring const lcmatchsting = support::lowercase(matchstring); + if (matchstring == searchstr || (!case_sens && lcmatchsting == lowercase(searchstr))) { + docstring q = _("The search string matches the selection, and search is limited to selection.\n" + "Continue search outside?"); + int search_answer = frontend::Alert::prompt(_("Search outside selection?"), + q, 0, 1, _("&Yes"), _("&No")); + if (search_answer == 0) { + bv->clearSelection(); + if (findOne(bv, searchstr, case_sens, whole, forward, + find_del, check_wrap, auto_wrap, false, false)) + return true; + } + return false; + } + } + DocIterator cur = forward - ? bv->cursor().selectionEnd() - : bv->cursor().selectionBegin(); + ? ((instant || onlysel) ? bv->cursor().selectionBegin() : bv->cursor().selectionEnd()) + : ((instant || onlysel) ? bv->cursor().selectionEnd() : bv->cursor().selectionBegin()); MatchString const match(searchstr, case_sens, whole); int match_len = forward - ? findForward(cur, match, find_del) - : findBackwards(cur, match, find_del); + ? findForward(cur, endcur, match, find_del, onlysel) + : findBackwards(cur, endcur, match, find_del, onlysel); if (match_len > 0) bv->putSelectionAt(cur, match_len, !forward); + else if (onlysel) { + docstring q = _("The search string was not found within the selection.\n" + "Continue search outside?"); + int search_answer = frontend::Alert::prompt(_("Search outside selection?"), + q, 0, 1, _("&Yes"), _("&No")); + if (search_answer == 0) { + bv->clearSelection(); + if (findOne(bv, searchstr, case_sens, whole, forward, + find_del, check_wrap, auto_wrap, false, false)) + return true; + } + return false; + } else if (check_wrap) { DocIterator cur_orig(bv->cursor()); - docstring q; - if (forward) - q = _("End of file reached while searching forward.\n" - "Continue searching from the beginning?"); - else - q = _("Beginning of file reached while searching backward.\n" - "Continue searching from the end?"); - int wrap_answer = frontend::Alert::prompt(_("Wrap search?"), - q, 0, 1, _("&Yes"), _("&No")); - if (wrap_answer == 0) { + bool wrap = auto_wrap; + if (!auto_wrap) { + docstring q; + if (forward) + q = _("End of file reached while searching forward.\n" + "Continue searching from the beginning?"); + else + q = _("Beginning of file reached while searching backward.\n" + "Continue searching from the end?"); + int wrap_answer = frontend::Alert::prompt(_("Wrap search?"), + q, 0, 1, _("&Yes"), _("&No")); + wrap = wrap_answer == 0; + } + if (wrap) { if (forward) { bv->cursor().clear(); bv->cursor().push_back(CursorSlice(bv->buffer().inset())); @@ -281,8 +359,15 @@ bool findOne(BufferView * bv, docstring const & searchstr, bv->cursor().setCursor(doc_iterator_end(&bv->buffer())); bv->cursor().backwardPos(); } + if (auto_wrap) { + docstring const msg = forward + ? _("Search reached end of document, continuing from beginning.") + : _("Search reached beginning of document, continuing from end."); + bv->message(msg); + } bv->clearSelection(); - if (findOne(bv, searchstr, case_sens, whole, forward, find_del, false)) + if (findOne(bv, searchstr, case_sens, whole, forward, + find_del, false, false, false, false)) return true; } bv->cursor().setCursor(cur_orig); @@ -293,16 +378,20 @@ bool findOne(BufferView * bv, docstring const & searchstr, } +namespace { + int replaceAll(BufferView * bv, docstring const & searchstr, docstring const & replacestr, - bool case_sens, bool whole) + bool case_sens, bool whole, bool onlysel) { Buffer & buf = bv->buffer(); if (!searchAllowed(searchstr) || buf.isReadonly()) return 0; - DocIterator cur_orig(bv->cursor()); + DocIterator startcur = bv->cursor().selectionBegin(); + DocIterator endcur = bv->cursor().selectionEnd(); + bool const had_selection = bv->cursor().selection(); MatchString const match(searchstr, case_sens, whole); int num = 0; @@ -312,29 +401,53 @@ int replaceAll(BufferView * bv, Cursor cur(*bv); cur.setCursor(doc_iterator_begin(&buf)); - int match_len = findForward(cur, match, false); + int match_len = findForward(cur, endcur, match, false, onlysel); while (match_len > 0) { // Backup current cursor position and font. pos_type const pos = cur.pos(); Font const font = cur.paragraph().getFontSettings(buf.params(), pos); cur.recordUndo(); - int striked = ssize - + int ct_deleted_text = ssize - cur.paragraph().eraseChars(pos, pos + match_len, buf.params().track_changes); cur.paragraph().insert(pos, replacestr, font, Change(buf.params().track_changes ? Change::INSERTED : Change::UNCHANGED)); - for (int i = 0; i < rsize + striked; ++i) - cur.forwardChar(); + for (int i = 0; i < rsize + ct_deleted_text + && cur.pos() < cur.lastpos(); ++i) + cur.forwardPos(); + if (onlysel && cur.pit() == endcur.pit() && cur.idx() == endcur.idx()) { + // Adjust end of selection for replace-all in selection + if (rsize > ssize) { + int const offset = rsize - ssize; + for (int i = 0; i < offset + ct_deleted_text + && endcur.pos() < endcur.lastpos(); ++i) + endcur.forwardPos(); + } else { + int const offset = ssize - rsize; + for (int i = 0; i < offset && endcur.pos() > 0; ++i) + endcur.backwardPos(); + for (int i = 0; i < ct_deleted_text + && endcur.pos() < endcur.lastpos(); ++i) + endcur.forwardPos(); + } + } ++num; - match_len = findForward(cur, match, false); + match_len = findForward(cur, endcur, match, false, onlysel); } bv->putSelectionAt(doc_iterator_begin(&buf), 0, false); - cur_orig.fixIfBroken(); - bv->setCursor(cur_orig); + startcur.fixIfBroken(); + bv->setCursor(startcur); + + // Reset selection, accounting for changes in selection + if (had_selection) { + endcur.fixIfBroken(); + bv->cursor().resetAnchor(); + bv->setCursorSelectionTo(endcur); + } return num; } @@ -357,13 +470,15 @@ int replaceAll(BufferView * bv, // whether anything at all was done. pair replaceOne(BufferView * bv, docstring searchstr, docstring const & replacestr, bool case_sens, - bool whole, bool forward, bool findnext) + bool whole, bool forward, bool findnext, bool wrap, + bool onlysel) { Cursor & cur = bv->cursor(); - if (!cur.selection()) { + if (!cur.selection() || onlysel) { // no selection, non-empty search string: find it if (!searchstr.empty()) { - bool const found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); + bool const found = findOne(bv, searchstr, case_sens, whole, + forward, true, findnext, wrap, false, onlysel); return make_pair(found, 0); } // empty search string @@ -374,7 +489,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, // This causes a minor bug as undo will restore this selection, // which the user did not create (#8986). cur.innerText()->selectWord(cur, WHOLE_WORD); - searchstr = cur.selectionAsString(false); + searchstr = cur.selectionAsString(false, true); } // if we still don't have a search string, report the error @@ -383,7 +498,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, return make_pair(false, 0); bool have_selection = cur.selection(); - docstring const selected = cur.selectionAsString(false); + docstring const selected = cur.selectionAsString(false, true); bool match = case_sens ? searchstr == selected @@ -392,7 +507,8 @@ pair replaceOne(BufferView * bv, docstring searchstr, // no selection or current selection is not search word: // just find the search word if (!have_selection || !match) { - bool const found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); + bool const found = findOne(bv, searchstr, case_sens, whole, forward, + true, findnext, wrap, false, onlysel); return make_pair(found, 0); } @@ -408,7 +524,8 @@ pair replaceOne(BufferView * bv, docstring searchstr, cur.pos() = cur.lastpos()); } if (findnext) - findOne(bv, searchstr, case_sens, whole, forward, false, findnext); + findOne(bv, searchstr, case_sens, whole, + forward, false, findnext, wrap, false, onlysel); return make_pair(true, 1); } @@ -417,13 +534,18 @@ pair replaceOne(BufferView * bv, docstring searchstr, docstring const find2string(docstring const & search, - bool casesensitive, bool matchword, bool forward) + bool casesensitive, bool matchword, + bool forward, bool wrap, bool instant, + bool onlysel) { odocstringstream ss; ss << search << '\n' << int(casesensitive) << ' ' << int(matchword) << ' ' - << int(forward); + << int(forward) << ' ' + << int(wrap) << ' ' + << int(instant) << ' ' + << int(onlysel); return ss.str(); } @@ -431,7 +553,8 @@ docstring const find2string(docstring const & search, docstring const replace2string(docstring const & replace, docstring const & search, bool casesensitive, bool matchword, - bool all, bool forward, bool findnext) + bool all, bool forward, bool findnext, + bool wrap, bool onlysel) { odocstringstream ss; ss << replace << '\n' @@ -440,34 +563,60 @@ docstring const replace2string(docstring const & replace, << int(matchword) << ' ' << int(all) << ' ' << int(forward) << ' ' - << int(findnext); + << int(findnext) << ' ' + << int(wrap) << ' ' + << int(onlysel); return ss.str(); } -bool lyxfind(BufferView * bv, FuncRequest const & ev) +docstring const string2find(docstring const & argument, + bool &casesensitive, + bool &matchword, + bool &forward, + bool &wrap, + bool &instant, + bool &onlysel) { - if (!bv || ev.action() != LFUN_WORD_FIND) - return false; - - //lyxerr << "find called, cmd: " << ev << endl; - // data is of the form // " - // " + // " docstring search; - docstring howto = split(ev.argument(), search, '\n'); + docstring howto = split(argument, search, '\n'); - bool casesensitive = parse_bool(howto); - bool matchword = parse_bool(howto); - bool forward = parse_bool(howto); + casesensitive = parse_bool(howto); + matchword = parse_bool(howto); + forward = parse_bool(howto, true); + wrap = parse_bool(howto); + instant = parse_bool(howto); + onlysel = parse_bool(howto); - return findOne(bv, search, casesensitive, matchword, forward, true, true); + return search; } -bool lyxreplace(BufferView * bv, - FuncRequest const & ev, bool has_deleted) +bool lyxfind(BufferView * bv, FuncRequest const & ev) +{ + if (!bv || ev.action() != LFUN_WORD_FIND) + return false; + + //lyxerr << "find called, cmd: " << ev << endl; + bool casesensitive; + bool matchword; + bool forward; + bool wrap; + bool instant; + bool onlysel; + + docstring search = string2find(ev.argument(), casesensitive, + matchword, forward, wrap, instant, onlysel); + + return findOne(bv, search, casesensitive, matchword, forward, + false, true, wrap, instant, onlysel); +} + + +bool lyxreplace(BufferView * bv, FuncRequest const & ev) { if (!bv || ev.action() != LFUN_WORD_REPLACE) return false; @@ -475,7 +624,7 @@ bool lyxreplace(BufferView * bv, // data is of the form // " // - // " + // " docstring search; docstring rplc; docstring howto = split(ev.argument(), rplc, '\n'); @@ -484,45 +633,48 @@ bool lyxreplace(BufferView * bv, bool casesensitive = parse_bool(howto); bool matchword = parse_bool(howto); bool all = parse_bool(howto); - bool forward = parse_bool(howto); - bool findnext = howto.empty() ? true : parse_bool(howto); + bool forward = parse_bool(howto, true); + bool findnext = parse_bool(howto, true); + bool wrap = parse_bool(howto); + bool onlysel = parse_bool(howto); + + if (!bv->cursor().selection()) + // only selection only makes sense with selection + onlysel = false; bool update = false; - if (!has_deleted) { - int replace_count = 0; - if (all) { - replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); - update = replace_count > 0; - } else { - pair rv = - replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); - update = rv.first; - replace_count = rv.second; - } + int replace_count = 0; + if (all) { + replace_count = replaceAll(bv, search, rplc, casesensitive, + matchword, onlysel); + update = replace_count > 0; + } else { + pair rv = + replaceOne(bv, search, rplc, casesensitive, matchword, + forward, findnext, wrap, onlysel); + update = rv.first; + replace_count = rv.second; + } - Buffer const & buf = bv->buffer(); - if (!update) { - // emit message signal. + Buffer const & buf = bv->buffer(); + if (!update) { + // emit message signal. + if (onlysel) + buf.message(_("String not found in selection.")); + else buf.message(_("String not found.")); + } else { + if (replace_count == 0) { + buf.message(_("String found.")); + } else if (replace_count == 1) { + buf.message(_("String has been replaced.")); } else { - if (replace_count == 0) { - buf.message(_("String found.")); - } else if (replace_count == 1) { - buf.message(_("String has been replaced.")); - } else { - docstring const str = - bformat(_("%1$d strings have been replaced."), replace_count); - buf.message(str); - } + docstring const str = onlysel + ? bformat(_("%1$d strings have been replaced in the selection."), replace_count) + : bformat(_("%1$d strings have been replaced."), replace_count); + buf.message(str); } - } else if (findnext) { - // if we have deleted characters, we do not replace at all, but - // rather search for the next occurence - if (findOne(bv, search, casesensitive, matchword, forward, true, findnext)) - update = true; - else - bv->message(_("String not found.")); } return update; } @@ -646,139 +798,111 @@ namespace { typedef vector > Escapes; -/// A map of symbols and their escaped equivalent needed within a regex. -/// @note Beware of order -Escapes const & get_regexp_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("$", "_x_$")); - escape_map.push_back(P("{", "_x_{")); - escape_map.push_back(P("}", "_x_}")); - escape_map.push_back(P("[", "_x_[")); - escape_map.push_back(P("]", "_x_]")); - escape_map.push_back(P("(", "_x_(")); - escape_map.push_back(P(")", "_x_)")); - escape_map.push_back(P("+", "_x_+")); - escape_map.push_back(P("*", "_x_*")); - escape_map.push_back(P(".", "_x_.")); - escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)")); - escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)")); - escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)")); - escape_map.push_back(P("_x_", "\\")); - } - return escape_map; -} - -/// A map of lyx escaped strings and their unescaped equivalent. -Escapes const & get_lyx_unescapes() +string string2regex(string in) { - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\%", "%")); - escape_map.push_back(P("\\{", "{")); - escape_map.push_back(P("\\}", "}")); - escape_map.push_back(P("\\mathcircumflex ", "^")); - escape_map.push_back(P("\\mathcircumflex", "^")); - escape_map.push_back(P("\\backslash ", "\\")); - escape_map.push_back(P("\\backslash", "\\")); - escape_map.push_back(P("\\sim ", "~")); - escape_map.push_back(P("\\sim", "~")); + static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" }; + string temp = std::regex_replace(in, specialChars, R"(\$&)" ); + string temp2(""); + size_t lastpos = 0; + size_t fl_pos = 0; + int offset = 1; + while (fl_pos < temp.size()) { + fl_pos = temp.find("\\\\foreignlanguage", lastpos + offset); + if (fl_pos == string::npos) + break; + offset = 16; + temp2 += temp.substr(lastpos, fl_pos - lastpos); + temp2 += "\\n"; + lastpos = fl_pos; } - return escape_map; -} - -/// A map of escapes turning a regexp matching text to one matching latex. -Escapes const & get_regexp_latex_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)")); - escape_map.push_back(P("(first << " as " << it->second); - unsigned int pos = 0; - while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) { - s.replace(pos, it->first.length(), it->second); - LYXERR(Debug::FIND, "After escape: " << s); - pos += it->second.length(); -// LYXERR(Debug::FIND, "pos: " << pos); + /* Convert \backslash => \ + * and \{, \}, \[, \] => {, }, [, ] + */ + string s(""); + regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))"); + size_t lastpos = 0; + smatch sub; + bool backslashed = false; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + string replace; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else { + if (sub.str(4) == "backslash") { + replace = "\\"; + if (withformat) { + // transforms '\backslash \{' into '\{' + // and '\{' into '{' + string next = t.substr(sub.position(2) + sub.str(2).length(), 2); + if ((next == "\\{") || (next == "\\}")) { + replace = ""; + backslashed = true; + } + } + } + else if (sub.str(4) == "mathcircumflex") + replace = "^"; + else if (backslashed) { + backslashed = false; + if (withformat && (sub.str(3) == "{")) + replace = accents["braceleft"]; + else if (withformat && (sub.str(3) == "}")) + replace = accents["braceright"]; + else { + // else part should not exist + LASSERT(1, /**/); + } + } + else + replace = sub.str(3); } + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += replace; + lastpos = sub.position(2) + sub.length(2); } - LYXERR(Debug::FIND, "Escaped : '" << s << "'"); + if (lastpos == 0) + return t; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); return s; } - /// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string), /// while outside apply get_lyx_unescapes()+get_regexp_escapes(). /// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well. -string escape_for_regex(string s, bool match_latex) +string escape_for_regex(string s, bool withformat) { - size_t pos = 0; - while (pos < s.size()) { - size_t new_pos = s.find("\\regexp{", pos); - if (new_pos == string::npos) - new_pos = s.size(); - string t; - if (new_pos > pos) { - LYXERR(Debug::FIND, "new_pos: " << new_pos); - t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes()); - LYXERR(Debug::FIND, "t [lyx]: " << t); - t = apply_escapes(t, get_regexp_escapes()); - LYXERR(Debug::FIND, "t [rxp]: " << t); - s.replace(pos, new_pos - pos, t); - new_pos = pos + t.size(); - LYXERR(Debug::FIND, "Regexp after escaping: " << s); - LYXERR(Debug::FIND, "new_pos: " << new_pos); - if (new_pos == s.size()) - break; - } - // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes) - size_t end_pos = s.find("\\endregexp{}}", new_pos + 8); - LYXERR(Debug::FIND, "end_pos: " << end_pos); - t = s.substr(new_pos + 8, end_pos - (new_pos + 8)); - LYXERR(Debug::FIND, "t in regexp : " << t); - t = apply_escapes(t, get_lyx_unescapes()); - LYXERR(Debug::FIND, "t in regexp after unescapes [lyx]: " << t); - if (match_latex) { - t = apply_escapes(t, get_regexp_latex_escapes()); - LYXERR(Debug::FIND, "t in regexp after latex_escapes [ltx]: " << t); + size_t lastpos = 0; + string result = ""; + while (lastpos < s.size()) { + size_t regex_pos = s.find("\\regexp{", lastpos); + if (regex_pos == string::npos) { + regex_pos = s.size(); } - if (end_pos == s.size()) { - s.replace(new_pos, end_pos - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s); - break; + if (regex_pos > lastpos) { + result += string2regex(s.substr(lastpos, regex_pos-lastpos)); + lastpos = regex_pos; + if (lastpos == s.size()) + break; } - s.replace(new_pos, end_pos + 13 - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s); - pos = new_pos + t.size(); - LYXERR(Debug::FIND, "pos: " << pos); + size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8); + result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat); + lastpos = end_pos + 13; } - return s; + return result; } @@ -796,62 +920,6 @@ bool regex_replace(string const & s, string & t, string const & searchstr, return rv; } - -/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces. - ** - ** Verify that closed braces exactly match open braces. This avoids that, for example, - ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'. - ** - ** @param unmatched - ** Number of open braces that must remain open at the end for the verification to succeed. - **/ -#if QTSEARCH -bool braces_match(QString const & beg, - int unmatched = 0) -#else -bool braces_match(string const & beg, - int unmatched = 0) -#endif -{ - int open_pars = 0; -#if QTSEARCH - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'"); -#else - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'"); -#endif - int lastidx = beg.size(); - for (int i=0; i < lastidx; ++i) { - // Skip escaped braces in the count -#if QTSEARCH - QChar c = beg.at(i); -#else - char c = beg.at(i); -#endif - if (c == '\\') { - ++i; - if (i >= lastidx) - break; - } else if (c == '{') { - ++open_pars; - } else if (c == '}') { - if (open_pars == 0) { - LYXERR(Debug::FIND, "Found unmatched closed brace"); - return false; - } else - --open_pars; - } - } - if (open_pars != unmatched) { - LYXERR(Debug::FIND, "Found " << open_pars - << " instead of " << unmatched - << " unmatched open braces at the end of count"); - return false; - } - LYXERR(Debug::FIND, "Braces match as expected"); - return true; -} - - class MatchResult { public: enum range { @@ -864,7 +932,10 @@ public: int match2end; int pos; int leadsize; - MatchResult(): match_len(0),match_prefix(0),match2end(0), pos(0),leadsize(0) {}; + int pos_len; + int searched_size; + vector result = vector (); + MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {} }; static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres) @@ -873,8 +944,11 @@ static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newre return MatchResult::newIsTooFar; if (newres.match_len < oldres.match_len) return MatchResult::newIsTooFar; - if ((newres.match_len == oldres.match_len) && (newres.match2end == oldres.match2end)) - return MatchResult::newIsBetter; + + if (newres.match_len == oldres.match_len) { + if (newres.match2end == oldres.match2end) + return MatchResult::newIsBetter; + } return MatchResult::newIsInvalid; } @@ -883,7 +957,7 @@ static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newre class MatchStringAdv { public: - MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt); + MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt); /** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv ** constructor as opt.search, under the opt.* options settings. @@ -912,6 +986,7 @@ public: private: /// Auxiliary find method (does not account for opt.matchword) MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const; + void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = ""); /** Normalize a stringified or latexified LyX paragraph. ** @@ -926,7 +1001,7 @@ private: ** @todo Normalization should also expand macros, if the corresponding ** search option was checked. **/ - string normalize(docstring const & s, bool hack_braces) const; + string normalize(docstring const & s) const; // normalized string to search string par_as_string; // regular expression to use for searching @@ -947,10 +1022,27 @@ private: // number of (.*?) subexpressions added at end of search regexp for closing // environments, math mode, styles, etc... int close_wildcards; +public: // Are we searching with regular expressions ? bool use_regexp; + static int valid_matches; + static vector matches; + void FillResults(MatchResult &found_mr); }; +int MatchStringAdv::valid_matches = 0; +vector MatchStringAdv::matches = vector (10); + +void MatchStringAdv::FillResults(MatchResult &found_mr) +{ + if (found_mr.match_len > 0) { + valid_matches = found_mr.result.size(); + for (size_t i = 0; i < found_mr.result.size(); i++) + matches[i] = found_mr.result[i]; + } + else + valid_matches = 0; +} static docstring buffer_to_latex(Buffer & buffer) { @@ -963,7 +1055,10 @@ static docstring buffer_to_latex(Buffer & buffer) runparams.linelen = 10000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + if (ignoreFormats.getDeleted()) + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + else + runparams.for_searchAdv = OutputParams::SearchWithDeleted; pit_type const endpit = buffer.paragraphs().size(); for (pit_type pit = 0; pit != endpit; ++pit) { TeXOnePar(buffer, buffer.text(), pit, os, runparams); @@ -985,21 +1080,28 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co runparams.flavor = Flavor::XeTeX; runparams.linelen = 10000; //lyxrc.plaintext_linelen; runparams.dryrun = true; - runparams.for_search = true; + int option = AS_STR_INSETS |AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { Paragraph const & par = buffer.paragraphs().at(pit); LYXERR(Debug::FIND, "Adding to search string: '" << par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams) << "'"); str += par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams); } // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts string t = to_utf8(str); - while (regex_replace(t, t, "\\\\(text|lyxmathsym)\\{([^\\}]*)\\}", "$2")); + while (regex_replace(t, t, "\\\\(text|lyxmathsym|ensuremath)\\{([^\\}]*)\\}", "$2")); str = from_utf8(t); } return str; @@ -1146,7 +1248,7 @@ class KeyInfo { class Border { public: - Border(int l=0, int u=0) : low(l), upper(u) {}; + Border(int l=0, int u=0) : low(l), upper(u) {} int low; int upper; }; @@ -1320,9 +1422,6 @@ void Intervall::addIntervall(int low, int upper) } } -typedef map AccentsMap; -static AccentsMap accents = map(); - static void buildaccent(string n, string param, string values) { stringstream s(n); @@ -1394,12 +1493,234 @@ static string getutf8(unsigned uchar) return(ret); } +static void addAccents(string latex_in, string unicode_out) +{ + latex_in = latex_in.substr(1); + AccentsIterator it_ac = accents.find(latex_in); + if (it_ac == accents.end()) { + accents[latex_in] = unicode_out; + } + else { + LYXERR0("Key " << latex_in << " already set"); + } +} + +void static fillMissingUnicodesymbols() +{ + addAccents("\\textyen", getutf8(0x00a5)); + addAccents("\\yen", getutf8(0x00a5)); + addAccents("\\textsection", getutf8(0x00a7)); + addAccents("\\mathsection", getutf8(0x00a7)); + addAccents("\\textlnot", getutf8(0x00ac)); + addAccents("\\neg", getutf8(0x00ac)); + addAccents("\\textpm", getutf8(0x00b1)); + addAccents("\\pm", getutf8(0x00b1)); + addAccents("\\textparagraph", getutf8(0x00b6)); + addAccents("\\mathparagraph", getutf8(0x00b6)); + addAccents("\\textperiodcentered", getutf8(0x00b7)); + addAccents("\\texttimes", getutf8(0x00d7)); + addAccents("\\times", getutf8(0x00d7)); + addAccents("\\dh", getutf8(0x00f0)); + addAccents("\\eth", getutf8(0x00f0)); + addAccents("\\textdiv", getutf8(0x00f7)); + addAccents("\\div", getutf8(0x00f7)); + addAccents("\\o", getutf8(0x00f8)); + addAccents("\\j", getutf8(0x0237)); + addAccents("\\textalpha", getutf8(0x03b1)); + addAccents("\\alpha", getutf8(0x03b1)); + addAccents("\\textbeta", getutf8(0x03b2)); + addAccents("\\beta", getutf8(0x03b2)); + addAccents("\\textgamma", getutf8(0x03b3)); + addAccents("\\gamma", getutf8(0x03b3)); + addAccents("\\textdelta", getutf8(0x03b4)); + addAccents("\\delta", getutf8(0x03b4)); + addAccents("\\textepsilon", getutf8(0x03b5)); + addAccents("\\varepsilon", getutf8(0x03b5)); + addAccents("\\textzeta", getutf8(0x03b6)); + addAccents("\\zeta", getutf8(0x03b6)); + addAccents("\\texteta", getutf8(0x03b7)); + addAccents("\\eta", getutf8(0x03b7)); + addAccents("\\texttheta", getutf8(0x03b8)); + addAccents("\\theta", getutf8(0x03b8)); + addAccents("\\textiota", getutf8(0x03b9)); + addAccents("\\iota", getutf8(0x03b9)); + addAccents("\\textkappa", getutf8(0x03ba)); + addAccents("\\kappa", getutf8(0x03ba)); + addAccents("\\textlambda", getutf8(0x03bb)); + addAccents("\\lambda", getutf8(0x03bb)); + addAccents("\\textmu", getutf8(0x03bc)); + addAccents("\\mu", getutf8(0x03bc)); + addAccents("\\textnu", getutf8(0x03bd)); + addAccents("\\nu", getutf8(0x03bd)); + addAccents("\\textxi", getutf8(0x03be)); + addAccents("\\xi", getutf8(0x03be)); + addAccents("\\textpi", getutf8(0x03c0)); + addAccents("\\pi", getutf8(0x03c0)); + addAccents("\\textrho", getutf8(0x03c1)); + addAccents("\\rho", getutf8(0x03c1)); + addAccents("\\textfinalsigma", getutf8(0x03c2)); + addAccents("\\varsigma", getutf8(0x03c2)); + addAccents("\\textsigma", getutf8(0x03c3)); + addAccents("\\sigma", getutf8(0x03c3)); + addAccents("\\texttau", getutf8(0x03c4)); + addAccents("\\tau", getutf8(0x03c4)); + addAccents("\\textupsilon", getutf8(0x03c5)); + addAccents("\\upsilon", getutf8(0x03c5)); + addAccents("\\textphi", getutf8(0x03c6)); + addAccents("\\varphi", getutf8(0x03c6)); + addAccents("\\textchi", getutf8(0x03c7)); + addAccents("\\chi", getutf8(0x03c7)); + addAccents("\\textpsi", getutf8(0x03c8)); + addAccents("\\psi", getutf8(0x03c8)); + addAccents("\\textomega", getutf8(0x03c9)); + addAccents("\\omega", getutf8(0x03c9)); + addAccents("\\textdigamma", getutf8(0x03dd)); + addAccents("\\digamma", getutf8(0x03dd)); + addAccents("\\hebalef", getutf8(0x05d0)); + addAccents("\\aleph", getutf8(0x05d0)); + addAccents("\\hebbet", getutf8(0x05d1)); + addAccents("\\beth", getutf8(0x05d1)); + addAccents("\\hebgimel", getutf8(0x05d2)); + addAccents("\\gimel", getutf8(0x05d2)); + addAccents("\\hebdalet", getutf8(0x05d3)); + addAccents("\\daleth", getutf8(0x05d3)); + // Thai characters + addAccents("\\thaiKoKai", getutf8(0x0e01)); + addAccents("\\thaiKhoKhai", getutf8(0x0e02)); + addAccents("\\thaiKhoKhuat", getutf8(0x0e03)); + addAccents("\\thaiKhoKhwai", getutf8(0x0e04)); + addAccents("\\thaiKhoKhon", getutf8(0x0e05)); + addAccents("\\thaiKhoRakhang", getutf8(0x0e06)); + addAccents("\\thaiNgoNgu", getutf8(0x0e07)); + addAccents("\\thaiChoChan", getutf8(0x0e08)); + addAccents("\\thaiChoChing", getutf8(0x0e09)); + addAccents("\\thaiChoChang", getutf8(0x0e0a)); + addAccents("\\thaiSoSo", getutf8(0x0e0b)); + addAccents("\\thaiChoChoe", getutf8(0x0e0c)); + addAccents("\\thaiYoYing", getutf8(0x0e0d)); + addAccents("\\thaiDoChada", getutf8(0x0e0e)); + addAccents("\\thaiToPatak", getutf8(0x0e0f)); + addAccents("\\thaiThoThan", getutf8(0x0e10)); + addAccents("\\thaiThoNangmontho", getutf8(0x0e11)); + addAccents("\\thaiThoPhuthao", getutf8(0x0e12)); + addAccents("\\thaiNoNen", getutf8(0x0e13)); + addAccents("\\thaiDoDek", getutf8(0x0e14)); + addAccents("\\thaiToTao", getutf8(0x0e15)); + addAccents("\\thaiThoThung", getutf8(0x0e16)); + addAccents("\\thaiThoThahan", getutf8(0x0e17)); + addAccents("\\thaiThoThong", getutf8(0x0e18)); + addAccents("\\thaiNoNu", getutf8(0x0e19)); + addAccents("\\thaiBoBaimai", getutf8(0x0e1a)); + addAccents("\\thaiPoPla", getutf8(0x0e1b)); + addAccents("\\thaiPhoPhung", getutf8(0x0e1c)); + addAccents("\\thaiFoFa", getutf8(0x0e1d)); + addAccents("\\thaiPhoPhan", getutf8(0x0e1e)); + addAccents("\\thaiFoFan", getutf8(0x0e1f)); + addAccents("\\thaiPhoSamphao", getutf8(0x0e20)); + addAccents("\\thaiMoMa", getutf8(0x0e21)); + addAccents("\\thaiYoYak", getutf8(0x0e22)); + addAccents("\\thaiRoRua", getutf8(0x0e23)); + addAccents("\\thaiRu", getutf8(0x0e24)); + addAccents("\\thaiLoLing", getutf8(0x0e25)); + addAccents("\\thaiLu", getutf8(0x0e26)); + addAccents("\\thaiWoWaen", getutf8(0x0e27)); + addAccents("\\thaiSoSala", getutf8(0x0e28)); + addAccents("\\thaiSoRusi", getutf8(0x0e29)); + addAccents("\\thaiSoSua", getutf8(0x0e2a)); + addAccents("\\thaiHoHip", getutf8(0x0e2b)); + addAccents("\\thaiLoChula", getutf8(0x0e2c)); + addAccents("\\thaiOAng", getutf8(0x0e2d)); + addAccents("\\thaiHoNokhuk", getutf8(0x0e2e)); + addAccents("\\thaiPaiyannoi", getutf8(0x0e2f)); + addAccents("\\thaiSaraA", getutf8(0x0e30)); + addAccents("\\thaiMaiHanakat", getutf8(0x0e31)); + addAccents("\\thaiSaraAa", getutf8(0x0e32)); + addAccents("\\thaiSaraAm", getutf8(0x0e33)); + addAccents("\\thaiSaraI", getutf8(0x0e34)); + addAccents("\\thaiSaraIi", getutf8(0x0e35)); + addAccents("\\thaiSaraUe", getutf8(0x0e36)); + addAccents("\\thaiSaraUee", getutf8(0x0e37)); + addAccents("\\thaiSaraU", getutf8(0x0e38)); + addAccents("\\thaiSaraUu", getutf8(0x0e39)); + addAccents("\\thaiPhinthu", getutf8(0x0e3a)); + addAccents("\\thaiSaraE", getutf8(0x0e40)); + addAccents("\\thaiSaraAe", getutf8(0x0e41)); + addAccents("\\thaiSaraO", getutf8(0x0e42)); + addAccents("\\thaiSaraAiMaimuan", getutf8(0x0e43)); + addAccents("\\thaiSaraAiMaimalai", getutf8(0x0e44)); + addAccents("\\thaiLakkhangyao", getutf8(0x0e45)); + addAccents("\\thaiMaiyamok", getutf8(0x0e46)); + addAccents("\\thaiMaitaikhu", getutf8(0x0e47)); + addAccents("\\thaiMaiEk", getutf8(0x0e48)); + addAccents("\\thaiMaiTho", getutf8(0x0e49)); + addAccents("\\thaiMaiTri", getutf8(0x0e4a)); + addAccents("\\thaiMaiChattawa", getutf8(0x0e4b)); + addAccents("\\thaiThanthakhat", getutf8(0x0e4c)); + addAccents("\\thaiNikhahit", getutf8(0x0e4d)); + addAccents("\\thaiYamakkan", getutf8(0x0e4e)); + addAccents("\\thaiFongman", getutf8(0x0e4f)); + addAccents("\\thaizero", getutf8(0x0e50)); + addAccents("\\thaione", getutf8(0x0e51)); + addAccents("\\thaitwo", getutf8(0x0e52)); + addAccents("\\thaithree", getutf8(0x0e53)); + addAccents("\\thaifour", getutf8(0x0e54)); + addAccents("\\thaifive", getutf8(0x0e55)); + addAccents("\\thaisix", getutf8(0x0e56)); + addAccents("\\thaiseven", getutf8(0x0e57)); + addAccents("\\thaieight", getutf8(0x0e58)); + addAccents("\\thainine", getutf8(0x0e59)); + addAccents("\\thaiAngkhankhu", getutf8(0x0e5a)); + addAccents("\\thaiKhomut", getutf8(0x0e5b)); + + addAccents("\\dag", getutf8(0x2020)); + addAccents("\\dagger", getutf8(0x2020)); + addAccents("\\ddag", getutf8(0x2021)); + addAccents("\\ddagger", getutf8(0x2021)); + addAccents("\\textbullet", getutf8(0x2022)); + addAccents("\\bullet", getutf8(0x2022)); + addAccents("\\dots", getutf8(0x2026)); + addAccents("\\ldots", getutf8(0x2026)); + addAccents("\\textasciiacute", getutf8(0x2032)); + addAccents("\\prime", getutf8(0x2032)); + addAccents("\\textasciigrave", getutf8(0x2035)); + addAccents("\\backprime", getutf8(0x2035)); + addAccents("\\textasteriskcentered", getutf8(0x204e)); + addAccents("\\ast", getutf8(0x204e)); + addAccents("\\textmho", getutf8(0x2127)); + addAccents("\\mho", getutf8(0x2127)); + addAccents("\\textleftarrow", getutf8(0x2190)); + addAccents("\\leftarrow", getutf8(0x2190)); + addAccents("\\textuparrow", getutf8(0x2191)); + addAccents("\\uparrow", getutf8(0x2191)); + addAccents("\\textrightarrow", getutf8(0x2192)); + addAccents("\\rightarrow", getutf8(0x2192)); + addAccents("\\textdownarrow", getutf8(0x2193)); + addAccents("\\downarrow", getutf8(0x2193)); + addAccents("\\textglobrise", getutf8(0x2197)); + addAccents("\\nearrow", getutf8(0x2197)); + addAccents("\\textglobfall", getutf8(0x2198)); + addAccents("\\searrow", getutf8(0x2198)); + addAccents("\\textsurd", getutf8(0x221a)); + addAccents("\\surd", getutf8(0x221a)); + addAccents("\\textbigcircle", getutf8(0x25ef)); + addAccents("\\bigcirc", getutf8(0x25ef)); + addAccents("\\textlangle", getutf8(0x27e8)); + addAccents("\\langle", getutf8(0x27e8)); + addAccents("\\textrangle", getutf8(0x27e9)); + addAccents("\\rangle", getutf8(0x27e9)); +} + static void buildAccentsMap() { accents["imath"] = "ı"; accents["i"] = "ı"; accents["jmath"] = "ȷ"; accents["cdot"] = "·"; + accents["textasciicircum"] = "^"; + accents["mathcircumflex"] = "^"; + accents["sim"] = "~"; + accents["guillemotright"] = "»"; + accents["guillemotleft"] = "«"; accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15 accents["thinspace"] = getutf8(0xf0002); // and used _only_ by findadv accents["negthinspace"] = getutf8(0xf0003); // to omit backslashed latex macros @@ -1415,6 +1736,9 @@ static void buildAccentsMap() accents["LaTeX"] = getutf8(0xf0012); accents["latexe"] = getutf8(0xf0013); accents["LaTeXe"] = getutf8(0xf0013); + accents["lyxarrow"] = getutf8(0xf0020); + accents["braceleft"] = getutf8(0xf0030); + accents["braceright"] = getutf8(0xf0031); accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash accents["backslash LyX"] = getutf8(0xf0010); accents["backslash tex"] = getutf8(0xf0011); @@ -1423,6 +1747,7 @@ static void buildAccentsMap() accents["backslash LaTeX"] = getutf8(0xf0012); accents["backslash latexe"] = getutf8(0xf0013); accents["backslash LaTeXe"] = getutf8(0xf0013); + accents["backslash lyxarrow"] = getutf8(0xf0020); accents["ddot{\\imath}"] = "ï"; buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY", "äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ"); // umlaut @@ -1476,6 +1801,8 @@ static void buildAccentsMap() "ȂȃȆȇȊȋȎȏȒȓȖȗ"); // inverted breve buildaccent("slashed", "oO", "øØ"); // slashed + fillMissingUnicodesymbols(); // Add some still not handled entries contained in 'unicodesynbols' + // LYXERR0("Number of accents " << accents.size()); } /* @@ -1488,13 +1815,14 @@ void Intervall::removeAccents() buildAccentsMap(); static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|" "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}" - "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?)))(?![a-zA-Z]))"); + "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))"); smatch sub; for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { sub = *itacc; string key = sub.str(1); - if (accents.find(key) != accents.end()) { - string val = accents[key]; + AccentsIterator it_ac = accents.find(key); + if (it_ac != accents.end()) { + string val = it_ac->second; size_t pos = sub.position(size_t(0)); for (size_t i = 0; i < val.size(); i++) { par[pos+i] = val[i]; @@ -1569,9 +1897,10 @@ int Intervall::nextNotIgnored(int start) const return start; } -typedef map KeysMap; +typedef unordered_map KeysMap; +typedef unordered_map::const_iterator KeysIterator; typedef vector< KeyInfo> Entries; -static KeysMap keys = map(); +static KeysMap keys = unordered_map(); class LatexInfo { private: @@ -1591,7 +1920,7 @@ class LatexInfo { buildKeys(isPatternString); entries_ = vector(); buildEntries(isPatternString); - }; + } int getFirstKey() { entidx_ = 0; if (entries_.empty()) { @@ -1612,7 +1941,7 @@ class LatexInfo { return -1; } return 0; - }; + } int getNextKey() { entidx_++; if (int(entries_.size()) > entidx_) { @@ -1621,7 +1950,7 @@ class LatexInfo { else { return -1; } - }; + } bool setNextKey(int idx) { if ((idx == entidx_) && (entidx_ >= 0)) { entidx_--; @@ -1629,7 +1958,7 @@ class LatexInfo { } else return false; - }; + } int find(int start, KeyInfo::KeyType keytype) const { if (start < 0) return -1; @@ -1640,20 +1969,20 @@ class LatexInfo { tmpIdx++; } return -1; - }; + } int process(ostringstream & os, KeyInfo const & actual); int dispatch(ostringstream & os, int previousStart, KeyInfo & actual); - // string show(int lastpos) { return interval.show(lastpos);}; - int nextNotIgnored(int start) { return interval_.nextNotIgnored(start);}; + // string show(int lastpos) { return interval.show(lastpos);} + int nextNotIgnored(int start) { return interval_.nextNotIgnored(start);} KeyInfo &getKeyInfo(int keyinfo) { static KeyInfo invalidInfo = KeyInfo(); if ((keyinfo < 0) || ( keyinfo >= int(entries_.size()))) return invalidInfo; else return entries_[keyinfo]; - }; - void setForDefaultLang(KeyInfo const & defLang) {interval_.setForDefaultLang(defLang);}; - void addIntervall(int low, int up) { interval_.addIntervall(low, up); }; + } + void setForDefaultLang(KeyInfo const & defLang) {interval_.setForDefaultLang(defLang);} + void addIntervall(int low, int up) { interval_.addIntervall(low, up); } }; @@ -1686,7 +2015,9 @@ class MathInfo { public: string wait; size_t mathEnd; + size_t mathpostfixsize; size_t mathStart; + size_t mathprefixsize; size_t mathSize; }; size_t actualIdx_; @@ -1695,15 +2026,17 @@ class MathInfo { MathInfo() { actualIdx_ = 0; } - void insert(string const & wait, size_t start, size_t end) { + void insert(string const & wait, size_t start, size_t prefixsize, size_t end, size_t postfixsize) { MathEntry m = MathEntry(); m.wait = wait; m.mathStart = start; - m.mathEnd = end; - m.mathSize = end - start; + m.mathprefixsize = prefixsize; + m.mathEnd = end + postfixsize; + m.mathpostfixsize = postfixsize; + m.mathSize = m.mathEnd - m.mathStart; entries_.push_back(m); } - bool empty() const { return entries_.empty(); }; + bool empty() const { return entries_.empty(); } size_t getEndPos() const { if (entries_.empty() || (actualIdx_ >= entries_.size())) { return 0; @@ -1716,6 +2049,18 @@ class MathInfo { } return entries_[actualIdx_].mathStart; } + size_t getPrefixSize() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return 0; + } + return entries_[actualIdx_].mathprefixsize; + } + size_t getPostfixSize() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return 0; + } + return entries_[actualIdx_].mathpostfixsize; + } size_t getFirstPos() { actualIdx_ = 0; return getStartPos(); @@ -1726,13 +2071,13 @@ class MathInfo { } return entries_[actualIdx_].mathSize; } - void incrEntry() { actualIdx_++; }; + void incrEntry() { actualIdx_++; } }; void LatexInfo::buildEntries(bool isPatternString) { - static regex const rmath("\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\}"); - static regex const rkeys("\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?))"); + static regex const rmath("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|x?x?alignat)\\*?\\})(\\{[0-9]+\\})?)"); + static regex const rkeys("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?)))"); static bool disableLanguageOverride = false; smatch sub, submath; bool evaluatingRegexp = false; @@ -1746,52 +2091,56 @@ void LatexInfo::buildEntries(bool isPatternString) KeyInfo found; bool math_end_waiting = false; size_t math_pos = 10000; + size_t math_prefix_size = 1; string math_end; static vector usedText = vector(); + static bool removeMathHull = false; interval_.removeAccents(); for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) { submath = *itmath; + if ((submath.position(2) - submath.position(0)) %2 == 1) { + // prefixed by odd count of '\\' + continue; + } if (math_end_waiting) { - size_t pos = submath.position(size_t(0)); + size_t pos = submath.position(size_t(2)); if ((math_end == "$") && - (submath.str(0) == "$") && - (interval_.par[pos-1] != '\\')) { - mi.insert("$", math_pos, pos + 1); + (submath.str(2) == "$")) { + mi.insert("$", math_pos, 1, pos, 1); math_end_waiting = false; } else if ((math_end == "\\]") && - (submath.str(0) == "\\]")) { - mi.insert("\\]", math_pos, pos + 2); + (submath.str(2) == "\\]")) { + mi.insert("\\]", math_pos, 2, pos, 2); math_end_waiting = false; } - else if ((submath.str(1).compare("end") == 0) && - (submath.str(2).compare(math_end) == 0)) { - mi.insert(math_end, math_pos, pos + submath.str(0).length()); + else if ((submath.str(3).compare("end") == 0) && + (submath.str(5).compare(math_end) == 0)) { + mi.insert(math_end, math_pos, math_prefix_size, pos, submath.str(2).length()); math_end_waiting = false; } else continue; } else { - if (submath.str(1).compare("begin") == 0) { + if (submath.str(3).compare("begin") == 0) { math_end_waiting = true; - math_end = submath.str(2); - math_pos = submath.position(size_t(0)); + math_end = submath.str(5); + math_pos = submath.position(size_t(2)); + math_prefix_size = submath.str(2).length(); } - else if (submath.str(0).compare("\\[") == 0) { + else if (submath.str(2).compare("\\[") == 0) { math_end_waiting = true; math_end = "\\]"; - math_pos = submath.position(size_t(0)); + math_pos = submath.position(size_t(2)); } - else if (submath.str(0) == "$") { - size_t pos = submath.position(size_t(0)); - if ((pos == 0) || (interval_.par[pos-1] != '\\')) { - math_end_waiting = true; - math_end = "$"; - math_pos = pos; - } + else if (submath.str(2) == "$") { + size_t pos = submath.position(size_t(2)); + math_end_waiting = true; + math_end = "$"; + math_pos = pos; } } } @@ -1806,9 +2155,12 @@ void LatexInfo::buildEntries(bool isPatternString) // Disable language keys["foreignlanguage"].disabled = true; disableLanguageOverride = true; + removeMathHull = false; } - else + else { + removeMathHull = true; // used later if not isPatternString disableLanguageOverride = false; + } } else { if (disableLanguageOverride) { @@ -1818,27 +2170,25 @@ void LatexInfo::buildEntries(bool isPatternString) math_pos = mi.getFirstPos(); for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) { sub = *it; - string key = sub.str(3); + if ((sub.position(2) - sub.position(0)) %2 == 1) { + // prefixed by odd count of '\\' + continue; + } + string key = sub.str(5); if (key == "") { - if (sub.str(0)[0] == '\\') - key = sub.str(0)[1]; + if (sub.str(2)[0] == '\\') + key = sub.str(2)[1]; else { - key = sub.str(0); - if (key == "$") { - size_t k_pos = sub.position(size_t(0)); - if ((k_pos > 0) && (interval_.par[k_pos - 1] == '\\')) { - // Escaped '$', ignoring - continue; - } - } + key = sub.str(2); } - }; - if (keys.find(key) != keys.end()) { - if (keys[key].keytype == KeyInfo::headRemove) { - KeyInfo found1 = keys[key]; + } + KeysIterator it_key = keys.find(key); + if (it_key != keys.end()) { + if (it_key->second.keytype == KeyInfo::headRemove) { + KeyInfo found1 = it_key->second; found1.disabled = true; found1.head = "\\" + key + "{"; - found1._tokenstart = sub.position(size_t(0)); + found1._tokenstart = sub.position(size_t(2)); found1._tokensize = found1.head.length(); found1._dataStart = found1._tokenstart + found1.head.length(); int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1); @@ -1848,10 +2198,10 @@ void LatexInfo::buildEntries(bool isPatternString) } } if (evaluatingRegexp) { - if (sub.str(1).compare("endregexp") == 0) { + if (sub.str(3).compare("endregexp") == 0) { evaluatingRegexp = false; // found._tokenstart already set - found._dataEnd = sub.position(size_t(0)) + 13; + found._dataEnd = sub.position(size_t(2)) + 13; found._dataStart = found._dataEnd; found._tokensize = found._dataEnd - found._tokenstart; found.parenthesiscount = 0; @@ -1863,13 +2213,13 @@ void LatexInfo::buildEntries(bool isPatternString) } else { if (evaluatingMath) { - if (size_t(sub.position(size_t(0))) < mi.getEndPos()) + if (size_t(sub.position(size_t(2))) < mi.getEndPos()) continue; evaluatingMath = false; mi.incrEntry(); math_pos = mi.getStartPos(); } - if (keys.find(key) == keys.end()) { + if (it_key == keys.end()) { found = KeyInfo(KeyInfo::isStandard, 0, true); LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text"); found = KeyInfo(KeyInfo::isText, 0, false); @@ -1885,7 +2235,7 @@ void LatexInfo::buildEntries(bool isPatternString) found = keys[key]; if (key.compare("regexp") == 0) { evaluatingRegexp = true; - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); found._tokensize = 0; continue; } @@ -1894,14 +2244,25 @@ void LatexInfo::buildEntries(bool isPatternString) if (found.keytype == KeyInfo::isIgnored) continue; else if (found.keytype == KeyInfo::isMath) { - if (size_t(sub.position(size_t(0))) == math_pos) { + if (size_t(sub.position(size_t(2))) == math_pos) { found = keys[key]; - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); found._tokensize = mi.getSize(); found._dataEnd = found._tokenstart + found._tokensize; found._dataStart = found._dataEnd; found.parenthesiscount = 0; found.head = interval_.par.substr(found._tokenstart, found._tokensize); + if (removeMathHull) { + interval_.addIntervall(found._tokenstart, found._tokenstart + mi.getPrefixSize()); + interval_.addIntervall(found._dataEnd - mi.getPostfixSize(), found._dataEnd); + } + else { + // Treate all math constructs as simple math + interval_.par[found._tokenstart] = '$'; + interval_.par[found._dataEnd - mi.getPostfixSize()] = '$'; + interval_.addIntervall(found._tokenstart + 1, found._tokenstart + mi.getPrefixSize()); + interval_.addIntervall(found._dataEnd - mi.getPostfixSize() + 1, found._dataEnd); + } evaluatingMath = true; } else { @@ -1911,21 +2272,21 @@ void LatexInfo::buildEntries(bool isPatternString) bool discardComment; found = keys[key]; found.keytype = KeyInfo::doRemove; - if ((sub.str(5).compare("longtable") == 0) || - (sub.str(5).compare("tabular") == 0)) { + if ((sub.str(7).compare("longtable") == 0) || + (sub.str(7).compare("tabular") == 0)) { discardComment = true; /* '%' */ } else { discardComment = false; static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$"); smatch sub2; - string token = sub.str(5); + string token = sub.str(7); if (regex_match(token, sub2, removeArgs)) { found.keytype = KeyInfo::removeWithArg; } } - // discard spaces before pos(0) - int pos = sub.position(size_t(0)); + // discard spaces before pos(2) + int pos = sub.position(size_t(2)); int count; for (count = 0; pos - count > 0; count++) { char c = interval_.par[pos-count-1]; @@ -1937,9 +2298,9 @@ void LatexInfo::buildEntries(bool isPatternString) break; } found._tokenstart = pos - count; - if (sub.str(1).compare(0, 5, "begin") == 0) { - size_t pos1 = pos + sub.str(0).length(); - if (sub.str(5).compare("cjk") == 0) { + if (sub.str(3).compare(0, 5, "begin") == 0) { + size_t pos1 = pos + sub.str(2).length(); + if (sub.str(7).compare("cjk") == 0) { pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1; if ((interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}')) pos1 += 2; @@ -1972,7 +2333,7 @@ void LatexInfo::buildEntries(bool isPatternString) } else { // Handle "\end{...}" - found._dataStart = pos + sub.str(0).length(); + found._dataStart = pos + sub.str(2).length(); found._dataEnd = found._dataStart; found._tokensize = count + found._dataEnd - pos; found.parenthesiscount = 0; @@ -1982,16 +2343,16 @@ void LatexInfo::buildEntries(bool isPatternString) } } else if (found.keytype != KeyInfo::isRegex) { - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); if (found.parenthesiscount == 0) { // Probably to be discarded - size_t following_pos = sub.position(size_t(0)) + sub.str(3).length() + 1; + size_t following_pos = sub.position(size_t(2)) + sub.str(5).length() + 1; char following = interval_.par[following_pos]; if (following == ' ') - found.head = "\\" + sub.str(3) + " "; + found.head = "\\" + sub.str(5) + " "; else if (following == '=') { // like \uldepth=1000pt - found.head = sub.str(0); + found.head = sub.str(2); } else found.head = "\\" + key; @@ -2018,12 +2379,17 @@ void LatexInfo::buildEntries(bool isPatternString) key += interval_.par.substr(params, optend-params); evaluatingOptional = true; optionalEnd = optend; + if (found.keytype == KeyInfo::isSectioning) { + // Remove optional values (but still keep in header) + interval_.addIntervall(params, optend); + } } - string token = sub.str(5); + string token = sub.str(7); int closings; if (interval_.par[optend] != '{') { closings = 0; found.parenthesiscount = 0; + found.head = "\\" + key; } else closings = found.parenthesiscount; @@ -2032,7 +2398,7 @@ void LatexInfo::buildEntries(bool isPatternString) } else if (found.parenthesiscount > 1) { if (token != "") { - found.head = sub.str(0) + "{"; + found.head = sub.str(2) + "{"; closings = found.parenthesiscount - 1; } else { @@ -2042,12 +2408,20 @@ void LatexInfo::buildEntries(bool isPatternString) found._tokensize = found.head.length(); found._dataStart = found._tokenstart + found.head.length(); if (found.keytype == KeyInfo::doRemove) { - int endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); - found._dataStart = endpar; - found._tokensize = found._dataStart - found._tokenstart; + if (closings > 0) { + size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); + if (endpar >= interval_.par.length()) + found._dataStart = interval_.par.length(); + else + found._dataStart = endpar; + found._tokensize = found._dataStart - found._tokenstart; + } + else { + found._dataStart = found._tokenstart + found._tokensize; + } closings = 0; } - if (interval_.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) { + if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) { found._dataStart += 15; } size_t endpos; @@ -2128,7 +2502,7 @@ void LatexInfo::buildKeys(bool isPatternString) if (keysBuilt && !isPatternString) return; // Keys to ignore in any case - makeKey("text|textcyrillic|lyxmathsym", KeyInfo(KeyInfo::headRemove, 1, true), true); + makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true); // Known standard keys with 1 parameter. // Split is done, if not at start of region makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString); @@ -2160,7 +2534,6 @@ void LatexInfo::buildKeys(bool isPatternString) makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("guillemotright|guillemotleft", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); // Spaces makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); @@ -2420,8 +2793,11 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) } case KeyInfo::isSize: { if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) { - processRegion(actual._dataEnd, actual._dataEnd+1); /* remove possibly following {} */ - interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + if (actual.parenthesiscount == 0) + interval_.addIntervall(actual._tokenstart, actual._dataEnd); + else { + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + } nextKeyIdx = getNextKey(); } else { // Here _dataStart points to '{', so correct it @@ -2491,7 +2867,12 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) case KeyInfo::doRemove: { // Remove the key with all parameters and following spaces size_t pos; - for (pos = actual._dataEnd+1; pos < interval_.par.length(); pos++) { + size_t start; + if (interval_.par[actual._dataEnd-1] == ' ') + start = actual._dataEnd; + else + start = actual._dataEnd+1; + for (pos = start; pos < interval_.par.length(); pos++) { if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%')) break; } @@ -2709,7 +3090,10 @@ int LatexInfo::process(ostringstream & os, KeyInfo const & actual ) } if (dstart < output_end) interval_.output(os, output_end); - interval_.addIntervall(actual._tokenstart, end); + if (nextKeyIdx < 0) + interval_.addIntervall(0, end); + else + interval_.addIntervall(actual._tokenstart, end); return nextKeyIdx; } @@ -2793,7 +3177,7 @@ string splitOnKnownMacros(string par, bool isPatternString) * Resulting modified string is set to "", if * the searched tex does not contain all the features in the search pattern */ -static string correctlanguagesetting(string par, bool isPatternString, bool withformat) +static string correctlanguagesetting(string par, bool isPatternString, bool withformat, lyx::Buffer *pbuf = nullptr) { static Features regex_f; static int missed = 0; @@ -2813,8 +3197,24 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with // Split the latex input into pieces which // can be digested by our search engine LYXERR(Debug::FIND, "input: \"" << par << "\""); + if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language + // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX + string doclang = pbuf->params().language->polyglossia(); + static regex langre("\\\\(foreignlanguage)\\{([^\\}]+)\\}"); + smatch sub; + bool toIgnoreLang = true; + for (sregex_iterator it(par.begin(), par.end(), langre), end; it != end; ++it) { + sub = *it; + if (sub.str(2) != doclang) { + toIgnoreLang = false; + break; + } + } + setIgnoreFormat("language", toIgnoreLang, false); + + } result = splitOnKnownMacros(par.substr(0,parlen), isPatternString); - LYXERR(Debug::FIND, "After split: \"" << result << "\""); + LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\""); } else result = par.substr(0, parlen); @@ -2843,6 +3243,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with return ""; } } + } else { // LYXERR(Debug::INFO, "No regex formats"); @@ -2875,7 +3276,91 @@ static int identifyClosing(string & t) static int num_replaced = 0; static bool previous_single_replace = true; -MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt) +void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string) +{ +#if QTSEARCH + // Handle \w properly + QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption; + if (! opt.casesensitive) { + popts |= QRegularExpression::CaseInsensitiveOption; + } + regexp = QRegularExpression(QString::fromStdString(regexp_str), popts); + regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts); + regexError = ""; + if (regexp.isValid() && regexp2.isValid()) { + regexIsValid = true; + // Check '{', '}' pairs inside the regex + int balanced = 0; + int skip = 1; + for (unsigned i = 0; i < par_as_string.size(); i+= skip) { + char c = par_as_string[i]; + if (c == '\\') { + skip = 2; + continue; + } + if (c == '{') + balanced++; + else if (c == '}') { + balanced--; + if (balanced < 0) + break; + } + skip = 1; + } + if (balanced != 0) { + regexIsValid = false; + regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\""; + } + } + else { + regexIsValid = false; + if (!regexp.isValid()) + regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString(); + else + regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString(); + } +#else + (void)par_as_string; + if (opt.casesensitive) { + regexp = regex(regexp_str); + regexp2 = regex(regexp2_str); + } + else { + regexp = regex(regexp_str, std::regex_constants::icase); + regexp2 = regex(regexp2_str, std::regex_constants::icase); + } +#endif +} + +static void modifyRegexForMatchWord(string &t) +{ + string s(""); + regex wordre("(\\\\)*((\\.|\\\\b))"); + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else if (sub.str(2) == "\\\\b") + return; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += "\\S"; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) { + s = "\\b" + t + "\\b"; + t = s; + return; + } + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = "\\b" + s + "\\b"; +} + +MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) : p_buf(&buf), p_first_buf(&buf), opt(opt) { Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true); @@ -2890,62 +3375,67 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & previous_single_replace = true; } // When using regexp, braces are hacked already by escape_for_regex() - par_as_string = normalize(ds, !use_regexp); + par_as_string = normalize(ds); open_braces = 0; close_wildcards = 0; size_t lead_size = 0; // correct the language settings - par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat); - if (opt.ignoreformat) { - if (!use_regexp) { - // if par_as_string_nolead were emty, - // the following call to findAux will always *find* the string - // in the checked data, and thus always using the slow - // examining of the current text part. - par_as_string_nolead = par_as_string; + par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat, &buf); + opt.matchAtStart = false; + if (!use_regexp) { + identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string + if (opt.ignoreformat) { + lead_size = 0; } - } else { + else { + lead_size = identifyLeading(par_as_string); + } + lead_as_string = par_as_string.substr(0, lead_size); + string lead_as_regex_string = string2regex(lead_as_string); + par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); + string par_as_regex_string_nolead = string2regex(par_as_string_nolead); + /* Handle whole words too in this case + */ + if (opt.matchword) { + par_as_regex_string_nolead = "\\b" + par_as_regex_string_nolead + "\\b"; + opt.matchword = false; + } + string regexp_str = "(" + lead_as_regex_string + ")()" + par_as_regex_string_nolead; + string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead; + CreateRegexp(opt, regexp_str, regexp2_str); + use_regexp = true; + LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + return; + } + + if (!opt.ignoreformat) { lead_size = identifyLeading(par_as_string); LYXERR(Debug::FIND, "Lead_size: " << lead_size); lead_as_string = par_as_string.substr(0, lead_size); par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); } - if (!use_regexp) { - open_braces = identifyClosing(par_as_string); - identifyClosing(par_as_string_nolead); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'"); - } else { + // Here we are using regexp + LASSERT(use_regexp, /**/); + { string lead_as_regexp; if (lead_size > 0) { - // @todo No need to search for \regexp{} insets in leading material - lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat); + lead_as_regexp = string2regex(par_as_string.substr(0, lead_size)); + (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", ""); par_as_string = par_as_string_nolead; LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); } - LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); + // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat); // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them. - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - if ( - // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex) - regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2") - // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])( \\\\\\\\\\\\\\])\\'", "$1(.*?)$2") - // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, - "(.*[^\\\\])( \\\\\\\\end\\\\\\{[a-zA-Z_]*)(\\\\\\*)?(\\\\\\})\\'", "$1(.*?)$2$3$4") - // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2") - ) { - ++close_wildcards; - } + // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + ++close_wildcards; + size_t lng = par_as_string.size(); if (!opt.ignoreformat) { // Remove extra '\}' at end if not part of \{\.\} - size_t lng = par_as_string.size(); while(lng > 2) { if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) { if (lng >= 6) { @@ -2960,23 +3450,16 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & } if (lng < par_as_string.size()) par_as_string = par_as_string.substr(0,lng); - /* - // save '\.' - regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_"); - // handle '.' -> '[^]', replace later as '[^\}\{\\]' - regex_replace(par_as_string, par_as_string, "\\.", "[^]"); - // replace '[^...]' with '[^...\}\{\\]' - regex_replace(par_as_string, par_as_string, "\\[\\^([^\\\\\\]]*)\\]", "_xxbrlxx_$1\\}\\{\\\\_xxbrrxx_"); - regex_replace(par_as_string, par_as_string, "_xxbrlxx_", "[^"); - regex_replace(par_as_string, par_as_string, "_xxbrrxx_", "]"); - // restore '\.' - regex_replace(par_as_string, par_as_string, "_xxbdotxx_", "\\."); - */ } - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Close .*? : " << close_wildcards); - LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); + LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'"); + if ((lng > 0) && (par_as_string[0] == '^')) { + par_as_string = par_as_string.substr(1); + --lng; + opt.matchAtStart = true; + } + // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + // LYXERR(Debug::FIND, "Open braces: " << open_braces); + // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); // If entered regexp must match at begin of searched string buffer // Kornel: Added parentheses to use $1 for size of the leading string @@ -2991,151 +3474,50 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & string dest = "\\" + std::to_string(i+2); while (regex_replace(par_as_string, par_as_string, orig, dest)); } + if (opt.matchword) { + modifyRegexForMatchWord(par_as_string); + opt.matchword = false; + } regexp_str = "(" + lead_as_regexp + ")()" + par_as_string; regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string; } LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); -#if QTSEARCH - // Handle \w properly - QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption; - if (! opt.casesensitive) { - popts |= QRegularExpression::CaseInsensitiveOption; - } - regexp = QRegularExpression(QString::fromStdString(regexp_str), popts); - regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts); - regexError = ""; - if (regexp.isValid() && regexp2.isValid()) { - regexIsValid = true; - // Check '{', '}' pairs inside the regex - int balanced = 0; - int skip = 1; - for (unsigned i = 0; i < par_as_string.size(); i+= skip) { - char c = par_as_string[i]; - if (c == '\\') { - skip = 2; - continue; - } - if (c == '{') - balanced++; - else if (c == '}') { - balanced--; - if (balanced < 0) - break; - } - skip = 1; - } - if (balanced != 0) { - regexIsValid = false; - regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\""; - } - } - else { - regexIsValid = false; - if (!regexp.isValid()) - regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString(); - if (!regexp2.isValid()) - regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString(); - } -#else - if (opt.casesensitive) { - regexp = regex(regexp_str); - regexp2 = regex(regexp2_str); - } - else { - regexp = regex(regexp_str, std::regex_constants::icase); - regexp2 = regex(regexp2_str, std::regex_constants::icase); - } -#endif + CreateRegexp(opt, regexp_str, regexp2_str, par_as_string); } } - -// Count number of characters in string -// {]} ==> 1 -// \& ==> 1 -// --- ==> 1 -// \\[a-zA-Z]+ ==> 1 -#if QTSEARCH -static int computeSize(QStringRef s, int len) -#define isLyxAlpha(arg) arg.isLetter() -#else -static int computeSize(string s, int len) -#define isLyxAlpha(arg) isalpha(arg) -#endif -{ - if (len == 0) - return 0; - int skip = 1; - int count = 0; - for (int i = 0; i < len; i += skip, count++) { - if (s.at(i) == '\\') { - skip = 2; - if (i + 1 < len && isLyxAlpha(s.at(i+1))) { - for (int j = 2; i+j < len; j++) { - if (! isLyxAlpha(s.at(i+j))) { - if (s.at(i+j) == ' ') - skip++; - else if (s.at(i+j) == '{') { - if (i+j+1 < len && s.at(i+j+1) == '}') - skip += 2; - else if (i + j + 1 >= len) - skip++; - } - break; - } - skip++; - } - } - } - else if (s.at(i) == '{') { - if (i + 1 < len && s.at(i+1) == '}') - skip = 2; - else - skip = 3; - } - else if (s.at(i) == '-') { - if (i+1 < len && s.at(i+1) == '-') { - if (i + 2 < len && s.at(i+2) == '-') - skip = 3; - else - skip = 2; - } - else - skip = 1; - } - else { - skip = 1; - } - } - return count; -} - MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const { MatchResult mres; + mres.searched_size = len; if (at_begin && (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) ) return mres; docstring docstr = stringifyFromForSearch(opt, cur, len); string str; - if (use_regexp || opt.casesensitive) - str = normalize(docstr, true); - else - str = normalize(lowercase(docstr), true); + str = normalize(docstr); if (!opt.ignoreformat) { str = correctlanguagesetting(str, false, !opt.ignoreformat); + // remove closing '}' and '\n' to allow for use of '$' in regex + size_t lng = str.size(); + while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n'))) + lng--; + if (lng != str.size()) { + str = str.substr(0, lng); + } } if (str.empty()) { mres.match_len = -1; return mres; } - LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); - LYXERR(Debug::FIND, "After normalization: '" << str << "'"); + LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'"); - if (use_regexp) { + LASSERT(use_regexp, /**/); + { + // use_regexp always true LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); #if QTSEARCH QString qstr = QString::fromStdString(str); @@ -3149,11 +3531,6 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags); if (!match.hasMatch()) return mres; - // Check braces on segments that matched all (.*?) subexpressions, - // except the last "padding" one inserted by lyx. - for (int i = 3; i < match.lastCapturedIndex(); ++i) - if (!braces_match(match.captured(i), open_braces)) - return mres; #else regex const *p_regexp; regex_constants::match_flag_type flags; @@ -3168,95 +3545,106 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be if (re_it == sregex_iterator()) return mres; match_results const & m = *re_it; - // Check braces on segments that matched all (.*?) subexpressions, - // except the last "padding" one inserted by lyx. - for (size_t i = 3; i < m.size() - 1; ++i) - if (!braces_match(m[i], open_braces)) - return mres; #endif - // Exclude from the returned match length any length - // due to close wildcards added at end of regexp - // and also the length of the leading (e.g. '\emph{}') + // Whole found string, including the leading + // std: m[0].second - m[0].first + // Qt: match.capturedEnd(0) - match.capturedStart(0) // - // Whole found string, including the leading: m[0].second - m[0].first - // Size of the leading string: m[1].second - m[1].first + // Size of the leading string + // std: m[1].second - m[1].first + // Qt: match.capturedEnd(1) - match.capturedStart(1) int leadingsize = 0; - int result; #if QTSEARCH if (match.lastCapturedIndex() > 0) { leadingsize = match.capturedEnd(1) - match.capturedStart(1); } - int lastidx = match.lastCapturedIndex(); - for (int i = 0; i <= lastidx; i++) { - LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long"); - } - if (close_wildcards == 0) - result = match.capturedEnd(0) - match.capturedStart(0); - else - result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0); #else if (m.size() > 2) { leadingsize = m[1].second - m[1].first; } - for (size_t i = 0; i < m.size(); i++) { - LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long"); - } - if (close_wildcards == 0) - result = m[0].second - m[0].first; - else - result = m[m.size() - close_wildcards].first - m[0].first; #endif - if (result > leadingsize) - result -= leadingsize; - else - result = 0; #if QTSEARCH mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2); - // mres.match_len = computeSize(QStringRef(&qstr, pos+leadingsize,result), result) - mres.match_prefix; mres.match_len = match.capturedEnd(0) - match.capturedEnd(2); - // mres.match2end = qstr.size() - pos - leadingsize - mres.match_prefix; - mres.match2end = qstr.size() - match.capturedEnd(0); + // because of different number of closing at end of string + // we have to 'unify' the length of the post-match. + // Done by ignoring closing parenthesis and linefeeds at string end + int matchend = match.capturedEnd(0); + size_t strsize = qstr.size(); + if (!opt.ignoreformat) { + while (mres.match_len > 0) { + QChar c = qstr.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > (size_t) match.capturedEnd(0)) { + QChar c = qstr.at(strsize-1); + if ((c == '\n') || (c == '}')) { + --strsize; + } + else + break; + } + } + // LYXERR0(qstr.toStdString()); + mres.match2end = strsize - matchend; mres.pos = match.capturedStart(2); #else mres.match_prefix = m[2].second - m[2].first; - // mres.match_len = computeSize(str.substr(pos+leadingsize,result), result) - mres.match_prefix; mres.match_len = m[0].second - m[2].second; - // mres.match2end = str.size() - pos - leadingsize - mres.match_prefix; - mres.match2end = str.size() - m[0].second; - mres.pos = m[2].first; + // ignore closing parenthesis and linefeeds at string end + size_t strend = m[0].second - m[0].first; + int matchend = strend; + size_t strsize = str.size(); + if (!opt.ignoreformat) { + while (mres.match_len > 0) { + char c = str.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > strend) { + if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) { + --strsize; + } + else + break; + } + } + // LYXERR0(str); + mres.match2end = strsize - matchend; + mres.pos = m[2].first - m[0].first;; #endif + if (mres.match2end < 0) + mres.match_len = 0; mres.leadsize = leadingsize; - return mres; - } - - // else !use_regexp: but all code paths above return - LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='" - << par_as_string << "', str='" << str << "'"); - LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='" - << lead_as_string << "', par_as_string_nolead='" - << par_as_string_nolead << "'"); - - if (at_begin) { - LYXERR(Debug::FIND, "size=" << par_as_string.size() - << ", substr='" << str.substr(0, par_as_string.size()) << "'"); - if (str.substr(0, par_as_string.size()) == par_as_string) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size(); - mres.pos = 0; - return mres; +#if QTSEARCH + if (mres.match_len > 0) { + string a0 = match.captured(0).mid(mres.pos + mres.match_prefix, mres.match_len).toStdString(); + mres.result.push_back(a0); + for (int i = 3; i <= match.lastCapturedIndex(); i++) { + mres.result.push_back(match.captured(i).toStdString()); + } } - } else { - // Start the search _after_ the leading part - size_t pos = str.find(par_as_string_nolead, lead_as_string.size()); - if (pos != string::npos) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size() - pos; - mres.pos = pos; - return mres; +#else + if (mres.match_len > 0) { + string a0 = m[0].str().substr(mres.pos + mres.match_prefix, mres.match_len); + mres.result.push_back(a0); + for (size_t i = 3; i < m.size(); i++) { + mres.result.push_back(m[i]); + } } +#endif + return mres; } - return mres; } @@ -3266,47 +3654,45 @@ MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at int res = mres.match_len; LYXERR(Debug::FIND, "res=" << res << ", at_begin=" << at_begin - << ", matchword=" << opt.matchword + << ", matchAtStart=" << opt.matchAtStart << ", inTexted=" << cur.inTexted()); - if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted()) + if (opt.matchAtStart) { + if (cur.pos() != 0) + mres.match_len = 0; + else if (mres.match_prefix > 0) + mres.match_len = 0; return mres; - if ((len > 0) && (res < len)) { - mres.match_len = 0; - return mres; - } - Paragraph const & par = cur.paragraph(); - bool ws_left = (cur.pos() > 0) - ? par.isWordSeparator(cur.pos() - 1) - : true; - bool ws_right; - if (len < 0) - ws_right = true; - else { - ws_right = (cur.pos() + len < par.size()) - ? par.isWordSeparator(cur.pos() + len) - : true; } - LYXERR(Debug::FIND, - "cur.pos()=" << cur.pos() << ", res=" << res - << ", separ: " << ws_left << ", " << ws_right - << ", len: " << len - << endl); - if (ws_left && ws_right) { - // Check for word separators inside the found 'word' - for (int i = 0; i < len; i++) { - if (par.isWordSeparator(cur.pos() + i)) { - mres.match_len = 0; - return mres; - } - } - return mres; - } - mres.match_len = 0; - return mres; + else + return mres; } +#if 0 +static bool simple_replace(string &t, string from, string to) +{ + regex repl("(\\\\)*(" + from + ")"); + string s(""); + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), repl), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) + continue; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += to; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + return false; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = s; + return true; +} +#endif -string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const +string MatchStringAdv::normalize(docstring const & s) const { string t; t = lyx::to_utf8(s); @@ -3340,23 +3726,12 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to seach for colored text too - LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); + LYXERR(Debug::FIND, "Removing stale empty macros from: " << t); while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); - // FIXME - check what preceeds the brace - if (hack_braces) { - if (opt.ignoreformat) - while (regex_replace(t, t, "\\{", "_x_<") - || regex_replace(t, t, "\\}", "_x_>")) - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); - else - while (regex_replace(t, t, "\\\\\\{", "_x_<") - || regex_replace(t, t, "\\\\\\}", "_x_>")) - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); - } return t; } @@ -3378,11 +3753,18 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) runparams.linelen = 10000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + int option = AS_STR_INSETS | AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } LYXERR(Debug::FIND, "Stringifying with cur: " << cur << ", from pos: " << cur.pos() << ", end: " << end); return par.asString(cur.pos(), end, - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams); } else if (cur.inMathed()) { CursorSlice cs = cur.top(); @@ -3410,9 +3792,11 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) */ docstring latexifyFromCursor(DocIterator const & cur, int len) { + /* LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); + */ Buffer const & buf = *cur.buffer(); odocstringstream ods; @@ -3424,7 +3808,12 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) runparams.linelen = 8000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + if (ignoreFormats.getDeleted()) { + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } if (cur.inTexted()) { // @TODO what about searching beyond/across paragraph breaks ? @@ -3477,78 +3866,145 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) return ods.str(); } +#if defined(ResultsDebug) +// Debugging output +static void displayMResult(MatchResult &mres, string from, DocIterator & cur) +{ + LYXERR0( "from:\t\t\t" << from); + string status; + if (mres.pos_len > 0) { + // Set in finalize + status = "FINALSEARCH"; + } + else { + if (mres.match_len > 0) { + if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize)) + status = "Good Match"; + else + status = "Matched in"; + } + else + status = "MissedSearch"; + } + + LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")"); + if ((mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0)) + LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")"); + if ((mres.pos > 0) || (mres.match_prefix > 0)) + LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")"); + for (size_t i = 0; i < mres.result.size(); i++) + LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\""); +} + #define displayMres(s, txt, cur) displayMResult(s, txt, cur); +#else + #define displayMres(s, txt, cur) +#endif /** Finalize an advanced find operation, advancing the cursor to the innermost ** position that matches, plus computing the length of the matching text to ** be selected + ** Return the cur.pos() difference between start and end of found match **/ -int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match) +MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1)) { // Search the foremost position that matches (avoids find of entire math // inset when match at start of it) - size_t d; DocIterator old_cur(cur.buffer()); - do { - LYXERR(Debug::FIND, "Forwarding one step (searching for innermost match)"); - d = cur.depth(); + MatchResult mres; + static MatchResult fail = MatchResult(); + MatchResult max_match; + // If (prefix_len > 0) means that forwarding 1 position will remove the complete entry + // Happens with e.g. hyperlinks + // either one sees "http://www.bla.bla" or nothing + // so the search for "www" gives prefix_len = 7 (== sizeof("http://") + // and although we search for only 3 chars, we find the whole hyperlink inset + bool at_begin = (expected.match_prefix == 0); + if (!match.opt.forward && match.opt.ignoreformat) { + if (expected.pos > 0) + return fail; + } + LASSERT(at_begin, /**/); + if (expected.match_len > 0 && at_begin) { + // Search for deepest match old_cur = cur; - cur.forwardPos(); - } while (cur && cur.depth() > d && match(cur).match_len > 0); - cur = old_cur; - int max_match = match(cur).match_len; /* match valid only if not searching whole words */ - if (max_match <= 0) return 0; + max_match = expected; + do { + size_t d = cur.depth(); + cur.forwardPos(); + if (!cur) + break; + if (cur.depth() < d) + break; + if (cur.depth() == d) + break; + size_t lastd = d; + while (cur && cur.depth() > lastd) { + lastd = cur.depth(); + mres = match(cur, -1, at_begin); + displayMres(mres, "Checking innermost", cur); + if (mres.match_len > 0) + break; + // maybe deeper? + cur.forwardPos(); + } + if (mres.match_len < expected.match_len) + break; + max_match = mres; + old_cur = cur;; + } while(1); + cur = old_cur; + } + else { + // (expected.match_len <= 0) + mres = match(cur); /* match valid only if not searching whole words */ + displayMres(mres, "Start with negative match", cur); + max_match = mres; + } + if (max_match.match_len <= 0) return fail; LYXERR(Debug::FIND, "Ok"); // Compute the match length - int len = 1; + int len = 1; if (cur.pos() + len > cur.lastpos()) - return 0; - if (match.opt.matchword) { - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) { - ++len; - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - } - // Length of matched text (different from len param) - int old_match = match(cur, len).match_len; - if (old_match < 0) - old_match = 0; - int new_match; + return fail; + + LASSERT(match.use_regexp, /**/); + { + int minl = 1; + int maxl = cur.lastpos() - cur.pos(); // Greedy behaviour while matching regexps - while ((new_match = match(cur, len + 1).match_len) > old_match) { - ++len; - old_match = new_match; - LYXERR(Debug::FIND, "verifying match with len = " << len); + while (maxl > minl) { + MatchResult mres2; + mres2 = match(cur, len, at_begin); + displayMres(mres2, "Finalize loop", cur); + int actual_match_len = mres2.match_len; + if (actual_match_len >= max_match.match_len) { + // actual_match_len > max_match _can_ happen, + // if the search area splits + // some following word so that the regex + // (e.g. 'r.*r\b' matches 'r' from the middle of the + // splitted word) + // This means, the len value is too big + actual_match_len = max_match.match_len; + max_match = mres2; + max_match.match_len = actual_match_len; + maxl = len; + if (maxl - minl < 4) + len = (maxl + minl)/2; + else + len = minl + (maxl - minl + 3)/4; + } + else { + // (actual_match_len < max_match.match_len) + minl = len + 1; + len = (maxl + minl)/2; + } } - if (old_match == 0) - len = 0; - } - else { - int minl = 1; - int maxl = cur.lastpos() - cur.pos(); - // Greedy behaviour while matching regexps - while (maxl > minl) { - int actual_match = match(cur, len).match_len; - if (actual_match >= max_match) { - // actual_match > max_match _can_ happen, - // if the search area splits - // some following word so that the regex - // (e.g. 'r.*r\b' matches 'r' from the middle of the - // splitted word) - // This means, the len value is too big - maxl = len; - len = (int)((maxl + minl)/2); - } - else { - // (actual_match < max_match) - minl = len + 1; - len = (int)((maxl + minl)/2); - } - } + len = minl; old_cur = cur; // Search for real start of matched characters while (len > 1) { - int actual_match; + MatchResult actual_match; do { cur.forwardPos(); } while (cur.depth() > old_cur.depth()); /* Skip inner insets */ @@ -3559,11 +4015,12 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match) } if (cur.pos() != old_cur.pos()) { // OK, forwarded 1 pos in actual inset - actual_match = match(cur, len-1).match_len; - if (actual_match == max_match) { + actual_match = match(cur, len-1, at_begin); + if (actual_match.match_len == max_match.match_len) { // Ha, got it! The shorter selection has the same match length len--; old_cur = cur; + max_match = actual_match; } else { // OK, the shorter selection matches less chars, revert to previous value @@ -3573,146 +4030,131 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match) } else { LYXERR(Debug::INFO, "cur.pos() == old_cur.pos(), this should never happen"); - actual_match = match(cur, len).match_len; - if (actual_match == max_match) + actual_match = match(cur, len, at_begin); + if (actual_match.match_len == max_match.match_len) { old_cur = cur; + max_match = actual_match; + } } } - } - return len; -} - -#if 0 -static void displayMResult(MatchResult &mres) -{ - LYXERR0( "pos: " << mres.pos); - LYXERR0( "leadsize: " << mres.leadsize); - LYXERR0( "match_len: " << mres.match_len); - LYXERR0( "match_prefix: " << mres.match_prefix); - LYXERR0( "match2end: " << mres.match2end); + if (len == 0) + return fail; + else { + max_match.pos_len = len; + displayMres(max_match, "SEARCH RESULT", cur) + return max_match; + } + } } - #define displayMres(s) displayMResult(s); -#else - #define displayMres(s) -#endif /// Finds forward -int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) +int findForwardAdv(DocIterator & cur, MatchStringAdv & match) { if (!cur) return 0; + bool repeat = false; + DocIterator orig_cur; // to be used if repeat not successful + MatchResult orig_mres; while (!theApp()->longOperationCancelled() && cur) { + //(void) findAdvForwardInnermost(cur); LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); MatchResult mres = match(cur, -1, false); - displayMres(mres) + string msg = "Starting"; + if (repeat) + msg = "Repeated"; + displayMres(mres, msg + " findForwardAdv", cur) int match_len = mres.match_len; if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) { LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); match_len = 0; } - if (match_len > 0) { + if (match_len <= 0) { + // This should exit nested insets, if any, or otherwise undefine the currsor. + cur.pos() = cur.lastpos(); + LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); + cur.forwardPos(); + } + else { // match_len > 0 // Try to find the begin of searched string int increment; - int firstInvalid = 100000; - if (mres.match_prefix + mres.pos - mres.leadsize > 0) - increment = (mres.match_prefix + mres.pos - mres.leadsize)/2; - else - increment = 10; + int firstInvalid = cur.lastpos() - cur.pos(); + { + int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4; + int incrcur = (firstInvalid + 1 )*3/4; + if (incrcur < incrmatch) + increment = incrcur; + else + increment = incrmatch; + if (increment < 1) + increment = 1; + } LYXERR(Debug::FIND, "Set increment to " << increment); while (increment > 0) { DocIterator old_cur = cur; - for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { - } - if (! cur || (cur.pit() > old_cur.pit())) { - // Are we outside of the paragraph? - // This can happen if moving past some UTF8-encoded chars - cur = old_cur; + if (cur.pos() + increment >= cur.lastpos()) { increment /= 2; + continue; } - else { - MatchResult mres2 = match(cur, -1, false); - displayMres(mres2) - switch (interpretMatch(mres, mres2)) { - case MatchResult::newIsTooFar: - // behind the expected match - firstInvalid = increment; - cur = old_cur; - increment /= 2; - break; - case MatchResult::newIsBetter: - // not reached yet - mres = mres2; - firstInvalid -= increment; - if (increment > firstInvalid/2) - increment = firstInvalid/2; - break; - default: - // Handle not like MatchResult::newIsTooFar - LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix); - firstInvalid--; - increment = firstInvalid -1; - cur = old_cur; - break; - } - } - } - // LYXERR0("Leaving first loop"); - int match_len_zero_count = 0; - MatchResult mres3; - for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { - if (i++ > 3) { - mres3 = match(cur, -1, false); - displayMres(mres3) - int remaining_len = mres3.match_len; - if (remaining_len <= 0) { - // Apparently the searched string is not in the remaining part + cur.pos() = cur.pos() + increment; + MatchResult mres2 = match(cur, -1, false); + displayMres(mres2, "findForwardAdv loop", cur) + switch (interpretMatch(mres, mres2)) { + case MatchResult::newIsTooFar: + // behind the expected match + firstInvalid = increment; + cur = old_cur; + increment /= 2; break; - } - else { - i = 0; - } + case MatchResult::newIsBetter: + // not reached yet, but cur.pos()+increment is bettert + mres = mres2; + firstInvalid -= increment; + if (increment > firstInvalid*3/4) + increment = firstInvalid*3/4; + if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) { + if (increment >= mres2.match_prefix) + increment = (mres2.match_prefix+1)*3/4; + } + break; + default: + // Todo@ + // Handle not like MatchResult::newIsTooFar + LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix); + firstInvalid--; + increment = increment*3/4; + cur = old_cur; + break; } - LYXERR(Debug::FIND, "Advancing cur: " << cur); - mres3 = match(cur, 1); - displayMres(mres3) - int match_len3 = mres3.match_len; - if (match_len3 < 0) + } + if (mres.match_len > 0) { + if (mres.match_prefix + mres.pos - mres.leadsize > 0) { + // The match seems to indicate some deeper level + repeat = true; + orig_cur = cur; + orig_mres = mres; + cur.forwardPos(); continue; - mres3 = match(cur); - displayMres(mres3) - int match_len2 = mres3.match_len; - LYXERR(Debug::FIND, "match_len2: " << match_len2); - if (match_len2 > 0) { - // Sometimes in finalize we understand it wasn't a match - // and we need to continue the outest loop - LYXERR(Debug::FIND, "Finalizing"); - int len = findAdvFinalize(cur, match); - if (len > 0) { - return len; - } - } - if (match_len2 > 0) - match_len_zero_count = 0; - else if (match_len2 == 0) - match_len_zero_count++; - if (match_len2 < 0) { - if (++match_len_zero_count > 3) { - LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len); - } - break; } } - if (!cur) - return 0; - } - if (match_len >= 0 && cur.pit() < cur.lastpit()) { - LYXERR(Debug::FIND, "Advancing par: cur=" << cur); - cur.forwardPar(); - } else { - // This should exit nested insets, if any, or otherwise undefine the currsor. - cur.pos() = cur.lastpos(); - LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); - cur.forwardPos(); + else if (repeat) { + // should never be reached. + cur = orig_cur; + mres = orig_mres; + } + // LYXERR0("Leaving first loop"); + LYXERR(Debug::FIND, "Finalizing 1"); + MatchResult found_match = findAdvFinalize(cur, match, mres); + if (found_match.match_len > 0) { + LASSERT(found_match.pos_len > 0, /**/); + match.FillResults(found_match); + return found_match.pos_len; + } + else { + // try next possible match + cur.forwardPos(); + repeat = false; + continue; + } } } return 0; @@ -3720,11 +4162,12 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) /// Find the most backward consecutive match within same paragraph while searching backwards. -int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) +MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, MatchResult &expected) { - DocIterator cur_begin = doc_iterator_begin(cur.buffer()); + DocIterator cur_begin = cur; + cur_begin.pos() = 0; DocIterator tmp_cur = cur; - int len = findAdvFinalize(tmp_cur, match); + MatchResult mr = findAdvFinalize(tmp_cur, match, expected); Inset & inset = cur.inset(); for (; cur != cur_begin; cur.backwardPos()) { LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur); @@ -3732,13 +4175,13 @@ int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) new_cur.backwardPos(); if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len) break; - int new_len = findAdvFinalize(new_cur, match); - if (new_len == len) + MatchResult new_mr = findAdvFinalize(new_cur, match, expected); + if (new_mr.match_len == mr.match_len) break; - len = new_len; + mr = new_mr; } LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur); - return len; + return mr; } @@ -3756,9 +4199,9 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) bool pit_changed = false; do { cur.pos() = 0; - bool found_match = (match(cur, -1, false).match_len > 0); + MatchResult found_match = match(cur, -1, false); - if (found_match) { + if (found_match.match_len > 0) { if (pit_changed) cur.pos() = cur.lastpos(); else @@ -3766,11 +4209,16 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur); DocIterator cur_prev_iter; do { - found_match = (match(cur).match_len > 0); + found_match = match(cur); LYXERR(Debug::FIND, "findBackAdv3: found_match=" - << found_match << ", cur: " << cur); - if (found_match) - return findMostBackwards(cur, match); + << (found_match.match_len > 0) << ", cur: " << cur); + if (found_match.match_len > 0) { + MatchResult found_mr = findMostBackwards(cur, match, found_match); + if (found_mr.pos_len > 0) { + match.FillResults(found_mr); + return found_mr.pos_len; + } + } // Stop if begin of document reached if (cur == cur_begin) @@ -3871,9 +4319,36 @@ static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase other right = pit->size(); pit->changeCase(buffer.params(), pos_type(1), right, others_case); } - } // namespace +static bool replaceMatches(string &t, int maxmatchnum, vector const & replacements) +{ + // Should replace the string "$" + std::to_string(matchnum) with replacement + // if the char '$' is not prefixed with odd number of char '\\' + static regex const rematch("(\\\\)*(\\$\\$([0-9]))"); + string s; + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), rematch), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) + continue; + int num = stoi(sub.str(3), nullptr, 10); + if (num >= maxmatchnum) + continue; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += replacements[num]; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + return false; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = s; + return true; +} + /// static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv) { @@ -3904,7 +4379,10 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma ostringstream oss; repl_buffer_orig.write(oss); string lyx = oss.str(); - Buffer repl_buffer("", false); + if (matchAdv.valid_matches > 0) + replaceMatches(lyx, matchAdv.valid_matches, matchAdv.matches); + Buffer repl_buffer(string(), false); + repl_buffer.setInternal(true); repl_buffer.setUnnamed(true); LASSERT(repl_buffer.readString(lyx), return 0); if (opt.keep_case && sel_len >= 2) { @@ -3925,6 +4403,7 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma LYXERR(Debug::FIND, "Before pasteParagraphList() cur=" << cur << endl); cap::pasteParagraphList(cur, repl_buffer.paragraphs(), repl_buffer.params().documentClassPtr(), + repl_buffer.params().authors(), bv->buffer().errorList("Paste")); LYXERR(Debug::FIND, "After pasteParagraphList() cur=" << cur << endl); sel_len = repl_buffer.paragraphs().begin()->size(); @@ -3964,10 +4443,10 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma /// Perform a FindAdv operation. -bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) +bool findAdv(BufferView * bv, FindAndReplaceOptions & opt) { DocIterator cur; - int match_len = 0; + int pos_len = 0; // e.g., when invoking word-findadv from mini-buffer wither with // wrong options syntax or before ever opening advanced F&R pane @@ -3988,16 +4467,15 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) num_replaced += findAdvReplace(bv, opt, matchAdv); cur = bv->cursor(); if (opt.forward) - match_len = findForwardAdv(cur, matchAdv); + pos_len = findForwardAdv(cur, matchAdv); else - match_len = findBackwardsAdv(cur, matchAdv); - } catch (...) { - // This may only be raised by lyx::regex() - bv->message(_("Invalid regular expression!")); + pos_len = findBackwardsAdv(cur, matchAdv); + } catch (exception & ex) { + bv->message(from_utf8(ex.what())); return false; } - if (match_len == 0) { + if (pos_len == 0) { if (num_replaced > 0) { switch (num_replaced) { @@ -4024,8 +4502,13 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) else bv->message(_("Match found.")); - LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len); - bv->putSelectionAt(cur, match_len, !opt.forward); + if (cur.pos() + pos_len > cur.lastpos()) { + // Prevent crash in bv->putSelectionAt() + // Should never happen, maybe LASSERT() here? + pos_len = cur.lastpos() - cur.pos(); + } + LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len); + bv->putSelectionAt(cur, pos_len, !opt.forward); return true; } @@ -4053,7 +4536,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) { - LYXERR(Debug::FIND, "parsing"); + // LYXERR(Debug::FIND, "parsing"); string s; string line; getline(is, line); @@ -4065,7 +4548,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); + // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); opt.find_buf_name = from_utf8(s); is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all; is.get(); // Waste space before replace string @@ -4079,7 +4562,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); + // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); opt.repl_buf_name = from_utf8(s); is >> opt.keep_case; int i; @@ -4088,9 +4571,11 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) is >> i; opt.restr = FindAndReplaceOptions::SearchRestriction(i); + /* LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' ' << opt.scope << ' ' << opt.restr); + */ return is; }