X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Flyxfind.cpp;h=0e79172fe57af321fbb8e147e2b543642945dc76;hb=48c7d9b028b069b7819987378577253b7e803e86;hp=1e45b6502bbd230caf429244527c1f5d772cbd76;hpb=1ef605f6254ef3f3b8cec5440a2e67e6f23a707b;p=lyx.git diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 1e45b6502b..139e7b398e 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -32,29 +32,140 @@ #include "ParIterator.h" #include "TexRow.h" #include "Text.h" +#include "Encoding.h" +#include "frontends/Application.h" #include "frontends/alert.h" #include "mathed/InsetMath.h" #include "mathed/InsetMathGrid.h" #include "mathed/InsetMathHull.h" +#include "mathed/MathData.h" #include "mathed/MathStream.h" +#include "mathed/MathSupport.h" #include "support/convert.h" #include "support/debug.h" #include "support/docstream.h" +#include "support/FileName.h" #include "support/gettext.h" #include "support/lassert.h" #include "support/lstrings.h" #include "support/regex.h" -#include +#include "support/textutils.h" +#include using namespace std; using namespace lyx::support; namespace lyx { + +// Helper class for deciding what should be ignored +class IgnoreFormats { + public: + /// + IgnoreFormats() + : ignoreFamily_(false), ignoreSeries_(false), + ignoreShape_(false), ignoreUnderline_(false), + ignoreMarkUp_(false), ignoreStrikeOut_(false), + ignoreSectioning_(false), ignoreFrontMatter_(false), + ignoreColor_(false), ignoreLanguage_(false) {} + /// + bool getFamily() { return ignoreFamily_; }; + /// + bool getSeries() { return ignoreSeries_; }; + /// + bool getShape() { return ignoreShape_; }; + /// + bool getUnderline() { return ignoreUnderline_; }; + /// + bool getMarkUp() { return ignoreMarkUp_; }; + /// + bool getStrikeOut() { return ignoreStrikeOut_; }; + /// + bool getSectioning() { return ignoreSectioning_; }; + /// + bool getFrontMatter() { return ignoreFrontMatter_; }; + /// + bool getColor() { return ignoreColor_; }; + /// + bool getLanguage() { return ignoreLanguage_; }; + /// + void setIgnoreFormat(string const & type, bool value); + +private: + /// + bool ignoreFamily_; + /// + bool ignoreSeries_; + /// + bool ignoreShape_; + /// + bool ignoreUnderline_; + /// + bool ignoreMarkUp_; + /// + bool ignoreStrikeOut_; + /// + bool ignoreSectioning_; + /// + bool ignoreFrontMatter_; + /// + bool ignoreColor_; + /// + bool ignoreLanguage_; +}; + + +void IgnoreFormats::setIgnoreFormat(string const & type, bool value) +{ + if (type == "color") { + ignoreColor_ = value; + } + else if (type == "language") { + ignoreLanguage_ = value; + } + else if (type == "sectioning") { + ignoreSectioning_ = value; + ignoreFrontMatter_ = value; + } + else if (type == "font") { + ignoreSeries_ = value; + ignoreShape_ = value; + ignoreFamily_ = value; + } + else if (type == "series") { + ignoreSeries_ = value; + } + else if (type == "shape") { + ignoreShape_ = value; + } + else if (type == "family") { + ignoreFamily_ = value; + } + else if (type == "markup") { + ignoreMarkUp_ = value; + } + else if (type == "underline") { + ignoreUnderline_ = value; + } + else if (type == "strike") { + ignoreStrikeOut_ = value; + } +} + +// The global variable that can be changed from outside +IgnoreFormats ignoreFormats; + + +void setIgnoreFormat(string const & type, bool value) +{ + ignoreFormats.setIgnoreFormat(type, value); +} + + namespace { bool parse_bool(docstring & howto) @@ -70,8 +181,8 @@ bool parse_bool(docstring & howto) class MatchString : public binary_function { public: - MatchString(docstring const & str, bool cs, bool mw) - : str(str), case_sens(cs), whole_words(mw) + MatchString(docstring const & s, bool cs, bool mw) + : str(s), case_sens(cs), whole_words(mw) {} // returns true if the specified string is at the specified position @@ -92,7 +203,7 @@ private: int findForward(DocIterator & cur, MatchString const & match, - bool find_del = true) + bool find_del = true) { for (; cur; cur.forwardChar()) if (cur.inTexted()) { @@ -105,7 +216,7 @@ int findForward(DocIterator & cur, MatchString const & match, int findBackwards(DocIterator & cur, MatchString const & match, - bool find_del = true) + bool find_del = true) { while (cur) { cur.backwardChar(); @@ -119,23 +230,6 @@ int findBackwards(DocIterator & cur, MatchString const & match, } -bool findChange(DocIterator & cur, bool next) -{ - if (!next) - cur.backwardPos(); - for (; cur; next ? cur.forwardPos() : cur.backwardPos()) - if (cur.inTexted() && cur.paragraph().isChanged(cur.pos())) { - if (!next) - // if we search backwards, take a step forward - // to correctly set the anchor - cur.forwardPos(); - return true; - } - - return false; -} - - bool searchAllowed(docstring const & str) { if (str.empty()) { @@ -147,20 +241,50 @@ bool searchAllowed(docstring const & str) bool findOne(BufferView * bv, docstring const & searchstr, - bool case_sens, bool whole, bool forward, bool find_del = true) + bool case_sens, bool whole, bool forward, + bool find_del = true, bool check_wrap = false) { if (!searchAllowed(searchstr)) return false; - DocIterator cur = bv->cursor(); + DocIterator cur = forward + ? bv->cursor().selectionEnd() + : bv->cursor().selectionBegin(); MatchString const match(searchstr, case_sens, whole); - int match_len = forward ? findForward(cur, match, find_del) : - findBackwards(cur, match, find_del); + int match_len = forward + ? findForward(cur, match, find_del) + : findBackwards(cur, match, find_del); if (match_len > 0) bv->putSelectionAt(cur, match_len, !forward); + else if (check_wrap) { + DocIterator cur_orig(bv->cursor()); + docstring q; + if (forward) + q = _("End of file reached while searching forward.\n" + "Continue searching from the beginning?"); + else + q = _("Beginning of file reached while searching backward.\n" + "Continue searching from the end?"); + int wrap_answer = frontend::Alert::prompt(_("Wrap search?"), + q, 0, 1, _("&Yes"), _("&No")); + if (wrap_answer == 0) { + if (forward) { + bv->cursor().clear(); + bv->cursor().push_back(CursorSlice(bv->buffer().inset())); + } else { + bv->cursor().setCursor(doc_iterator_end(&bv->buffer())); + bv->cursor().backwardPos(); + } + bv->clearSelection(); + if (findOne(bv, searchstr, case_sens, whole, forward, find_del, false)) + return true; + } + bv->cursor().setCursor(cur_orig); + return false; + } return match_len > 0; } @@ -191,11 +315,13 @@ int replaceAll(BufferView * bv, pos_type const pos = cur.pos(); Font const font = cur.paragraph().getFontSettings(buf.params(), pos); cur.recordUndo(); - int striked = ssize - cur.paragraph().eraseChars(pos, pos + match_len, - buf.params().trackChanges); + int striked = ssize - + cur.paragraph().eraseChars(pos, pos + match_len, + buf.params().track_changes); cur.paragraph().insert(pos, replacestr, font, - Change(buf.params().trackChanges ? - Change::INSERTED : Change::UNCHANGED)); + Change(buf.params().track_changes + ? Change::INSERTED + : Change::UNCHANGED)); for (int i = 0; i < rsize + striked; ++i) cur.forwardChar(); ++num; @@ -212,81 +338,84 @@ int replaceAll(BufferView * bv, // the idea here is that we are going to replace the string that -// is selected IF it is the search string. +// is selected IF it is the search string. // if there is a selection, but it is not the search string, then // we basically ignore it. (FIXME We ought to replace only within // the selection.) // if there is no selection, then: // (i) if some search string has been provided, then we find it. // (think of how the dialog works when you hit "replace" the -// first time.) +// first time.) // (ii) if no search string has been provided, then we treat the // word the cursor is in as the search string. (why? i have no // idea.) but this only works in text? // -// returns the number of replacements made (one, if any) and +// returns the number of replacements made (one, if any) and // whether anything at all was done. pair replaceOne(BufferView * bv, docstring searchstr, - docstring const & replacestr, bool case_sens, - bool whole, bool forward) + docstring const & replacestr, bool case_sens, + bool whole, bool forward, bool findnext) { Cursor & cur = bv->cursor(); + bool found = false; if (!cur.selection()) { // no selection, non-empty search string: find it if (!searchstr.empty()) { - findOne(bv, searchstr, case_sens, whole, forward); - return pair(true, 0); + found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); + return make_pair(found, 0); } // empty search string if (!cur.inTexted()) // bail in math - return pair(false, 0); - // select current word and treat it as the search string + return make_pair(false, 0); + // select current word and treat it as the search string. + // This causes a minor bug as undo will restore this selection, + // which the user did not create (#8986). cur.innerText()->selectWord(cur, WHOLE_WORD); searchstr = cur.selectionAsString(false); } - + // if we still don't have a search string, report the error // and abort. if (!searchAllowed(searchstr)) - return pair(false, 0); - + return make_pair(false, 0); + bool have_selection = cur.selection(); docstring const selected = cur.selectionAsString(false); - bool match = - case_sens ? searchstr == selected - : compare_no_case(searchstr, selected) == 0; + bool match = + case_sens + ? searchstr == selected + : compare_no_case(searchstr, selected) == 0; // no selection or current selection is not search word: // just find the search word if (!have_selection || !match) { - findOne(bv, searchstr, case_sens, whole, forward); - return pair(true, 0); + found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); + return make_pair(found, 0); } // we're now actually ready to replace. if the buffer is // read-only, we can't, though. if (bv->buffer().isReadonly()) - return pair(false, 0); + return make_pair(false, 0); - cap::replaceSelectionWithString(cur, replacestr, forward); + cap::replaceSelectionWithString(cur, replacestr); if (forward) { cur.pos() += replacestr.length(); - LASSERT(cur.pos() <= cur.lastpos(), /* */); - } else { - cur.pos() -= replacestr.length(); - LASSERT(cur.pos() >= 0, /* */); + LASSERT(cur.pos() <= cur.lastpos(), + cur.pos() = cur.lastpos()); } - findOne(bv, searchstr, case_sens, whole, forward, false); + if (findnext) + findOne(bv, searchstr, case_sens, whole, forward, false, findnext); - return pair(true, 1); + return make_pair(true, 1); } -} // namespace anon +} // namespace docstring const find2string(docstring const & search, - bool casesensitive, bool matchword, bool forward) + bool casesensitive, bool matchword, bool forward) { odocstringstream ss; ss << search << '\n' @@ -298,8 +427,9 @@ docstring const find2string(docstring const & search, docstring const replace2string(docstring const & replace, - docstring const & search, bool casesensitive, bool matchword, - bool all, bool forward) + docstring const & search, + bool casesensitive, bool matchword, + bool all, bool forward, bool findnext) { odocstringstream ss; ss << replace << '\n' @@ -307,7 +437,8 @@ docstring const replace2string(docstring const & replace, << int(casesensitive) << ' ' << int(matchword) << ' ' << int(all) << ' ' - << int(forward); + << int(forward) << ' ' + << int(findnext); return ss.str(); } @@ -329,11 +460,11 @@ bool lyxfind(BufferView * bv, FuncRequest const & ev) bool matchword = parse_bool(howto); bool forward = parse_bool(howto); - return findOne(bv, search, casesensitive, matchword, forward); + return findOne(bv, search, casesensitive, matchword, forward, true, true); } -bool lyxreplace(BufferView * bv, +bool lyxreplace(BufferView * bv, FuncRequest const & ev, bool has_deleted) { if (!bv || ev.action() != LFUN_WORD_REPLACE) @@ -342,7 +473,7 @@ bool lyxreplace(BufferView * bv, // data is of the form // " // - // " + // " docstring search; docstring rplc; docstring howto = split(ev.argument(), rplc, '\n'); @@ -352,17 +483,18 @@ bool lyxreplace(BufferView * bv, bool matchword = parse_bool(howto); bool all = parse_bool(howto); bool forward = parse_bool(howto); + bool findnext = howto.empty() ? true : parse_bool(howto); - int replace_count = 0; bool update = false; if (!has_deleted) { + int replace_count = 0; if (all) { replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); update = replace_count > 0; } else { - pair rv = - replaceOne(bv, search, rplc, casesensitive, matchword, forward); + pair rv = + replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); update = rv.first; replace_count = rv.second; } @@ -370,152 +502,209 @@ bool lyxreplace(BufferView * bv, Buffer const & buf = bv->buffer(); if (!update) { // emit message signal. - buf.message(_("String not found!")); + buf.message(_("String not found.")); } else { if (replace_count == 0) { buf.message(_("String found.")); } else if (replace_count == 1) { buf.message(_("String has been replaced.")); } else { - docstring const str = + docstring const str = bformat(_("%1$d strings have been replaced."), replace_count); buf.message(str); } } - } else { + } else if (findnext) { // if we have deleted characters, we do not replace at all, but // rather search for the next occurence - if (findOne(bv, search, casesensitive, matchword, forward)) + if (findOne(bv, search, casesensitive, matchword, forward, true, findnext)) update = true; else - bv->message(_("String not found!")); + bv->message(_("String not found.")); } return update; } -bool findNextChange(BufferView * bv) +bool findNextChange(BufferView * bv, Cursor & cur, bool const check_wrap) { - return findChange(bv, true); -} + for (; cur; cur.forwardPos()) + if (cur.inTexted() && cur.paragraph().isChanged(cur.pos())) + return true; + if (check_wrap) { + DocIterator cur_orig(bv->cursor()); + docstring q = _("End of file reached while searching forward.\n" + "Continue searching from the beginning?"); + int wrap_answer = frontend::Alert::prompt(_("Wrap search?"), + q, 0, 1, _("&Yes"), _("&No")); + if (wrap_answer == 0) { + bv->cursor().clear(); + bv->cursor().push_back(CursorSlice(bv->buffer().inset())); + bv->clearSelection(); + cur.setCursor(bv->cursor().selectionBegin()); + if (findNextChange(bv, cur, false)) + return true; + } + bv->cursor().setCursor(cur_orig); + } -bool findPreviousChange(BufferView * bv) -{ - return findChange(bv, false); + return false; } -bool findChange(BufferView * bv, bool next) +bool findPreviousChange(BufferView * bv, Cursor & cur, bool const check_wrap) { - if (bv->cursor().selection()) { - // set the cursor at the beginning or at the end of the selection - // before searching. Otherwise, the current change will be found. - if (next != (bv->cursor().top() > bv->cursor().normalAnchor())) - bv->cursor().setCursorToAnchor(); + for (cur.backwardPos(); cur; cur.backwardPos()) { + if (cur.inTexted() && cur.paragraph().isChanged(cur.pos())) + return true; } - DocIterator cur = bv->cursor(); - - // Are we within a change ? Then first search forward (backward), - // clear the selection and search the other way around (see the end - // of this function). This will avoid changes to be selected half. - bool search_both_sides = false; - DocIterator tmpcur = cur; - // Leave math first - while (tmpcur.inMathed()) - tmpcur.pop_back(); - Change change_next_pos - = tmpcur.paragraph().lookupChange(tmpcur.pos()); - if (change_next_pos.changed() && cur.inMathed()) { - cur = tmpcur; - search_both_sides = true; - } else if (tmpcur.pos() > 0 && tmpcur.inTexted()) { - Change change_prev_pos - = tmpcur.paragraph().lookupChange(tmpcur.pos() - 1); - if (change_next_pos.isSimilarTo(change_prev_pos)) - search_both_sides = true; + if (check_wrap) { + DocIterator cur_orig(bv->cursor()); + docstring q = _("Beginning of file reached while searching backward.\n" + "Continue searching from the end?"); + int wrap_answer = frontend::Alert::prompt(_("Wrap search?"), + q, 0, 1, _("&Yes"), _("&No")); + if (wrap_answer == 0) { + bv->cursor().setCursor(doc_iterator_end(&bv->buffer())); + bv->cursor().backwardPos(); + bv->clearSelection(); + cur.setCursor(bv->cursor().selectionBegin()); + if (findPreviousChange(bv, cur, false)) + return true; + } + bv->cursor().setCursor(cur_orig); } - if (!findChange(cur, next)) - return false; - - bv->cursor().setCursor(cur); - bv->cursor().resetAnchor(); + return false; +} - if (!next) - // take a step into the change - cur.backwardPos(); - Change orig_change = cur.paragraph().lookupChange(cur.pos()); +bool selectChange(Cursor & cur, bool forward) +{ + if (!cur.inTexted() || !cur.paragraph().isChanged(cur.pos())) + return false; + Change ch = cur.paragraph().lookupChange(cur.pos()); - CursorSlice & tip = cur.top(); - if (next) { - for (; !tip.at_end(); tip.forwardPos()) { - Change change = tip.paragraph().lookupChange(tip.pos()); - if (!change.isSimilarTo(orig_change)) - break; - } - } else { - for (; !tip.at_begin();) { - tip.backwardPos(); - Change change = tip.paragraph().lookupChange(tip.pos()); - if (!change.isSimilarTo(orig_change)) { - // take a step forward to correctly set the selection - tip.forwardPos(); - break; - } + CursorSlice tip1 = cur.top(); + for (; tip1.pit() < tip1.lastpit() || tip1.pos() < tip1.lastpos(); tip1.forwardPos()) { + Change ch2 = tip1.paragraph().lookupChange(tip1.pos()); + if (!ch2.isSimilarTo(ch)) + break; + } + CursorSlice tip2 = cur.top(); + for (; tip2.pit() > 0 || tip2.pos() > 0;) { + tip2.backwardPos(); + Change ch2 = tip2.paragraph().lookupChange(tip2.pos()); + if (!ch2.isSimilarTo(ch)) { + // take a step forward to correctly set the selection + tip2.forwardPos(); + break; } } + if (forward) + swap(tip1, tip2); + cur.top() = tip1; + cur.bv().mouseSetCursor(cur, false); + cur.top() = tip2; + cur.bv().mouseSetCursor(cur, true); + return true; +} - // Now put cursor to end of selection: - bv->cursor().setCursor(cur); - bv->cursor().setSelection(); - if (search_both_sides) { - bv->cursor().setSelection(false); - findChange(bv, !next); - } +namespace { - return true; + +bool findChange(BufferView * bv, bool forward) +{ + Cursor cur(*bv); + cur.setCursor(forward ? bv->cursor().selectionEnd() + : bv->cursor().selectionBegin()); + forward ? findNextChange(bv, cur, true) : findPreviousChange(bv, cur, true); + return selectChange(cur, forward); } +} // namespace + +bool findNextChange(BufferView * bv) +{ + return findChange(bv, true); +} + + +bool findPreviousChange(BufferView * bv) +{ + return findChange(bv, false); +} + + + namespace { typedef vector > Escapes; /// A map of symbols and their escaped equivalent needed within a regex. +/// @note Beware of order Escapes const & get_regexp_escapes() { + typedef std::pair P; + static Escapes escape_map; if (escape_map.empty()) { - escape_map.push_back(pair("\\", "\\\\")); - escape_map.push_back(pair("^", "\\^")); - escape_map.push_back(pair("$", "\\$")); - escape_map.push_back(pair("{", "\\{")); - escape_map.push_back(pair("}", "\\}")); - escape_map.push_back(pair("[", "\\[")); - escape_map.push_back(pair("]", "\\]")); - escape_map.push_back(pair("(", "\\(")); - escape_map.push_back(pair(")", "\\)")); - escape_map.push_back(pair("+", "\\+")); - escape_map.push_back(pair("*", "\\*")); - escape_map.push_back(pair(".", "\\.")); + escape_map.push_back(P("$", "_x_$")); + escape_map.push_back(P("{", "_x_{")); + escape_map.push_back(P("}", "_x_}")); + escape_map.push_back(P("[", "_x_[")); + escape_map.push_back(P("]", "_x_]")); + escape_map.push_back(P("(", "_x_(")); + escape_map.push_back(P(")", "_x_)")); + escape_map.push_back(P("+", "_x_+")); + escape_map.push_back(P("*", "_x_*")); + escape_map.push_back(P(".", "_x_.")); + escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)")); + escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)")); + escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)")); + escape_map.push_back(P("_x_", "\\")); } return escape_map; } /// A map of lyx escaped strings and their unescaped equivalent. -Escapes const & get_lyx_unescapes() { +Escapes const & get_lyx_unescapes() +{ + typedef std::pair P; + static Escapes escape_map; if (escape_map.empty()) { - escape_map.push_back(pair("{*}", "*")); - escape_map.push_back(pair("{[}", "[")); - escape_map.push_back(pair("\\$", "$")); - escape_map.push_back(pair("\\backslash{}", "\\")); - escape_map.push_back(pair("\\backslash", "\\")); - escape_map.push_back(pair("\\sim ", "~")); - escape_map.push_back(pair("\\^", "^")); + escape_map.push_back(P("\\%", "%")); + escape_map.push_back(P("\\mathcircumflex ", "^")); + escape_map.push_back(P("\\mathcircumflex", "^")); + escape_map.push_back(P("\\backslash ", "\\")); + escape_map.push_back(P("\\backslash", "\\")); + escape_map.push_back(P("\\\\{", "_x_<")); + escape_map.push_back(P("\\\\}", "_x_>")); + escape_map.push_back(P("\\sim ", "~")); + escape_map.push_back(P("\\sim", "~")); + } + return escape_map; +} + +/// A map of escapes turning a regexp matching text to one matching latex. +Escapes const & get_regexp_latex_escapes() +{ + typedef std::pair P; + + static Escapes escape_map; + if (escape_map.empty()) { + escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)")); + escape_map.push_back(P("(first, pos)) < s.length()) { s.replace(pos, it->first.length(), it->second); -// LYXERR(Debug::FIND, "After escape: " << s); + LYXERR(Debug::FIND, "After escape: " << s); pos += it->second.length(); // LYXERR(Debug::FIND, "pos: " << pos); } @@ -541,69 +730,61 @@ string apply_escapes(string s, Escapes const & escape_map) return s; } -/** Return the position of the closing brace matching the open one at s[pos], - ** or s.size() if not found. - **/ -size_t find_matching_brace(string const & s, size_t pos) -{ - LASSERT(s[pos] == '{', /* */); - int open_braces = 1; - for (++pos; pos < s.size(); ++pos) { - if (s[pos] == '\\') - ++pos; - else if (s[pos] == '{') - ++open_braces; - else if (s[pos] == '}') { - --open_braces; - if (open_braces == 0) - return pos; - } - } - return s.size(); -} -/// Within \regexp{} apply get_regex_escapes(), while outside apply get_lyx_unescapes(). -string escape_for_regex(string s) +/// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string), +/// while outside apply get_lyx_unescapes()+get_regexp_escapes(). +/// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well. +string escape_for_regex(string s, bool match_latex) { size_t pos = 0; while (pos < s.size()) { - size_t new_pos = s.find("\\regexp{{{", pos); + size_t new_pos = s.find("\\regexp{", pos); if (new_pos == string::npos) new_pos = s.size(); - LYXERR(Debug::FIND, "new_pos: " << new_pos); - string t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes()); - LYXERR(Debug::FIND, "t : " << t); - t = apply_escapes(t, get_regexp_escapes()); - LYXERR(Debug::FIND, "t : " << t); - s.replace(pos, new_pos - pos, t); - new_pos = pos + t.size(); - LYXERR(Debug::FIND, "Regexp after escaping: " << s); - LYXERR(Debug::FIND, "new_pos: " << new_pos); - if (new_pos == s.size()) - break; - size_t end_pos = s.find("}}}", new_pos + 10); // find_matching_brace(s, new_pos + 7); + string t; + if (new_pos > pos) { + LYXERR(Debug::FIND, "new_pos: " << new_pos); + t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes()); + LYXERR(Debug::FIND, "t [lyx]: " << t); + t = apply_escapes(t, get_regexp_escapes()); + LYXERR(Debug::FIND, "t [rxp]: " << t); + s.replace(pos, new_pos - pos, t); + new_pos = pos + t.size(); + LYXERR(Debug::FIND, "Regexp after escaping: " << s); + LYXERR(Debug::FIND, "new_pos: " << new_pos); + if (new_pos == s.size()) + break; + } + // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes) + size_t end_pos = s.find("\\endregexp{}}", new_pos + 8); LYXERR(Debug::FIND, "end_pos: " << end_pos); - t = apply_escapes(s.substr(new_pos + 10, end_pos - (new_pos + 10)), get_lyx_unescapes()); - LYXERR(Debug::FIND, "t : " << t); + t = s.substr(new_pos + 8, end_pos - (new_pos + 8)); + LYXERR(Debug::FIND, "t in regexp : " << t); + t = apply_escapes(t, get_lyx_unescapes()); + LYXERR(Debug::FIND, "t in regexp [lyx]: " << t); + if (match_latex) { + t = apply_escapes(t, get_regexp_latex_escapes()); + LYXERR(Debug::FIND, "t in regexp [ltx]: " << t); + } if (end_pos == s.size()) { s.replace(new_pos, end_pos - new_pos, t); - pos = s.size(); LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s); break; } - s.replace(new_pos, end_pos + 3 - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s); + s.replace(new_pos, end_pos + 13 - new_pos, t); + LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s); pos = new_pos + t.size(); LYXERR(Debug::FIND, "pos: " << pos); } return s; } + /// Wrapper for lyx::regex_replace with simpler interface bool regex_replace(string const & s, string & t, string const & searchstr, - string const & replacestr) + string const & replacestr) { - lyx::regex e(searchstr); + lyx::regex e(searchstr, regex_constants::ECMAScript); ostringstream oss; ostream_iterator it(oss); lyx::regex_replace(it, s.begin(), s.end(), e, replacestr); @@ -613,6 +794,7 @@ bool regex_replace(string const & s, string & t, string const & searchstr, return rv; } + /** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces. ** ** Verify that closed braces exactly match open braces. This avoids that, for example, @@ -645,15 +827,24 @@ bool braces_match(string::const_iterator const & beg, } } if (open_pars != unmatched) { - LYXERR(Debug::FIND, "Found " << open_pars - << " instead of " << unmatched - << " unmatched open braces at the end of count"); - return false; + LYXERR(Debug::FIND, "Found " << open_pars + << " instead of " << unmatched + << " unmatched open braces at the end of count"); + return false; } LYXERR(Debug::FIND, "Braces match as expected"); return true; } + +class MatchResult { +public: + int match_len; + int match2end; + int pos; + MatchResult(): match_len(0),match2end(0), pos(0) {}; +}; + /** The class performing a match between a position in the document and the FindAdvOptions. **/ class MatchStringAdv { @@ -670,7 +861,7 @@ public: ** @return ** The length of the matching text, or zero if no match was found. **/ - int operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; + MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; public: /// buffer @@ -682,7 +873,7 @@ public: private: /// Auxiliary find method (does not account for opt.matchword) - int findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const; + MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const; /** Normalize a stringified or latexified LyX paragraph. ** @@ -697,150 +888,2176 @@ private: ** @todo Normalization should also expand macros, if the corresponding ** search option was checked. **/ - string normalize(docstring const & s) const; + string normalize(docstring const & s, bool hack_braces) const; // normalized string to search string par_as_string; // regular expression to use for searching lyx::regex regexp; - // same as regexp, but prefixed with a ".*" + // same as regexp, but prefixed with a ".*?" lyx::regex regexp2; + // leading format material as string + string lead_as_string; + // par_as_string after removal of lead_as_string + string par_as_string_nolead; // unmatched open braces in the search string/regexp int open_braces; // number of (.*?) subexpressions added at end of search regexp for closing // environments, math mode, styles, etc... int close_wildcards; + // Are we searching with regular expressions ? + bool use_regexp; +}; + + +static docstring buffer_to_latex(Buffer & buffer) +{ + //OutputParams runparams(&buffer.params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); + odocstringstream ods; + otexstream os(ods); + runparams.nice = true; + runparams.flavor = OutputParams::XETEX; + runparams.linelen = 10000; //lyxrc.plaintext_linelen; + // No side effect of file copying and image conversion + runparams.dryrun = true; + runparams.for_search = true; + pit_type const endpit = buffer.paragraphs().size(); + for (pit_type pit = 0; pit != endpit; ++pit) { + TeXOnePar(buffer, buffer.text(), pit, os, runparams); + LYXERR(Debug::FIND, "searchString up to here: " << ods.str()); + } + return ods.str(); +} + + +static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions const & opt) +{ + docstring str; + if (!opt.ignoreformat) { + str = buffer_to_latex(buffer); + } else { + // OutputParams runparams(&buffer.params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); + runparams.nice = true; + runparams.flavor = OutputParams::XETEX; + runparams.linelen = 10000; //lyxrc.plaintext_linelen; + runparams.dryrun = true; + runparams.for_search = true; + for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { + Paragraph const & par = buffer.paragraphs().at(pit); + LYXERR(Debug::FIND, "Adding to search string: '" + << par.asString(pos_type(0), par.size(), + AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + &runparams) + << "'"); + str += par.asString(pos_type(0), par.size(), + AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + &runparams); + } + } + return str; +} + + +/// Return separation pos between the leading material and the rest +static size_t identifyLeading(string const & s) +{ + string t = s; + // @TODO Support \item[text] + // Kornel: Added textsl, textsf, textit, texttt and noun + // + allow to search for colored text too + while (regex_replace(t, t, REGEX_BOS "\\\\(((footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|emph|noun|minisec|text(bf|md|sl|sf|it|tt))|((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "") + || regex_replace(t, t, REGEX_BOS "\\$", "") + || regex_replace(t, t, REGEX_BOS "\\\\\\[ ", "") + || regex_replace(t, t, REGEX_BOS " ?\\\\item\\{[a-z]+\\}", "") + || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\} ", "")) + ; + LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); + return s.find(t); +} + +/* + * Given a latexified string, retrieve some handled features + * The features of the regex will later be compared with the features + * of the searched text. If the regex features are not a + * subset of the analized, then, in not format ignoring search + * we can early stop the search in the relevant inset. + */ +typedef map Features; + +static Features identifyFeatures(string const & s) +{ + static regex const feature("\\\\(([a-zA-Z]+(\\{([a-z]+\\*?)\\}|\\*)?))\\{"); + static regex const valid("^(" + "(" + "(footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|" + "emph|noun|text(bf|md|sl|sf|it|tt)|" + "(textcolor|foreignlanguage|item|listitem|latexenvironment)\\{[a-z]+\\*?\\})|" + "(u|uu)line|(s|x)out|uwave|" + "(sub|extra)?title|author|subject|publishers|dedication|(upper|lower)titleback|lyx(right)?address)|" + "((sub)?(((sub)?section)|paragraph)|part|chapter|lyxslide)\\*?)$"); + smatch sub; + bool displ = true; + Features info; + + for (sregex_iterator it(s.begin(), s.end(), feature), end; it != end; ++it) { + sub = *it; + if (displ) { + if (sub.str(1).compare("regexp") == 0) { + displ = false; + continue; + } + string token = sub.str(1); + smatch sub2; + if (regex_match(token, sub2, valid)) { + info[token] = true; + } + else { + // ignore + } + } + else { + if (sub.str(1).compare("endregexp") == 0) { + displ = true; + continue; + } + } + } + return info; +} + +/* + * defines values features of a key "\\[a-z]+{" + */ +class KeyInfo { + public: + enum KeyType { + /* Char type with content discarded + * like \hspace{1cm} */ + noContent, + /* Char, like \backslash */ + isChar, + /* \part, \section*, ... */ + isSectioning, + /* title, author etc */ + isTitle, + /* \foreignlanguage{ngerman}, ... */ + isMain, + /* inside \code{} + * to discard language in content */ + noMain, + isRegex, + /* \begin{eqnarray}...\end{eqnarray}, ... $...$ */ + isMath, + /* fonts, colors, markups, ... */ + isStandard, + /* footnotesize, ... large, ... + * Ignore all of them */ + isSize, + invalid, + /* inputencoding, ... + * Discard also content, because they do not help in search */ + doRemove, + /* twocolumns, ... + * like remove, but also all arguments */ + removeWithArg, + /* item, listitem */ + isList, + /* tex, latex, ... like isChar */ + isIgnored, + /* like \lettrine[lines=5]{}{} */ + cleanToStart, + /* End of arguments marker for lettrine, + * so that they can be ignored */ + endArguments + }; + KeyInfo() + : keytype(invalid), + head(""), + _tokensize(-1), + _tokenstart(-1), + _dataStart(-1), + _dataEnd(-1), + parenthesiscount(1), + disabled(false), + used(false) + {}; + KeyInfo(KeyType type, int parcount, bool disable) + : keytype(type), + _tokensize(-1), + _tokenstart(-1), + _dataStart(-1), + _dataEnd(-1), + parenthesiscount(parcount), + disabled(disable), + used(false) {}; + KeyType keytype; + string head; + int _tokensize; + int _tokenstart; + int _dataStart; + int _dataEnd; + int parenthesiscount; + bool disabled; + bool used; /* by pattern */ +}; + +class Border { + public: + Border(int l=0, int u=0) : low(l), upper(u) {}; + int low; + int upper; +}; + +#define MAXOPENED 30 +class Intervall { + bool isPatternString_; +public: + explicit Intervall(bool isPattern, string const & p) : + isPatternString_(isPattern), par(p), ignoreidx(-1), actualdeptindex(0), + hasTitle(false), langcount(0) + { + depts[0] = 0; + closes[0] = 0; + } + + string par; + int ignoreidx; + static vector borders; + int depts[MAXOPENED]; + int closes[MAXOPENED]; + int actualdeptindex; + int previousNotIgnored(int); + int nextNotIgnored(int); + void handleOpenP(int i); + void handleCloseP(int i, bool closingAllowed); + void resetOpenedP(int openPos); + void addIntervall(int upper); + void addIntervall(int low, int upper); /* if explicit */ + void removeAccents(); + void setForDefaultLang(KeyInfo &defLang); + int findclosing(int start, int end, char up, char down, int repeat); + void handleParentheses(int lastpos, bool closingAllowed); + bool hasTitle; + int langcount; // Number of disabled language specs up to current position in actual interval + int isOpeningPar(int pos); + string titleValue; + void output(ostringstream &os, int lastpos); + // string show(int lastpos); +}; + +vector Intervall::borders = vector(30); + +int Intervall::isOpeningPar(int pos) +{ + if ((pos < 0) || (size_t(pos) >= par.size())) + return 0; + if (par[pos] != '{') + return 0; + if (size_t(pos) + 2 >= par.size()) + return 1; + if (par[pos+2] != '}') + return 1; + if (par[pos+1] == '[' || par[pos+1] == ']') + return 3; + return 1; +} + +void Intervall::setForDefaultLang(KeyInfo &defLang) +{ + // Enable the use of first token again + if (ignoreidx >= 0) { + int value = defLang._tokenstart + defLang._tokensize; + if (value > 0) { + if (borders[0].low < value) + borders[0].low = value; + if (borders[0].upper < value) + borders[0].upper = value; + } + } +} + +static void checkDepthIndex(int val) +{ + static int maxdepthidx = MAXOPENED-2; + static int lastmaxdepth = 0; + if (val > lastmaxdepth) { + LYXERR0("Depth reached " << val); + lastmaxdepth = val; + } + if (val > maxdepthidx) { + maxdepthidx = val; + LYXERR0("maxdepthidx now " << val); + } +} + +#if 0 +// Not needed, because borders are now dynamically expanded +static void checkIgnoreIdx(int val) +{ + static int lastmaxignore = -1; + if ((lastmaxignore < val) && (size_t(val+1) >= borders.size())) { + LYXERR0("IgnoreIdx reached " << val); + lastmaxignore = val; + } +} +#endif + +/* + * Expand the region of ignored parts of the input latex string + * The region is only relevant in output() + */ +void Intervall::addIntervall(int low, int upper) +{ + int idx; + if (low == upper) return; + for (idx = ignoreidx+1; idx > 0; --idx) { + if (low > borders[idx-1].upper) { + break; + } + } + Border br(low, upper); + if (idx > ignoreidx) { + if (borders.size() <= size_t(idx)) { + borders.push_back(br); + } + else { + borders[idx] = br; + } + ignoreidx = idx; + // checkIgnoreIdx(ignoreidx); + return; + } + else { + // Expand only if one of the new bound is inside the interwall + // We know here that br.low > borders[idx-1].upper + if (br.upper < borders[idx].low) { + // We have to insert at this pos + if (size_t(ignoreidx+1) >= borders.size()) { + borders.push_back(borders[ignoreidx]); + } + else { + borders[ignoreidx+1] = borders[ignoreidx]; + } + for (int i = ignoreidx; i > idx; --i) { + borders[i] = borders[i-1]; + } + borders[idx] = br; + ignoreidx += 1; + // checkIgnoreIdx(ignoreidx); + return; + } + // Here we know, that we are overlapping + if (br.low > borders[idx].low) + br.low = borders[idx].low; + // check what has to be concatenated + int count = 0; + for (int i = idx; i <= ignoreidx; i++) { + if (br.upper >= borders[i].low) { + count++; + if (br.upper < borders[i].upper) + br.upper = borders[i].upper; + } + else { + break; + } + } + // count should be >= 1 here + borders[idx] = br; + if (count > 1) { + for (int i = idx + count; i <= ignoreidx; i++) { + borders[i-count+1] = borders[i]; + } + ignoreidx -= count - 1; + return; + } + } +} + +typedef map AccentsMap; +static AccentsMap accents = map(); + +static void buildaccent(string n, string param, string values) +{ + stringstream s(n); + string name; + const char delim = '|'; + while (getline(s, name, delim)) { + size_t start = 0; + for (size_t i = 0; i < param.size(); i++) { + string key = name + "{" + param[i] + "}"; + // get the corresponding utf8-value + if ((values[start] & 0xc0) != 0xc0) { + // should not happen, utf8 encoding starts at least with 11xxxxxx + // but value for '\dot{i}' is 'i', which is ascii + if ((values[start] & 0x80) == 0) { + // is ascii + accents[key] = values.substr(start, 1); + // LYXERR0("" << key << "=" << accents[key]); + } + start++; + continue; + } + for (int j = 1; ;j++) { + if (start + j >= values.size()) { + accents[key] = values.substr(start, j); + start = values.size() - 1; + break; + } + else if ((values[start+j] & 0xc0) != 0x80) { + // This is the first byte of following utf8 char + accents[key] = values.substr(start, j); + start += j; + // LYXERR0("" << key << "=" << accents[key]); + break; + } + } + } + } +} + +static void buildAccentsMap() +{ + accents["imath"] = "ı"; + accents["i"] = "ı"; + accents["jmath"] = "È·"; + accents["lyxmathsym{ß}"] = "ß"; + accents["text{ß}"] = "ß"; + accents["ddot{\\imath}"] = "ï"; + buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY", + "äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ"); // umlaut + buildaccent("dot|.", "aAbBcCdDeEfFGghHIimMnNoOpPrRsStTwWxXyYzZ", + "ȧȦḃḂċĊḋḊėĖḟḞĠġḣḢİİṁṀṅṄȯȮṗṖṙṘṡṠṫṪẇẆẋẊẏẎżŻ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'Ä°' + accents["acute{\\imath}"] = "í"; + buildaccent("acute", "aAcCeEgGkKlLmMoOnNpPrRsSuUwWyYzZiI", + "áÁćĆéÉǵǴḱḰĺĹḿḾóÓńŃṕṔŕŔśŚúÚẃẂýÝźŹíÍ"); + buildaccent("dacute|H|h", "oOuU", "őŐűŰ"); // double acute + buildaccent("mathring|r", "aAuUwy", + "åÅůŮẘẙ"); // ring + accents["check{\\imath}"] = "ǐ"; + accents["check{\\jmath}"] = "Ç°"; + buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ", + "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ"); // caron + accents["hat{\\imath}"] = "î"; + accents["hat{\\jmath}"] = "ĵ"; + buildaccent("hat|^", "aAcCeEgGhHiIjJoOsSuUwWyYzZ", + "âÂĉĈêÊĝĜĥĤîÎĵĴôÔŝŜûÛŵŴŷŶẑẐ"); // circ + accents["bar{\\imath}"] = "Ä«"; + buildaccent("bar|=", "aAeEiIoOuUyY", + "āĀēĒīĪōŌūŪȳȲ"); // macron + accents["tilde{\\imath}"] = "Ä©"; + buildaccent("tilde", "aAeEiInNoOuUvVyY", + "ãÃẽẼĩĨñÑõÕũŨṽṼỹỸ"); // tilde + accents["breve{\\imath}"] = "Ä­"; + buildaccent("breve|u", "aAeEgGiIoOuU", + "ăĂĕĔğĞĭĬŏŎŭŬ"); // breve + accents["grave{\\imath}"] = "ì"; + buildaccent("grave|`", "aAeEiIoOuUnNwWyY", + "àÀèÈìÌòÒùÙǹǸẁẀỳỲ"); // grave + buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy", + "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ"); // dot below + buildaccent("ogonek|k", "AaEeIiUuOo", + "ĄąĘęĮįŲųǪǫ"); // ogonek + buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh", + "ÇçĢĢĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla + buildaccent("subring|textsubring", "Aa", + "Ḁḁ"); // subring + buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu", + "ḒḓḘḙḼḽṊṋṰṱṶṷ"); // subcircum + buildaccent("subtilde|textsubtilde", "EeIiUu", + "ḚḛḬḭṴṵ"); // subtilde + accents["dgrave{\\imath}"] = "ȉ"; + accents["textdoublegrave{\\i}"] = "ȉ"; + buildaccent("dgrave|textdoublegrave", "AaEeIiOoRrUu", + "ȀȁȄȅȈȉȌȍȐȑȔȕ"); // double grave + accents["rcap{\\imath}"] = "ȉ"; + accents["textroundcap{\\i}"] = "ȉ"; + buildaccent("rcap|textroundcap", "AaEeIiOoRrUu", + "ȂȃȆȇȊȋȎȏȒȓȖȗ"); // inverted breve + buildaccent("slashed", "oO", + "øØ"); // slashed +} + +/* + * Created accents in math or regexp environment + * are macros, but we need the utf8 equivalent + */ +void Intervall::removeAccents() +{ + if (accents.empty()) + buildAccentsMap(); + static regex const accre("\\\\(([\\S]|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))"); + smatch sub; + for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { + sub = *itacc; + string key = sub.str(1); + if (accents.find(key) != accents.end()) { + string val = accents[key]; + size_t pos = sub.position(size_t(0)); + for (size_t i = 0; i < val.size(); i++) { + par[pos+i] = val[i]; + } + addIntervall(pos+val.size(), pos + sub.str(0).size()); + for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) { + // remove traces of any remaining chars + par[i] = ' '; + } + } + else { + LYXERR0("Not added accent for \"" << key << "\""); + } + } +} + +void Intervall::handleOpenP(int i) +{ + actualdeptindex++; + depts[actualdeptindex] = i+1; + closes[actualdeptindex] = -1; + checkDepthIndex(actualdeptindex); +} + +void Intervall::handleCloseP(int i, bool closingAllowed) +{ + if (actualdeptindex <= 0) { + if (! closingAllowed) + LYXERR(Debug::FIND, "Bad closing parenthesis in latex"); /* should not happen, but the latex input may be wrong */ + // if we are at the very end + addIntervall(i, i+1); + } + else { + closes[actualdeptindex] = i+1; + actualdeptindex--; + } +} + +void Intervall::resetOpenedP(int openPos) +{ + // Used as initializer for foreignlanguage entry + actualdeptindex = 1; + depts[1] = openPos+1; + closes[1] = -1; +} + +int Intervall::previousNotIgnored(int start) +{ + int idx = 0; /* int intervalls */ + for (idx = ignoreidx; idx >= 0; --idx) { + if (start > borders[idx].upper) + return start; + if (start >= borders[idx].low) + start = borders[idx].low-1; + } + return start; +} + +int Intervall::nextNotIgnored(int start) +{ + int idx = 0; /* int intervalls */ + for (idx = 0; idx <= ignoreidx; idx++) { + if (start < borders[idx].low) + return start; + if (start < borders[idx].upper) + start = borders[idx].upper; + } + return start; +} + +typedef map KeysMap; +typedef vector< KeyInfo> Entries; +static KeysMap keys = map(); + +class LatexInfo { + private: + int entidx_; + Entries entries_; + Intervall interval_; + void buildKeys(bool); + void buildEntries(bool); + void makeKey(const string &, KeyInfo, bool isPatternString); + void processRegion(int start, int region_end); /* remove {} parts */ + void removeHead(KeyInfo&, int count=0); + + public: + LatexInfo(string const & par, bool isPatternString) + : entidx_(-1), interval_(isPatternString, par) + { + buildKeys(isPatternString); + entries_ = vector(); + buildEntries(isPatternString); + }; + int getFirstKey() { + entidx_ = 0; + if (entries_.empty()) { + return -1; + } + if (entries_[0].keytype == KeyInfo::isTitle) { + if (! entries_[0].disabled) { + interval_.hasTitle = true; + interval_.titleValue = entries_[0].head; + } + else { + interval_.hasTitle = false; + interval_.titleValue = ""; + } + removeHead(entries_[0]); + if (entries_.size() > 1) + return 1; + else + return -1; + } + return 0; + }; + int getNextKey() { + entidx_++; + if (int(entries_.size()) > entidx_) { + return entidx_; + } + else { + return -1; + } + }; + bool setNextKey(int idx) { + if ((idx == entidx_) && (entidx_ >= 0)) { + entidx_--; + return true; + } + else + return false; + }; + int find(int start, KeyInfo::KeyType keytype) { + if (start < 0) + return -1; + int tmpIdx = start; + while (tmpIdx < int(entries_.size())) { + if (entries_[tmpIdx].keytype == keytype) + return tmpIdx; + tmpIdx++; + } + return -1; + }; + int process(ostringstream &os, KeyInfo &actual); + int dispatch(ostringstream &os, int previousStart, KeyInfo &actual); + // string show(int lastpos) { return interval.show(lastpos);}; + int nextNotIgnored(int start) { return interval_.nextNotIgnored(start);}; + KeyInfo &getKeyInfo(int keyinfo) { + static KeyInfo invalidInfo = KeyInfo(); + if ((keyinfo < 0) || ( keyinfo >= int(entries_.size()))) + return invalidInfo; + else + return entries_[keyinfo]; + }; + void setForDefaultLang(KeyInfo &defLang) {interval_.setForDefaultLang(defLang);}; + void addIntervall(int low, int up) { interval_.addIntervall(low, up); }; +}; + + +int Intervall::findclosing(int start, int end, char up = '{', char down = '}', int repeat = 1) +{ + int skip = 0; + int depth = 0; + for (int i = start; i < end; i += 1 + skip) { + char c; + c = par[i]; + skip = 0; + if (c == '\\') skip = 1; + else if (c == up) { + depth++; + } + else if (c == down) { + repeat--; + if (depth == 0) { + if ((repeat <= 0) || (par[i+1] != up)) + return i; + } + --depth; + } + } + return end; +} + +class MathInfo { + class MathEntry { + public: + string wait; + size_t mathEnd; + size_t mathStart; + size_t mathSize; + }; + size_t actualIdx_; + vector entries_; + public: + MathInfo() { + actualIdx_ = 0; + } + void insert(string const & wait, size_t start, size_t end) { + MathEntry m = MathEntry(); + m.wait = wait; + m.mathStart = start; + m.mathEnd = end; + m.mathSize = end - start; + entries_.push_back(m); + } + bool empty() { return entries_.empty(); }; + size_t getEndPos() { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return 0; + } + return entries_[actualIdx_].mathEnd; + } + size_t getStartPos() { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return 100000; /* definitely enough? */ + } + return entries_[actualIdx_].mathStart; + } + size_t getFirstPos() { + actualIdx_ = 0; + return getStartPos(); + } + size_t getSize() { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return size_t(0); + } + return entries_[actualIdx_].mathSize; + } + void incrEntry() { actualIdx_++; }; }; +void LatexInfo::buildEntries(bool isPatternString) +{ + static regex const rmath("\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\}"); + static regex const rkeys("\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?))"); + static bool disableLanguageOverride = false; + smatch sub, submath; + bool evaluatingRegexp = false; + MathInfo mi; + bool evaluatingMath = false; + bool evaluatingCode = false; + size_t codeEnd = 0; + bool evaluatingOptional = false; + size_t optionalEnd = 0; + int codeStart = -1; + KeyInfo found; + bool math_end_waiting = false; + size_t math_pos = 10000; + string math_end; + + interval_.removeAccents(); + + for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) { + submath = *itmath; + if (math_end_waiting) { + size_t pos = submath.position(size_t(0)); + if ((math_end == "$") && + (submath.str(0) == "$") && + (interval_.par[pos-1] != '\\')) { + mi.insert("$", math_pos, pos + 1); + math_end_waiting = false; + } + else if ((math_end == "\\]") && + (submath.str(0) == "\\]")) { + mi.insert("\\]", math_pos, pos + 2); + math_end_waiting = false; + } + else if ((submath.str(1).compare("end") == 0) && + (submath.str(2).compare(math_end) == 0)) { + mi.insert(math_end, math_pos, pos + submath.str(0).length()); + math_end_waiting = false; + } + else + continue; + } + else { + if (submath.str(1).compare("begin") == 0) { + math_end_waiting = true; + math_end = submath.str(2); + math_pos = submath.position(size_t(0)); + } + else if (submath.str(0).compare("\\[") == 0) { + math_end_waiting = true; + math_end = "\\]"; + math_pos = submath.position(size_t(0)); + } + else if (submath.str(0) == "$") { + size_t pos = submath.position(size_t(0)); + if ((pos == 0) || (interval_.par[pos-1] != '\\')) { + math_end_waiting = true; + math_end = "$"; + math_pos = pos; + } + } + } + } + // Ignore language if there is math somewhere in pattern-string + if (isPatternString) { + if (! mi.empty()) { + // Disable language + keys["foreignlanguage"].disabled = true; + disableLanguageOverride = true; + } + else + disableLanguageOverride = false; + } + else { + if (disableLanguageOverride) { + keys["foreignlanguage"].disabled = true; + } + } + math_pos = mi.getFirstPos(); + for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) { + sub = *it; + string key = sub.str(3); + if (key == "") { + if (sub.str(0)[0] == '\\') + key = sub.str(0)[1]; + else { + key = sub.str(0); + if (key == "$") { + size_t k_pos = sub.position(size_t(0)); + if ((k_pos > 0) && (interval_.par[k_pos - 1] == '\\')) { + // Escaped '$', ignoring + continue; + } + } + } + }; + if (evaluatingRegexp) { + if (sub.str(1).compare("endregexp") == 0) { + evaluatingRegexp = false; + // found._tokenstart already set + found._dataEnd = sub.position(size_t(0)) + 13; + found._dataStart = found._dataEnd; + found._tokensize = found._dataEnd - found._tokenstart; + found.parenthesiscount = 0; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + } + else { + continue; + } + } + else { + if (evaluatingMath) { + if (size_t(sub.position(size_t(0))) < mi.getEndPos()) + continue; + evaluatingMath = false; + mi.incrEntry(); + math_pos = mi.getStartPos(); + } + if (keys.find(key) == keys.end()) { + found = KeyInfo(KeyInfo::isStandard, 0, true); + if (isPatternString) { + found.keytype = KeyInfo::isChar; + found.disabled = false; + found.used = true; + } + keys[key] = found; + } + else + found = keys[key]; + if (key.compare("regexp") == 0) { + evaluatingRegexp = true; + found._tokenstart = sub.position(size_t(0)); + found._tokensize = 0; + continue; + } + } + // Handle the other params of key + if (found.keytype == KeyInfo::isIgnored) + continue; + else if (found.keytype == KeyInfo::isMath) { + if (size_t(sub.position(size_t(0))) == math_pos) { + found = keys[key]; + found._tokenstart = sub.position(size_t(0)); + found._tokensize = mi.getSize(); + found._dataEnd = found._tokenstart + found._tokensize; + found._dataStart = found._dataEnd; + found.parenthesiscount = 0; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + evaluatingMath = true; + } + else { + // begin|end of unknown env, discard + // First handle tables + // longtable|tabular + bool discardComment; + found = keys[key]; + found.keytype = KeyInfo::doRemove; + if ((sub.str(5).compare("longtable") == 0) || + (sub.str(5).compare("tabular") == 0)) { + discardComment = true; /* '%' */ + } + else { + discardComment = false; + static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$"); + smatch sub2; + string token = sub.str(5); + if (regex_match(token, sub2, removeArgs)) { + found.keytype = KeyInfo::removeWithArg; + } + } + // discard spaces before pos(0) + int pos = sub.position(size_t(0)); + int count; + for (count = 0; pos - count > 0; count++) { + char c = interval_.par[pos-count-1]; + if (discardComment) { + if ((c != ' ') && (c != '%')) + break; + } + else if (c != ' ') + break; + } + found._tokenstart = pos - count; + if (sub.str(1).compare(0, 5, "begin") == 0) { + size_t pos1 = pos + sub.str(0).length(); + if (sub.str(5).compare("cjk") == 0) { + pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1; + if ((interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}')) + pos1 += 2; + found.keytype = KeyInfo::isMain; + found._dataStart = pos1; + found._dataEnd = interval_.par.length(); + found.disabled = keys["foreignlanguage"].disabled; + found.used = keys["foreignlanguage"].used; + found._tokensize = pos1 - found._tokenstart; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + } + else { + // Swallow possible optional params + while (interval_.par[pos1] == '[') { + pos1 = interval_.findclosing(pos1+1, interval_.par.length(), '[', ']')+1; + } + // Swallow also the eventual parameter + if (interval_.par[pos1] == '{') { + found._dataEnd = interval_.findclosing(pos1+1, interval_.par.length()) + 1; + } + else { + found._dataEnd = pos1; + } + found._dataStart = found._dataEnd; + found._tokensize = count + found._dataEnd - pos; + found.parenthesiscount = 0; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + found.disabled = true; + } + } + else { + // Handle "\end{...}" + found._dataStart = pos + sub.str(0).length(); + found._dataEnd = found._dataStart; + found._tokensize = count + found._dataEnd - pos; + found.parenthesiscount = 0; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + found.disabled = true; + } + } + } + else if (found.keytype != KeyInfo::isRegex) { + found._tokenstart = sub.position(size_t(0)); + if (found.parenthesiscount == 0) { + // Probably to be discarded + size_t following_pos = sub.position(size_t(0)) + sub.str(3).length() + 1; + char following = interval_.par[following_pos]; + if (following == ' ') + found.head = "\\" + sub.str(3) + " "; + else if (following == '=') { + // like \uldepth=1000pt + found.head = sub.str(0); + } + else + found.head = "\\" + key; + found._tokensize = found.head.length(); + found._dataEnd = found._tokenstart + found._tokensize; + found._dataStart = found._dataEnd; + } + else { + int params = found._tokenstart + key.length() + 1; + if (evaluatingOptional) { + if (size_t(found._tokenstart) > optionalEnd) { + evaluatingOptional = false; + } + else { + found.disabled = true; + } + } + int optend = params; + while (interval_.par[optend] == '[') { + // discard optional parameters + optend = interval_.findclosing(optend+1, interval_.par.length(), '[', ']') + 1; + } + if (optend > params) { + key += interval_.par.substr(params, optend-params); + evaluatingOptional = true; + optionalEnd = optend; + } + string token = sub.str(5); + int closings = found.parenthesiscount; + if (found.parenthesiscount == 1) { + found.head = "\\" + key + "{"; + } + else if (found.parenthesiscount > 1) { + if (token != "") { + found.head = sub.str(0) + "{"; + closings = found.parenthesiscount - 1; + } + else { + found.head = "\\" + key + "{"; + } + } + found._tokensize = found.head.length(); + found._dataStart = found._tokenstart + found.head.length(); + if (found.keytype == KeyInfo::doRemove) { + int endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); + found._dataStart = endpar; + found._tokensize = found._dataStart - found._tokenstart; + closings = 1; + } + if (interval_.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) { + found._dataStart += 15; + } + size_t endpos = interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); + if (found.keytype == KeyInfo::isList) { + // Check if it really is list env + static regex const listre("^([a-z]+)$"); + smatch sub2; + if (!regex_match(token, sub2, listre)) { + // Change the key of this entry. It is not in a list/item environment + found.keytype = KeyInfo::endArguments; + } + } + if (found.keytype == KeyInfo::noMain) { + evaluatingCode = true; + codeEnd = endpos; + codeStart = found._dataStart; + } + else if (evaluatingCode) { + if (size_t(found._dataStart) > codeEnd) + evaluatingCode = false; + else if (found.keytype == KeyInfo::isMain) { + // Disable this key, treate it as standard + found.keytype = KeyInfo::isStandard; + found.disabled = true; + if ((codeEnd == interval_.par.length()) && + (found._tokenstart == codeStart)) { + // trickery, because the code inset starts + // with \selectlanguage ... + codeEnd = endpos; + if (entries_.size() > 1) { + entries_[entries_.size()-1]._dataEnd = codeEnd; + } + } + } + } + if ((endpos == interval_.par.length()) && + (found.keytype == KeyInfo::doRemove)) { + // Missing closing => error in latex-input? + // therefore do not delete remaining data + found._dataStart -= 1; + found._dataEnd = found._dataStart; + } + else + found._dataEnd = endpos; + } + if (isPatternString) { + keys[key].used = true; + } + } + entries_.push_back(found); + } +} + +void LatexInfo::makeKey(const string &keysstring, KeyInfo keyI, bool isPatternString) +{ + stringstream s(keysstring); + string key; + const char delim = '|'; + while (getline(s, key, delim)) { + KeyInfo keyII(keyI); + if (isPatternString) { + keyII.used = false; + } + else if ( !keys[key].used) + keyII.disabled = true; + keys[key] = keyII; + } +} + +void LatexInfo::buildKeys(bool isPatternString) +{ + + static bool keysBuilt = false; + if (keysBuilt && !isPatternString) return; + + // Known standard keys with 1 parameter. + // Split is done, if not at start of region + makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString); + makeKey("textbf", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getSeries()), isPatternString); + makeKey("textit|textsc|textsl", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getShape()), isPatternString); + makeKey("uuline|uline|uwave", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getUnderline()), isPatternString); + makeKey("emph|noun", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getMarkUp()), isPatternString); + makeKey("sout|xout", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getStrikeOut()), isPatternString); + + makeKey("section|subsection|subsubsection|paragraph|subparagraph|minisec", + KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); + makeKey("section*|subsection*|subsubsection*|paragraph*", + KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); + makeKey("part|part*|chapter|chapter*", KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); + makeKey("title|subtitle|author|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|lyxaddress|lyxrightaddress", KeyInfo(KeyInfo::isTitle, 1, ignoreFormats.getFrontMatter()), isPatternString); + // Regex + makeKey("regexp", KeyInfo(KeyInfo::isRegex, 1, false), isPatternString); + + // Split is done, if not at start of region + makeKey("textcolor", KeyInfo(KeyInfo::isStandard, 2, ignoreFormats.getColor()), isPatternString); + makeKey("latexenvironment", KeyInfo(KeyInfo::isStandard, 2, false), isPatternString); + + // Split is done always. + makeKey("foreignlanguage", KeyInfo(KeyInfo::isMain, 2, ignoreFormats.getLanguage()), isPatternString); + + // Known charaters + // No split + makeKey("backslash|textbackslash|slash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // Spaces + makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // Skip + // makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // Custom space/skip, remove the content (== length value) + makeKey("vspace|vspace*|hspace|hspace*|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString); + // Found in fr/UserGuide.lyx + makeKey("og|fg", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // quotes + makeKey("textquotedbl|quotesinglbase|lyxarrow", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("textquotedblleft|textquotedblright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // Known macros to remove (including their parameter) + // No split + makeKey("input|inputencoding|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString); + makeKey("addtocounter|setlength", KeyInfo(KeyInfo::noContent, 2, true), isPatternString); + // handle like standard keys with 1 parameter. + makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString); + + // Ignore deleted text + makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString); + // but preserve added text + makeKey("lyxadded", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString); + + // Macros to remove, but let the parameter survive + // No split + makeKey("menuitem|textmd|textrm", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + + // Remove language spec from content of these insets + makeKey("code", KeyInfo(KeyInfo::noMain, 1, false), isPatternString); + + // Same effect as previous, parameter will survive (because there is no one anyway) + // No split + makeKey("noindent|textcompwordmark|maketitle", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); + // Remove table decorations + makeKey("hline|tabularnewline|toprule|bottomrule|midrule", KeyInfo(KeyInfo::doRemove, 0, true), isPatternString); + // Discard shape-header. + // For footnote or shortcut too, because of lang settings + // and wrong handling if used 'KeyInfo::noMain' + makeKey("circlepar|diamondpar|heartpar|nutpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("trianglerightpar|hexagonpar|starpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("triangleleftpar|shapepar|dropuppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("hphantom|vphantom|footnote|shortcut|include|includegraphics", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString); + // like ('tiny{}' or '\tiny ' ... ) + makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString); + + // Survives, like known character + makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString); + + makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); + makeKey("[|]", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); + makeKey("$", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); + + makeKey("par|uldepth|ULdepth|protect|nobreakdash|medskip|relax", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); + // Remove RTL/LTR marker + makeKey("l|r|textlr|textfr|textar|beginl|endl", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); + makeKey("lettrine", KeyInfo(KeyInfo::cleanToStart, 0, true), isPatternString); + makeKey("lyxslide", KeyInfo(KeyInfo::isSectioning, 1, true), isPatternString); + makeKey("endarguments", KeyInfo(KeyInfo::endArguments, 0, true), isPatternString); + makeKey("twocolumn", KeyInfo(KeyInfo::removeWithArg, 2, true), isPatternString); + makeKey("tnotetext|ead|fntext|cortext|address", KeyInfo(KeyInfo::removeWithArg, 0, true), isPatternString); + makeKey("lyxend", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); + if (isPatternString) { + // Allow the first searched string to rebuild the keys too + keysBuilt = false; + } + else { + // no need to rebuild again + keysBuilt = true; + } +} + +/* + * Keep the list of actual opened parentheses actual + * (e.g. depth == 4 means there are 4 '{' not processed yet) + */ +void Intervall::handleParentheses(int lastpos, bool closingAllowed) +{ + int skip = 0; + for (int i = depts[actualdeptindex]; i < lastpos; i+= 1 + skip) { + char c; + c = par[i]; + skip = 0; + if (c == '\\') skip = 1; + else if (c == '{') { + handleOpenP(i); + } + else if (c == '}') { + handleCloseP(i, closingAllowed); + } + } +} + +#if (0) +string Intervall::show(int lastpos) +{ + int idx = 0; /* int intervalls */ + string s; + int i = 0; + for (idx = 0; idx <= ignoreidx; idx++) { + while (i < lastpos) { + int printsize; + if (i <= borders[idx].low) { + if (borders[idx].low > lastpos) + printsize = lastpos - i; + else + printsize = borders[idx].low - i; + s += par.substr(i, printsize); + i += printsize; + if (i >= borders[idx].low) + i = borders[idx].upper; + } + else { + i = borders[idx].upper; + break; + } + } + } + if (lastpos > i) { + s += par.substr(i, lastpos-i); + } + return s; +} +#endif + +void Intervall::output(ostringstream &os, int lastpos) +{ + // get number of chars to output + int idx = 0; /* int intervalls */ + int i = 0; + int printed = 0; + string startTitle = titleValue; + for (idx = 0; idx <= ignoreidx; idx++) { + if (i < lastpos) { + if (i <= borders[idx].low) { + int printsize; + if (borders[idx].low > lastpos) + printsize = lastpos - i; + else + printsize = borders[idx].low - i; + if (printsize > 0) { + os << startTitle << par.substr(i, printsize); + i += printsize; + printed += printsize; + startTitle = ""; + } + handleParentheses(i, false); + if (i >= borders[idx].low) + i = borders[idx].upper; + } + else { + i = borders[idx].upper; + } + } + else + break; + } + if (lastpos > i) { + os << startTitle << par.substr(i, lastpos-i); + printed += lastpos-i; + } + handleParentheses(lastpos, false); + int startindex; + if (keys["foreignlanguage"].disabled) + startindex = actualdeptindex-langcount; + else + startindex = actualdeptindex; + for (int i = startindex; i > 0; --i) { + os << "}"; + } + if (hasTitle && (printed > 0)) + os << "}"; + if (! isPatternString_) + os << "\n"; + handleParentheses(lastpos, true); /* extra closings '}' allowed here */ +} + +void LatexInfo::processRegion(int start, int region_end) +{ + while (start < region_end) { /* Let {[} and {]} survive */ + int cnt = interval_.isOpeningPar(start); + if (cnt == 1) { + // Closing is allowed past the region + int closing = interval_.findclosing(start+1, interval_.par.length()); + interval_.addIntervall(start, start+1); + interval_.addIntervall(closing, closing+1); + } + else if (cnt == 3) + start += 2; + start = interval_.nextNotIgnored(start+1); + } +} + +void LatexInfo::removeHead(KeyInfo &actual, int count) +{ + if (actual.parenthesiscount == 0) { + // "{\tiny{} ...}" ==> "{{} ...}" + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart + actual._tokensize); + } + else { + // Remove header hull, that is "\url{abcd}" ==> "abcd" + interval_.addIntervall(actual._tokenstart - count, actual._dataStart); + interval_.addIntervall(actual._dataEnd, actual._dataEnd+1); + } +} + +int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) +{ + int nextKeyIdx = 0; + switch (actual.keytype) + { + case KeyInfo::isTitle: { + removeHead(actual); + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::cleanToStart: { + actual._dataEnd = actual._dataStart; + nextKeyIdx = getNextKey(); + // Search for end of arguments + int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); + if (tmpIdx > 0) { + for (int i = nextKeyIdx; i <= tmpIdx; i++) { + entries_[i].disabled = true; + } + actual._dataEnd = entries_[tmpIdx]._dataEnd; + } + while (interval_.par[actual._dataEnd] == ' ') + actual._dataEnd++; + interval_.addIntervall(0, actual._dataEnd+1); + interval_.actualdeptindex = 0; + interval_.depts[0] = actual._dataEnd+1; + interval_.closes[0] = -1; + break; + } + case KeyInfo::noContent: { /* char like "\hspace{2cm}" */ + if (actual.disabled) + interval_.addIntervall(actual._tokenstart, actual._dataEnd); + else + interval_.addIntervall(actual._dataStart, actual._dataEnd); + } + // fall through + case KeyInfo::isChar: { + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isSize: { + if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) { + processRegion(actual._dataEnd, actual._dataEnd+1); /* remove possibly following {} */ + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + nextKeyIdx = getNextKey(); + } else { + // Here _dataStart points to '{', so correct it + actual._dataStart += 1; + actual._tokensize += 1; + actual.parenthesiscount = 1; + if (interval_.par[actual._dataStart] == '}') { + // Determine the end if used like '{\tiny{}...}' + actual._dataEnd = interval_.findclosing(actual._dataStart+1, interval_.par.length()) + 1; + interval_.addIntervall(actual._dataStart, actual._dataStart+1); + } + else { + // Determine the end if used like '\tiny{...}' + actual._dataEnd = interval_.findclosing(actual._dataStart, interval_.par.length()) + 1; + } + // Split on this key if not at start + int start = interval_.nextNotIgnored(previousStart); + if (start < actual._tokenstart) { + interval_.output(os, actual._tokenstart); + interval_.addIntervall(start, actual._tokenstart); + } + // discard entry if at end of actual + nextKeyIdx = process(os, actual); + } + break; + } + case KeyInfo::endArguments: + // Remove trailing '{}' too + actual._dataStart += 1; + actual._dataEnd += 1; + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + nextKeyIdx = getNextKey(); + break; + case KeyInfo::noMain: + // fall through + case KeyInfo::isStandard: { + if (actual.disabled) { + removeHead(actual); + processRegion(actual._dataStart, actual._dataStart+1); + nextKeyIdx = getNextKey(); + } else { + // Split on this key if not at datastart of calling entry + int start = interval_.nextNotIgnored(previousStart); + if (start < actual._tokenstart) { + interval_.output(os, actual._tokenstart); + interval_.addIntervall(start, actual._tokenstart); + } + // discard entry if at end of actual + nextKeyIdx = process(os, actual); + } + break; + } + case KeyInfo::removeWithArg: { + nextKeyIdx = getNextKey(); + // Search for end of arguments + int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); + if (tmpIdx > 0) { + for (int i = nextKeyIdx; i <= tmpIdx; i++) { + entries_[i].disabled = true; + } + actual._dataEnd = entries_[tmpIdx]._dataEnd; + } + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + break; + } + case KeyInfo::doRemove: { + // Remove the key with all parameters and following spaces + size_t pos; + for (pos = actual._dataEnd+1; pos < interval_.par.length(); pos++) { + if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%')) + break; + } + // Remove also enclosing parentheses [] and {} + int numpars = 0; + int spaces = 0; + while (actual._tokenstart > numpars) { + if (interval_.par[pos+numpars] == ']' && interval_.par[actual._tokenstart-numpars-1] == '[') + numpars++; + else if (interval_.par[pos+numpars] == '}' && interval_.par[actual._tokenstart-numpars-1] == '{') + numpars++; + else + break; + } + if (numpars > 0) { + if (interval_.par[pos+numpars] == ' ') + spaces++; + } + + interval_.addIntervall(actual._tokenstart-numpars, pos+numpars+spaces); + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isList: { + // Discard space before _tokenstart + int count; + for (count = 0; count < actual._tokenstart; count++) { + if (interval_.par[actual._tokenstart-count-1] != ' ') + break; + } + nextKeyIdx = getNextKey(); + int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); + if (tmpIdx > 0) { + // Special case: \item is not a list, but a command (like in Style Author_Biography in maa-monthly.layout) + // with arguments + // How else can we catch this one? + for (int i = nextKeyIdx; i <= tmpIdx; i++) { + entries_[i].disabled = true; + } + actual._dataEnd = entries_[tmpIdx]._dataEnd; + } + else if (nextKeyIdx > 0) { + // Ignore any lang entries inside data region + for (int i = nextKeyIdx; i < int(entries_.size()) && entries_[i]._tokenstart < actual._dataEnd; i++) { + if (entries_[i].keytype == KeyInfo::isMain) + entries_[i].disabled = true; + } + } + if (actual.disabled) { + interval_.addIntervall(actual._tokenstart-count, actual._dataEnd+1); + } + else { + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart); + } + if (interval_.par[actual._dataEnd+1] == '[') { + int posdown = interval_.findclosing(actual._dataEnd+2, interval_.par.length(), '[', ']'); + if ((interval_.par[actual._dataEnd+2] == '{') && + (interval_.par[posdown-1] == '}')) { + interval_.addIntervall(actual._dataEnd+1,actual._dataEnd+3); + interval_.addIntervall(posdown-1, posdown+1); + } + else { + interval_.addIntervall(actual._dataEnd+1, actual._dataEnd+2); + interval_.addIntervall(posdown, posdown+1); + } + int blk = interval_.nextNotIgnored(actual._dataEnd+1); + if (blk > posdown) { + // Discard at most 1 space after empty item + int count; + for (count = 0; count < 1; count++) { + if (interval_.par[blk+count] != ' ') + break; + } + if (count > 0) + interval_.addIntervall(blk, blk+count); + } + } + break; + } + case KeyInfo::isSectioning: { + // Discard spaces before _tokenstart + int count; + int val = actual._tokenstart; + for (count = 0; count < actual._tokenstart;) { + val = interval_.previousNotIgnored(val-1); + if (interval_.par[val] != ' ') + break; + else { + count = actual._tokenstart - val; + } + } + if (actual.disabled) { + removeHead(actual, count); + nextKeyIdx = getNextKey(); + } else { + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart); + nextKeyIdx = process(os, actual); + } + break; + } + case KeyInfo::isMath: { + // Same as regex, use the content unchanged + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isRegex: { + // DO NOT SPLIT ON REGEX + // Do not disable + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isIgnored: { + // Treat like a character for now + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isMain: { + if (interval_.par.substr(actual._dataStart, 2) == "% ") + interval_.addIntervall(actual._dataStart, actual._dataStart+2); + if (actual._tokenstart > 0) { + int prev = interval_.previousNotIgnored(actual._tokenstart - 1); + if ((prev >= 0) && interval_.par[prev] == '%') + interval_.addIntervall(prev, prev+1); + } + if (actual.disabled) { + removeHead(actual); + interval_.langcount++; + if ((interval_.par.substr(actual._dataStart, 3) == " \\[") || + (interval_.par.substr(actual._dataStart, 8) == " \\begin{")) { + // Discard also the space before math-equation + interval_.addIntervall(actual._dataStart, actual._dataStart+1); + } + nextKeyIdx = getNextKey(); + // interval.resetOpenedP(actual._dataStart-1); + } + else { + if (actual._tokenstart < 26) { + // for the first (and maybe dummy) language + interval_.setForDefaultLang(actual); + } + interval_.resetOpenedP(actual._dataStart-1); + } + break; + } + case KeyInfo::invalid: + // This cannot happen, already handled + // fall through + default: { + // LYXERR0("Unhandled keytype"); + nextKeyIdx = getNextKey(); + break; + } + } + return nextKeyIdx; +} + +int LatexInfo::process(ostringstream &os, KeyInfo &actual ) +{ + int end = interval_.nextNotIgnored(actual._dataEnd); + int oldStart = actual._dataStart; + int nextKeyIdx = getNextKey(); + while (true) { + if ((nextKeyIdx < 0) || + (entries_[nextKeyIdx]._tokenstart >= actual._dataEnd) || + (entries_[nextKeyIdx].keytype == KeyInfo::invalid)) { + if (oldStart <= end) { + processRegion(oldStart, end); + oldStart = end+1; + } + break; + } + KeyInfo &nextKey = getKeyInfo(nextKeyIdx); + + if ((nextKey.keytype == KeyInfo::isMain) && !nextKey.disabled) { + (void) dispatch(os, actual._dataStart, nextKey); + end = nextKey._tokenstart; + break; + } + processRegion(oldStart, nextKey._tokenstart); + nextKeyIdx = dispatch(os, actual._dataStart, nextKey); + + oldStart = nextKey._dataEnd+1; + } + // now nextKey is either invalid or is outside of actual._dataEnd + // output the remaining and discard myself + if (oldStart <= end) { + processRegion(oldStart, end); + } + if (interval_.par[end] == '}') { + end += 1; + // This is the normal case. + // But if using the firstlanguage, the closing may be missing + } + // get minimum of 'end' and 'actual._dataEnd' in case that the nextKey.keytype was 'KeyInfo::isMain' + int output_end; + if (actual._dataEnd < end) + output_end = interval_.nextNotIgnored(actual._dataEnd); + else + output_end = interval_.nextNotIgnored(end); + if ((actual.keytype == KeyInfo::isMain) && actual.disabled) { + interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize); + } + // Remove possible empty data + int dstart = interval_.nextNotIgnored(actual._dataStart); + while (interval_.isOpeningPar(dstart) == 1) { + interval_.addIntervall(dstart, dstart+1); + int dend = interval_.findclosing(dstart+1, output_end); + interval_.addIntervall(dend, dend+1); + dstart = interval_.nextNotIgnored(dstart+1); + } + if (dstart < output_end) + interval_.output(os, output_end); + interval_.addIntervall(actual._tokenstart, end); + return nextKeyIdx; +} + +string splitOnKnownMacros(string par, bool isPatternString) +{ + ostringstream os; + LatexInfo li(par, isPatternString); + // LYXERR0("Berfore split: " << par); + KeyInfo DummyKey = KeyInfo(KeyInfo::KeyType::isMain, 2, true); + DummyKey.head = ""; + DummyKey._tokensize = 0; + DummyKey._dataStart = 0; + DummyKey._dataEnd = par.length(); + DummyKey.disabled = true; + int firstkeyIdx = li.getFirstKey(); + string s; + if (firstkeyIdx >= 0) { + KeyInfo firstKey = li.getKeyInfo(firstkeyIdx); + DummyKey._tokenstart = firstKey._tokenstart; + int nextkeyIdx; + if ((firstKey.keytype != KeyInfo::isMain) || firstKey.disabled) { + // Use dummy firstKey + firstKey = DummyKey; + (void) li.setNextKey(firstkeyIdx); + } + else { + if (par.substr(firstKey._dataStart, 2) == "% ") + li.addIntervall(firstKey._dataStart, firstKey._dataStart+2); + } + nextkeyIdx = li.process(os, firstKey); + while (nextkeyIdx >= 0) { + // Check for a possible gap between the last + // entry and this one + int datastart = li.nextNotIgnored(firstKey._dataStart); + KeyInfo &nextKey = li.getKeyInfo(nextkeyIdx); + if ((nextKey._tokenstart > datastart)) { + // Handle the gap + firstKey._dataStart = datastart; + firstKey._dataEnd = par.length(); + (void) li.setNextKey(nextkeyIdx); + // Fake the last opened parenthesis + li.setForDefaultLang(firstKey); + nextkeyIdx = li.process(os, firstKey); + } + else { + if (nextKey.keytype != KeyInfo::isMain) { + firstKey._dataStart = datastart; + firstKey._dataEnd = nextKey._dataEnd+1; + (void) li.setNextKey(nextkeyIdx); + li.setForDefaultLang(firstKey); + nextkeyIdx = li.process(os, firstKey); + } + else { + nextkeyIdx = li.process(os, nextKey); + } + } + } + // Handle the remaining + firstKey._dataStart = li.nextNotIgnored(firstKey._dataStart); + firstKey._dataEnd = par.length(); + // Check if ! empty + if ((firstKey._dataStart < firstKey._dataEnd) && + (par[firstKey._dataStart] != '}')) { + li.setForDefaultLang(firstKey); + (void) li.process(os, firstKey); + } + s = os.str(); + if (s.empty()) { + // return string definitelly impossible to match + s = "\\foreignlanguage{ignore}{ }"; + } + } + else + s = par; /* no known macros found */ + // LYXERR0("After split: " << s); + return s; +} + +/* + * Try to unify the language specs in the latexified text. + * Resulting modified string is set to "", if + * the searched tex does not contain all the features in the search pattern + */ +static string correctlanguagesetting(string par, bool isPatternString, bool withformat) +{ + static Features regex_f; + static int missed = 0; + static bool regex_with_format = false; + + int parlen = par.length(); + + while ((parlen > 0) && (par[parlen-1] == '\n')) { + parlen--; + } + if (isPatternString && (parlen > 0) && (par[parlen-1] == '~')) { + // Happens to be there in case of description or labeling environment + parlen--; + } + string result; + if (withformat) { + // Split the latex input into pieces which + // can be digested by our search engine + LYXERR(Debug::FIND, "input: \"" << par << "\""); + result = splitOnKnownMacros(par.substr(0,parlen), isPatternString); + LYXERR(Debug::FIND, "After split: \"" << result << "\""); + } + else + result = par.substr(0, parlen); + if (isPatternString) { + missed = 0; + if (withformat) { + regex_f = identifyFeatures(result); + string features = ""; + for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) { + string a = it->first; + regex_with_format = true; + features += " " + a; + // LYXERR0("Identified regex format:" << a); + } + LYXERR(Debug::FIND, "Identified Features" << features); + + } + } else if (regex_with_format) { + Features info = identifyFeatures(result); + for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) { + string a = it->first; + bool b = it->second; + if (b && ! info[a]) { + missed++; + LYXERR(Debug::FIND, "Missed(" << missed << " " << a <<", srclen = " << parlen ); + return ""; + } + } + } + else { + // LYXERR0("No regex formats"); + } + return result; +} + + +// Remove trailing closure of math, macros and environments, so to catch parts of them. +static int identifyClosing(string & t) +{ + int open_braces = 0; + do { + LYXERR(Debug::FIND, "identifyClosing(): t now is '" << t << "'"); + if (regex_replace(t, t, "(.*[^\\\\])\\$" REGEX_EOS, "$1")) + continue; + if (regex_replace(t, t, "(.*[^\\\\]) \\\\\\]" REGEX_EOS, "$1")) + continue; + if (regex_replace(t, t, "(.*[^\\\\]) \\\\end\\{[a-zA-Z_]*\\*?\\}" REGEX_EOS, "$1")) + continue; + if (regex_replace(t, t, "(.*[^\\\\])\\}" REGEX_EOS, "$1")) { + ++open_braces; + continue; + } + break; + } while (true); + return open_braces; +} + +static int num_replaced = 0; +static bool previous_single_replace = true; MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt) : p_buf(&buf), p_first_buf(&buf), opt(opt) { - par_as_string = normalize(opt.search); + Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true); + docstring const & ds = stringifySearchBuffer(find_buf, opt); + use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos; + if (opt.replace_all && previous_single_replace) { + previous_single_replace = false; + num_replaced = 0; + } + else if (!opt.replace_all) { + num_replaced = 0; // count number of replaced strings + previous_single_replace = true; + } + // When using regexp, braces are hacked already by escape_for_regex() + par_as_string = normalize(ds, !use_regexp); open_braces = 0; close_wildcards = 0; - if (! opt.regexp) { - // Remove trailing closure of math, macros and environments, so to catch parts of them. - do { - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - if (regex_replace(par_as_string, par_as_string, "(.*)[[:blank:]]\\'", "$1")) - continue; - if (regex_replace(par_as_string, par_as_string, "(.*[^\\\\]) ?\\$\\'", "$1")) - continue; - // @todo need to account for open square braces as well ? - if (regex_replace(par_as_string, par_as_string, "(.*[^\\\\]) ?\\\\\\]\\'", "$1")) - continue; - if (regex_replace(par_as_string, par_as_string, "(.*[^\\\\]) ?\\\\end\\{[a-zA-Z_]*\\}\\'", "$1")) - continue; - if (regex_replace(par_as_string, par_as_string, "(.*[^\\\\]) ?\\}\\'", "$1")) { - ++open_braces; - continue; - } - break; - } while (true); + size_t lead_size = 0; + // correct the language settings + par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat); + if (opt.ignoreformat) { + if (!use_regexp) { + // if par_as_string_nolead were emty, + // the following call to findAux will always *find* the string + // in the checked data, and thus always using the slow + // examining of the current text part. + par_as_string_nolead = par_as_string; + } + } else { + lead_size = identifyLeading(par_as_string); + LYXERR(Debug::FIND, "Lead_size: " << lead_size); + lead_as_string = par_as_string.substr(0, lead_size); + par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); + } + + if (!use_regexp) { + open_braces = identifyClosing(par_as_string); + identifyClosing(par_as_string_nolead); LYXERR(Debug::FIND, "Open braces: " << open_braces); LYXERR(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'"); } else { - par_as_string = escape_for_regex(par_as_string); + string lead_as_regexp; + if (lead_size > 0) { + // @todo No need to search for \regexp{} insets in leading material + lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat); + par_as_string = par_as_string_nolead; + LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); + LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + } + par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat); // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them. LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); if ( // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex) regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2") - // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\\\\\\\\\])\\'", "$1(.*?)$2") - // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, - "(.*[^\\\\])(\\\\\\\\end\\\\\\{[a-zA-Z_]*\\\\\\})\\'", "$1(.*?)$2") - // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2") - ) { + // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex) + || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])( \\\\\\\\\\\\\\])\\'", "$1(.*?)$2") + // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex) + || regex_replace(par_as_string, par_as_string, + "(.*[^\\\\])( \\\\\\\\end\\\\\\{[a-zA-Z_]*)(\\\\\\*)?(\\\\\\})\\'", "$1(.*?)$2$3$4") + // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex) + || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2") + ) { ++close_wildcards; } + if (!opt.ignoreformat) { + // Remove extra '\}' at end if not part of \{\.\} + size_t lng = par_as_string.size(); + while(lng > 2) { + if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) { + if (lng >= 6) { + if (par_as_string.substr(lng-6,3).compare("\\{\\") == 0) + break; + } + lng -= 2; + open_braces++; + } + else + break; + } + if (lng < par_as_string.size()) + par_as_string = par_as_string.substr(0,lng); + /* + // save '\.' + regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_"); + // handle '.' -> '[^]', replace later as '[^\}\{\\]' + regex_replace(par_as_string, par_as_string, "\\.", "[^]"); + // replace '[^...]' with '[^...\}\{\\]' + regex_replace(par_as_string, par_as_string, "\\[\\^([^\\\\\\]]*)\\]", "_xxbrlxx_$1\\}\\{\\\\_xxbrrxx_"); + regex_replace(par_as_string, par_as_string, "_xxbrlxx_", "[^"); + regex_replace(par_as_string, par_as_string, "_xxbrrxx_", "]"); + // restore '\.' + regex_replace(par_as_string, par_as_string, "_xxbdotxx_", "\\."); + */ + } LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); LYXERR(Debug::FIND, "Open braces: " << open_braces); LYXERR(Debug::FIND, "Close .*? : " << close_wildcards); LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); + // If entered regexp must match at begin of searched string buffer - regexp = lyx::regex(string("\\`") + par_as_string); - // If entered regexp may match wherever in searched string buffer - regexp2 = lyx::regex(string("\\`.*") + par_as_string); + // Kornel: Added parentheses to use $1 for size of the leading string + string regexp_str; + string regexp2_str; + { + // TODO: Adapt '\[12345678]' in par_as_string to acount for the first '() + // Unfortunately is '\1', '\2', etc not working for strings with extra format + // so the convert has no effect in that case + for (int i = 8; i > 0; --i) { + string orig = "\\\\" + std::to_string(i); + string dest = "\\" + std::to_string(i+1); + while (regex_replace(par_as_string, par_as_string, orig, dest)); + } + regexp_str = "(" + lead_as_regexp + ")" + par_as_string; + regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string; + } + LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + regexp = lyx::regex(regexp_str); + + LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + regexp2 = lyx::regex(regexp2_str); } } -int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const +// Count number of characters in string +// {]} ==> 1 +// \& ==> 1 +// --- ==> 1 +// \\[a-zA-Z]+ ==> 1 +static int computeSize(string s, int len) { + if (len == 0) + return 0; + int skip = 1; + int count = 0; + for (int i = 0; i < len; i += skip, count++) { + if (s[i] == '\\') { + skip = 2; + if (isalpha(s[i+1])) { + for (int j = 2; i+j < len; j++) { + if (! isalpha(s[i+j])) { + if (s[i+j] == ' ') + skip++; + else if ((s[i+j] == '{') && s[i+j+1] == '}') + skip += 2; + else if ((s[i+j] == '{') && (i + j + 1 >= len)) + skip++; + break; + } + skip++; + } + } + } + else if (s[i] == '{') { + if (s[i+1] == '}') + skip = 2; + else + skip = 3; + } + else if (s[i] == '-') { + if (s[i+1] == '-') { + if (s[i+2] == '-') + skip = 3; + else + skip = 2; + } + else + skip = 1; + } + else { + skip = 1; + } + } + return count; +} + +MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const +{ + MatchResult mres; + + if (at_begin && + (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) ) + return mres; + docstring docstr = stringifyFromForSearch(opt, cur, len); + string str = normalize(docstr, true); + if (!opt.ignoreformat) { + str = correctlanguagesetting(str, false, !opt.ignoreformat); + } + if (str.empty()) { + mres.match_len = -1; + return mres; + } LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); - string str = normalize(docstr); LYXERR(Debug::FIND, "After normalization: '" << str << "'"); - if (! opt.regexp) { - LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='" << par_as_string << "', str='" << str << "'"); + + if (use_regexp) { + LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); + regex const *p_regexp; + regex_constants::match_flag_type flags; if (at_begin) { - LYXERR(Debug::FIND, "size=" << par_as_string.size() << ", substr='" << str.substr(0, par_as_string.size()) << "'"); - if (str.substr(0, par_as_string.size()) == par_as_string) - return par_as_string.size(); + flags = regex_constants::match_continuous; + p_regexp = ®exp; } else { - string t = par_as_string; - while (regex_replace(t, t, "\\\\(emph|textbf|subsubsection|subsection|section|subparagraph|paragraph|part)\\{", "") - || regex_replace(t, t, "^\\$", "") - || regex_replace(t, t, "^\\\\\\[ ", "")) - LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: " << t); - size_t pos = str.find(t); - if (pos != string::npos) - return par_as_string.size(); + flags = regex_constants::match_default; + p_regexp = ®exp2; } - } else { - LYXERR(Debug::FIND, "Searching in regexp mode"); - // Try all possible regexp matches, - //until one that verifies the braces match test is found - regex const *p_regexp = at_begin ? ®exp : ®exp2; - sregex_iterator re_it(str.begin(), str.end(), *p_regexp); - sregex_iterator re_it_end; - for (; re_it != re_it_end; ++re_it) { - match_results const & m = *re_it; + sregex_iterator re_it(str.begin(), str.end(), *p_regexp, flags); + if (re_it == sregex_iterator()) + return mres; + match_results const & m = *re_it; + + if (0) { // Kornel Benko: DO NOT CHECKK // Check braces on the segment that matched the entire regexp expression, // plus the last subexpression, if a (.*?) was inserted in the constructor. if (!braces_match(m[0].first, m[0].second, open_braces)) - return 0; - // Check braces on segments that matched all (.*?) subexpressions. - for (size_t i = 1; i < m.size(); ++i) - if (!braces_match(m[i].first, m[i].second)) - return false; - // Exclude from the returned match length any length - // due to close wildcards added at end of regexp - if (close_wildcards == 0) - return m[0].second - m[0].first; + return mres; + } + + // Check braces on segments that matched all (.*?) subexpressions, + // except the last "padding" one inserted by lyx. + for (size_t i = 1; i < m.size() - 1; ++i) + if (!braces_match(m[i].first, m[i].second, open_braces)) + return mres; + + // Exclude from the returned match length any length + // due to close wildcards added at end of regexp + // and also the length of the leading (e.g. '\emph{}') + // + // Whole found string, including the leading: m[0].second - m[0].first + // Size of the leading string: m[1].second - m[1].first + int leadingsize = 0; + if (m.size() > 1) + leadingsize = m[1].second - m[1].first; + int result; + for (size_t i = 0; i < m.size(); i++) { + LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long"); + } + if (close_wildcards == 0) + result = m[0].second - m[0].first; + + else + result = m[m.size() - close_wildcards].first - m[0].first; + + size_t pos = m.position(size_t(0)); + // Ignore last closing characters + while (result > 0) { + if (str[pos+result-1] == '}') + --result; else - return m[m.size() - close_wildcards].first - m[0].first; + break; } + if (result > leadingsize) + result -= leadingsize; + else + result = 0; + mres.match_len = computeSize(str.substr(pos+leadingsize,result), result); + mres.match2end = str.size() - pos - leadingsize; + mres.pos = pos+leadingsize; + return mres; } - return 0; + + // else !use_regexp: but all code paths above return + LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='" + << par_as_string << "', str='" << str << "'"); + LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='" + << lead_as_string << "', par_as_string_nolead='" + << par_as_string_nolead << "'"); + + if (at_begin) { + LYXERR(Debug::FIND, "size=" << par_as_string.size() + << ", substr='" << str.substr(0, par_as_string.size()) << "'"); + if (str.substr(0, par_as_string.size()) == par_as_string) { + mres.match_len = par_as_string.size(); + mres.match2end = str.size(); + mres.pos = 0; + return mres; + } + } else { + // Start the search _after_ the leading part + size_t pos = str.find(par_as_string_nolead, lead_as_string.size()); + if (pos != string::npos) { + mres.match_len = par_as_string.size(); + mres.match2end = str.size() - pos; + mres.pos = pos; + return mres; + } + } + return mres; } -int MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const +MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const { - int res = findAux(cur, len, at_begin); + MatchResult mres = findAux(cur, len, at_begin); + int res = mres.match_len; + LYXERR(Debug::FIND, + "res=" << res << ", at_begin=" << at_begin + << ", matchword=" << opt.matchword + << ", inTexted=" << cur.inTexted()); if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted()) - return res; + return mres; + if ((len > 0) && (res < len)) { + mres.match_len = 0; + return mres; + } Paragraph const & par = cur.paragraph(); - bool ws_left = cur.pos() > 0 ? - par.isWordSeparator(cur.pos() - 1) : true; - bool ws_right = cur.pos() + res < par.size() ? - par.isWordSeparator(cur.pos() + res) : true; + bool ws_left = (cur.pos() > 0) + ? par.isWordSeparator(cur.pos() - 1) + : true; + bool ws_right = (cur.pos() + len < par.size()) + ? par.isWordSeparator(cur.pos() + len) + : true; LYXERR(Debug::FIND, "cur.pos()=" << cur.pos() << ", res=" << res << ", separ: " << ws_left << ", " << ws_right + << ", len: " << len << endl); - if (ws_left && ws_right) - return res; - return 0; + if (ws_left && ws_right) { + // Check for word separators inside the found 'word' + for (int i = 0; i < len; i++) { + if (par.isWordSeparator(cur.pos() + i)) { + mres.match_len = 0; + return mres; + } + } + return mres; + } + mres.match_len = 0; + return mres; } -string MatchStringAdv::normalize(docstring const & s) const +string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const { string t; if (! opt.casesensitive) @@ -848,19 +3065,54 @@ string MatchStringAdv::normalize(docstring const & s) const else t = lyx::to_utf8(s); // Remove \n at begin - while (t.size() > 0 && t[0] == '\n') + while (!t.empty() && t[0] == '\n') t = t.substr(1); // Remove \n at end - while (t.size() > 0 && t[t.size() - 1] == '\n') + while (!t.empty() && t[t.size() - 1] == '\n') t = t.substr(0, t.size() - 1); size_t pos; - // Replace all other \n with spaces - while ((pos = t.find("\n")) != string::npos) - t.replace(pos, 1, " "); + // Handle all other '\n' + while ((pos = t.find("\n")) != string::npos) { + if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) { + // Handle '\\\n' + if (isAlnumASCII(t[pos+1])) { + t.replace(pos-2, 3, " "); + } + else { + t.replace(pos-2, 3, ""); + } + } + else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) { + // '\n' adjacent to non-alpha-numerics, discard + t.replace(pos, 1, ""); + } + else { + // Replace all other \n with spaces + t.replace(pos, 1, " "); + } + } // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify + // Kornel: Added textsl, textsf, textit, texttt and noun + // + allow to seach for colored text too LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); - while (regex_replace(t, t, "\\\\(emph|textbf|subsubsection|subsection|section|subparagraph|paragraph|part)(\\{\\})+", "")) + while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) + LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); + while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); + + while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); + // FIXME - check what preceeds the brace + if (hack_braces) { + if (opt.ignoreformat) + while (regex_replace(t, t, "\\{", "_x_<") + || regex_replace(t, t, "\\}", "_x_>")) + LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); + else + while (regex_replace(t, t, "\\\\\\{", "_x_<") + || regex_replace(t, t, "\\\\\\}", "_x_>")) + LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); + } + return t; } @@ -869,31 +3121,38 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) { LYXERR(Debug::FIND, "Stringifying with len=" << len << " from cursor at pos: " << cur); if (cur.inTexted()) { - Paragraph const & par = cur.paragraph(); - // TODO what about searching beyond/across paragraph breaks ? - // TODO Try adding a AS_STR_INSERTS as last arg - pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ? - int(par.size()) : cur.pos() + len; - OutputParams runparams(&cur.buffer()->params().encoding()); - odocstringstream os; - runparams.nice = true; - runparams.flavor = OutputParams::LATEX; - runparams.linelen = 100000; //lyxrc.plaintext_linelen; - // No side effect of file copying and image conversion - runparams.dryrun = true; - LYXERR(Debug::FIND, "Stringifying with cur: " - << cur << ", from pos: " << cur.pos() << ", end: " << end); - return par.stringify(cur.pos(), end, AS_STR_INSETS, runparams); + Paragraph const & par = cur.paragraph(); + // TODO what about searching beyond/across paragraph breaks ? + // TODO Try adding a AS_STR_INSERTS as last arg + pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ? + int(par.size()) : cur.pos() + len; + // OutputParams runparams(&cur.buffer()->params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); + runparams.nice = true; + runparams.flavor = OutputParams::XETEX; + runparams.linelen = 10000; //lyxrc.plaintext_linelen; + // No side effect of file copying and image conversion + runparams.dryrun = true; + runparams.for_search = true; + LYXERR(Debug::FIND, "Stringifying with cur: " + << cur << ", from pos: " << cur.pos() << ", end: " << end); + return par.asString(cur.pos(), end, + AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + &runparams); } else if (cur.inMathed()) { - odocstringstream os; - CursorSlice cs = cur.top(); - MathData md = cs.cell(); - MathData::const_iterator it_end = - ( ( len == -1 || cs.pos() + len > int(md.size()) ) - ? md.end() : md.begin() + cs.pos() + len ); - for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it) - os << *it; - return os.str(); + CursorSlice cs = cur.top(); + MathData md = cs.cell(); + MathData::const_iterator it_end = + (( len == -1 || cs.pos() + len > int(md.size())) + ? md.end() + : md.begin() + cs.pos() + len ); + MathData md2; + for (MathData::const_iterator it = md.begin() + cs.pos(); + it != it_end; ++it) + md2.push_back(*it); + docstring s = asString(md2); + LYXERR(Debug::FIND, "Stringified math: '" << s << "'"); + return s; } LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur); return docstring(); @@ -908,53 +3167,60 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) { LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" - << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); + << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); Buffer const & buf = *cur.buffer(); - LASSERT(buf.isLatex(), /* */); - TexRow texrow; odocstringstream ods; otexstream os(ods); - OutputParams runparams(&buf.params().encoding()); + //OutputParams runparams(&buf.params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = false; - runparams.flavor = OutputParams::LATEX; + runparams.flavor = OutputParams::XETEX; runparams.linelen = 8000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; + runparams.for_search = true; if (cur.inTexted()) { // @TODO what about searching beyond/across paragraph breaks ? pos_type endpos = cur.paragraph().size(); if (len != -1 && endpos > cur.pos() + len) endpos = cur.pos() + len; - TeXOnePar(buf, *cur.innerText(), cur.pit(), os, texrow, runparams, - string(), cur.pos(), endpos); - LYXERR(Debug::FIND, "Latexified text: '" << lyx::to_utf8(ods.str()) << "'"); + TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams, + string(), cur.pos(), endpos); + string s = lyx::to_utf8(ods.str()); + LYXERR(Debug::FIND, "Latexified +modified text: '" << s << "'"); + return(lyx::from_utf8(s)); } else if (cur.inMathed()) { // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly for (int s = cur.depth() - 1; s >= 0; --s) { - CursorSlice const & cs = cur[s]; - if (cs.asInsetMath() && cs.asInsetMath() && cs.asInsetMath()->asHullInset()) { - WriteStream ws(ods); - cs.asInsetMath()->asHullInset()->header_write(ws); - break; - } + CursorSlice const & cs = cur[s]; + if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) { + WriteStream ws(os); + cs.asInsetMath()->asHullInset()->header_write(ws); + break; + } } CursorSlice const & cs = cur.top(); MathData md = cs.cell(); - MathData::const_iterator it_end = ( ( len == -1 || cs.pos() + len > int(md.size()) ) - ? md.end() : md.begin() + cs.pos() + len ); - for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it) - ods << *it; - + MathData::const_iterator it_end = + ((len == -1 || cs.pos() + len > int(md.size())) + ? md.end() + : md.begin() + cs.pos() + len); + MathData md2; + for (MathData::const_iterator it = md.begin() + cs.pos(); + it != it_end; ++it) + md2.push_back(*it); + + ods << asString(md2); // Retrieve the math environment type, and add '$' or '$]' // or others (\end{equation}) accordingly for (int s = cur.depth() - 1; s >= 0; --s) { - CursorSlice const & cs = cur[s]; - InsetMath * inset = cs.asInsetMath(); + CursorSlice const & cs2 = cur[s]; + InsetMath * inset = cs2.asInsetMath(); if (inset && inset->asHullInset()) { - WriteStream ws(ods); + WriteStream ws(os); inset->asHullInset()->footer_write(ws); break; } @@ -982,28 +3248,91 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match) d = cur.depth(); old_cur = cur; cur.forwardPos(); - } while (cur && cur.depth() > d && match(cur) > 0); + } while (cur && cur.depth() > d && match(cur).match_len > 0); cur = old_cur; - LASSERT(match(cur) > 0, /* */); + int max_match = match(cur).match_len; /* match valid only if not searching whole words */ + if (max_match <= 0) return 0; LYXERR(Debug::FIND, "Ok"); // Compute the match length - int len = 1; + int len = 1; if (cur.pos() + len > cur.lastpos()) - return 0; - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - while (cur.pos() + len <= cur.lastpos() && match(cur, len) == 0) { - ++len; - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - } - // Length of matched text (different from len param) - int old_len = match(cur, len); - int new_len; - // Greedy behaviour while matching regexps - while ((new_len = match(cur, len + 1)) > old_len) { - ++len; - old_len = new_len; - LYXERR(Debug::FIND, "verifying match with len = " << len); + return 0; + if (match.opt.matchword) { + LYXERR(Debug::FIND, "verifying unmatch with len = " << len); + while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) { + ++len; + LYXERR(Debug::FIND, "verifying unmatch with len = " << len); + } + // Length of matched text (different from len param) + int old_match = match(cur, len).match_len; + if (old_match < 0) + old_match = 0; + int new_match; + // Greedy behaviour while matching regexps + while ((new_match = match(cur, len + 1).match_len) > old_match) { + ++len; + old_match = new_match; + LYXERR(Debug::FIND, "verifying match with len = " << len); + } + if (old_match == 0) + len = 0; + } + else { + int minl = 1; + int maxl = cur.lastpos() - cur.pos(); + // Greedy behaviour while matching regexps + while (maxl > minl) { + int actual_match = match(cur, len).match_len; + if (actual_match >= max_match) { + // actual_match > max_match _can_ happen, + // if the search area splits + // some following word so that the regex + // (e.g. 'r.*r\b' matches 'r' from the middle of the + // splitted word) + // This means, the len value is too big + maxl = len; + len = (int)((maxl + minl)/2); + } + else { + // (actual_match < max_match) + minl = len + 1; + len = (int)((maxl + minl)/2); + } + } + old_cur = cur; + // Search for real start of matched characters + while (len > 1) { + int actual_match; + do { + cur.forwardPos(); + } while (cur.depth() > old_cur.depth()); /* Skip inner insets */ + if (cur.depth() < old_cur.depth()) { + // Outer inset? + LYXERR0("cur.depth() < old_cur.depth(), this should never happen"); + break; + } + if (cur.pos() != old_cur.pos()) { + // OK, forwarded 1 pos in actual inset + actual_match = match(cur, len-1).match_len; + if (actual_match == max_match) { + // Ha, got it! The shorter selection has the same match length + len--; + old_cur = cur; + } + else { + // OK, the shorter selection matches less chars, revert to previous value + cur = old_cur; + break; + } + } + else { + LYXERR0("cur.pos() == old_cur.pos(), this should never happen"); + actual_match = match(cur, len).match_len; + if (actual_match == max_match) + old_cur = cur; + } + } } return len; } @@ -1014,17 +3343,96 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) { if (!cur) return 0; - while (cur && !match(cur, -1, false)) { - if (cur.pit() < cur.lastpit()) + while (!theApp()->longOperationCancelled() && cur) { + LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); + MatchResult mres = match(cur, -1, false); + int match_len = mres.match_len; + LYXERR(Debug::FIND, "match_len: " << match_len); + if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) { + LYXERR0("BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); + match_len = 0; + } + if (match_len > 0) { + // Try to find the begin of searched string + int increment = mres.pos/2; + while (mres.pos > 5 && (increment > 5)) { + DocIterator old_cur = cur; + for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { + } + if (! cur || (cur.pit() > old_cur.pit())) { + // Are we outside of the paragraph? + // This can happen if moving past some UTF8-encoded chars + cur = old_cur; + increment /= 2; + } + else { + MatchResult mres2 = match(cur, -1, false); + if ((mres2.match2end < mres.match2end) || + (mres2.match_len < mres.match_len)) { + cur = old_cur; + increment /= 2; + } + else { + mres = mres2; + increment -= 2; + if (increment > mres.pos/2) + increment = mres.pos/2; + } + } + } + int match_len_zero_count = 0; + for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { + if (i++ > 10) { + int remaining_len = match(cur, -1, false).match_len; + if (remaining_len <= 0) { + // Apparently the searched string is not in the remaining part + break; + } + else { + i = 0; + } + } + LYXERR(Debug::FIND, "Advancing cur: " << cur); + int match_len3 = match(cur, 1).match_len; + if (match_len3 < 0) + continue; + int match_len2 = match(cur).match_len; + LYXERR(Debug::FIND, "match_len2: " << match_len2); + if (match_len2 > 0) { + // Sometimes in finalize we understand it wasn't a match + // and we need to continue the outest loop + int len = findAdvFinalize(cur, match); + if (len > 0) { + return len; + } + } + if (match_len2 >= 0) { + if (match_len2 == 0) + match_len_zero_count++; + else + match_len_zero_count = 0; + } + else { + if (++match_len_zero_count > 3) { + LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len); + match_len_zero_count = 0; + } + break; + } + } + if (!cur) + return 0; + } + if (match_len >= 0 && cur.pit() < cur.lastpit()) { + LYXERR(Debug::FIND, "Advancing par: cur=" << cur); cur.forwardPar(); - else { + } else { + // This should exit nested insets, if any, or otherwise undefine the currsor. + cur.pos() = cur.lastpos(); + LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); cur.forwardPos(); } } - for (; cur; cur.forwardPos()) { - if (match(cur)) - return findAdvFinalize(cur, match); - } return 0; } @@ -1040,7 +3448,7 @@ int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur); DocIterator new_cur = cur; new_cur.backwardPos(); - if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur)) + if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len) break; int new_len = findAdvFinalize(new_cur, match); if (new_len == len) @@ -1053,7 +3461,8 @@ int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) /// Finds backwards -int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) { +int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) +{ if (! cur) return 0; // Backup of original position @@ -1062,12 +3471,10 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) { return 0; cur.backwardPos(); DocIterator cur_orig(cur); - bool found_match; bool pit_changed = false; - found_match = false; do { cur.pos() = 0; - found_match = match(cur, -1, false); + bool found_match = (match(cur, -1, false).match_len > 0); if (found_match) { if (pit_changed) @@ -1077,8 +3484,8 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) { LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur); DocIterator cur_prev_iter; do { - found_match = match(cur); - LYXERR(Debug::FIND, "findBackAdv3: found_match=" + found_match = (match(cur).match_len > 0); + LYXERR(Debug::FIND, "findBackAdv3: found_match=" << found_match << ", cur: " << cur); if (found_match) return findMostBackwards(cur, match); @@ -1097,18 +3504,19 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) { else cur.backwardPos(); pit_changed = true; - } while (true); + } while (!theApp()->longOperationCancelled()); return 0; } -} // anonym namespace +} // namespace docstring stringifyFromForSearch(FindAndReplaceOptions const & opt, - DocIterator const & cur, int len) + DocIterator const & cur, int len) { - LASSERT(cur.pos() >= 0 && cur.pos() <= cur.lastpos(), /* */); + if (cur.pos() < 0 || cur.pos() > cur.lastpos()) + return docstring(); if (!opt.ignoreformat) return latexifyFromCursor(cur, len); else @@ -1116,13 +3524,14 @@ docstring stringifyFromForSearch(FindAndReplaceOptions const & opt, } -FindAndReplaceOptions::FindAndReplaceOptions(docstring const & search, bool casesensitive, - bool matchword, bool forward, bool expandmacros, bool ignoreformat, - bool regexp, docstring const & replace, bool keep_case, - SearchScope scope) - : search(search), casesensitive(casesensitive), matchword(matchword), - forward(forward), expandmacros(expandmacros), ignoreformat(ignoreformat), - regexp(regexp), replace(replace), keep_case(keep_case), scope(scope) +FindAndReplaceOptions::FindAndReplaceOptions( + docstring const & _find_buf_name, bool _casesensitive, + bool _matchword, bool _forward, bool _expandmacros, bool _ignoreformat, + docstring const & _repl_buf_name, bool _keep_case, + SearchScope _scope, SearchRestriction _restr, bool _replace_all) + : find_buf_name(_find_buf_name), casesensitive(_casesensitive), matchword(_matchword), + forward(_forward), expandmacros(_expandmacros), ignoreformat(_ignoreformat), + repl_buf_name(_repl_buf_name), keep_case(_keep_case), scope(_scope), restr(_restr), replace_all(_replace_all) { } @@ -1131,9 +3540,16 @@ namespace { /** Check if 'len' letters following cursor are all non-lowercase */ -static bool allNonLowercase(DocIterator const & cur, int len) { - pos_type end_pos = cur.pos() + len; - for (pos_type pos = cur.pos(); pos != end_pos; ++pos) +static bool allNonLowercase(Cursor const & cur, int len) +{ + pos_type beg_pos = cur.selectionBegin().pos(); + pos_type end_pos = cur.selectionBegin().pos() + len; + if (len > cur.lastpos() + 1 - beg_pos) { + LYXERR(Debug::FIND, "This should not happen, more debug needed"); + len = cur.lastpos() + 1 - beg_pos; + end_pos = beg_pos + len; + } + for (pos_type pos = beg_pos; pos != end_pos; ++pos) if (isLowerCase(cur.paragraph().getChar(pos))) return false; return true; @@ -1141,17 +3557,19 @@ static bool allNonLowercase(DocIterator const & cur, int len) { /** Check if first letter is upper case and second one is lower case */ -static bool firstUppercase(DocIterator const & cur) { +static bool firstUppercase(Cursor const & cur) +{ char_type ch1, ch2; - if (cur.pos() >= cur.lastpos() - 1) { + pos_type pos = cur.selectionBegin().pos(); + if (pos >= cur.lastpos() - 1) { LYXERR(Debug::FIND, "No upper-case at cur: " << cur); return false; } - ch1 = cur.paragraph().getChar(cur.pos()); - ch2 = cur.paragraph().getChar(cur.pos()+1); + ch1 = cur.paragraph().getChar(pos); + ch2 = cur.paragraph().getChar(pos + 1); bool result = isUpperCase(ch1) && isLowerCase(ch2); LYXERR(Debug::FIND, "firstUppercase(): " - << "ch1=" << ch1 << "(" << char(ch1) << "), ch2=" + << "ch1=" << ch1 << "(" << char(ch1) << "), ch2=" << ch2 << "(" << char(ch2) << ")" << ", result=" << result << ", cur=" << cur); return result; @@ -1162,43 +3580,53 @@ static bool firstUppercase(DocIterator const & cur) { ** ** \fixme What to do with possible further paragraphs in replace buffer ? **/ -static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase others_case) { +static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase others_case) +{ ParagraphList::iterator pit = buffer.paragraphs().begin(); + LASSERT(pit->size() >= 1, /**/); pos_type right = pos_type(1); pit->changeCase(buffer.params(), pos_type(0), right, first_case); - right = pit->size() + 1; - pit->changeCase(buffer.params(), right, right, others_case); + right = pit->size(); + pit->changeCase(buffer.params(), pos_type(1), right, others_case); } -} // anon namespace + +} // namespace /// -static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv) +static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv) { Cursor & cur = bv->cursor(); - if (opt.replace == docstring(from_utf8(LYX_FR_NULL_STRING))) - return; + if (opt.repl_buf_name == docstring() + || theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0 + || theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0) + return 0; + DocIterator sel_beg = cur.selectionBegin(); DocIterator sel_end = cur.selectionEnd(); if (&sel_beg.inset() != &sel_end.inset() - || sel_beg.pit() != sel_end.pit()) - return; + || sel_beg.pit() != sel_end.pit() + || sel_beg.idx() != sel_end.idx()) + return 0; int sel_len = sel_end.pos() - sel_beg.pos(); LYXERR(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end << ", sel_len: " << sel_len << endl); if (sel_len == 0) - return; - LASSERT(sel_len > 0, /**/); + return 0; + LASSERT(sel_len > 0, return 0); - if (!matchAdv(sel_beg, sel_len)) - return; + if (!matchAdv(sel_beg, sel_len).match_len) + return 0; - string lyx = to_utf8(opt.replace); - // FIXME: Seems so stupid to me to rebuild a buffer here, - // when we already have one (replace_work_area_.buffer()) + // Build a copy of the replace buffer, adapted to the KeepCase option + Buffer & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true); + ostringstream oss; + repl_buffer_orig.write(oss); + string lyx = oss.str(); Buffer repl_buffer("", false); repl_buffer.setUnnamed(true); - LASSERT(repl_buffer.readString(lyx), /**/); + LASSERT(repl_buffer.readString(lyx), return 0); if (opt.keep_case && sel_len >= 2) { + LYXERR(Debug::FIND, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len); if (cur.inTexted()) { if (firstUppercase(cur)) changeFirstCase(repl_buffer, text_uppercase, text_lowercase); @@ -1206,8 +3634,8 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M changeFirstCase(repl_buffer, text_uppercase, text_uppercase); } } - cap::cutSelection(cur, false, false); - if (!cur.inMathed()) { + cap::cutSelection(cur, false); + if (cur.inTexted()) { repl_buffer.changeLanguage( repl_buffer.language(), cur.getFont().language()); @@ -1218,32 +3646,38 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M bv->buffer().errorList("Paste")); LYXERR(Debug::FIND, "After pasteParagraphList() cur=" << cur << endl); sel_len = repl_buffer.paragraphs().begin()->size(); - } else { + } else if (cur.inMathed()) { odocstringstream ods; otexstream os(ods); - OutputParams runparams(&repl_buffer.params().encoding()); + // OutputParams runparams(&repl_buffer.params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = false; - runparams.flavor = OutputParams::LATEX; + runparams.flavor = OutputParams::XETEX; runparams.linelen = 8000; //lyxrc.plaintext_linelen; runparams.dryrun = true; - TexRow texrow; - TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, texrow, runparams); + TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams); //repl_buffer.getSourceCode(ods, 0, repl_buffer.paragraphs().size(), false); docstring repl_latex = ods.str(); LYXERR(Debug::FIND, "Latexified replace_buffer: '" << repl_latex << "'"); string s; - regex_replace(to_utf8(repl_latex), s, "\\$(.*)\\$", "$1"); - regex_replace(s, s, "\\\\\\[(.*)\\\\\\]", "$1"); + (void)regex_replace(to_utf8(repl_latex), s, "\\$(.*)\\$", "$1"); + (void)regex_replace(s, s, "\\\\\\[(.*)\\\\\\]", "$1"); repl_latex = from_utf8(s); - LYXERR(Debug::FIND, "Replacing by niceInsert()ing latex: '" << repl_latex << "'"); - sel_len = cur.niceInsert(repl_latex); + LYXERR(Debug::FIND, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth()); + MathData ar(cur.buffer()); + asArray(repl_latex, ar, Parse::NORMAL); + cur.insert(ar); + sel_len = ar.size(); + LYXERR(Debug::FIND, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); } - cur.pos() -= sel_len; - if (cur.pos() < 0) + if (cur.pos() >= sel_len) + cur.pos() -= sel_len; + else cur.pos() = 0; - LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << sel_len); + LYXERR(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); bv->putSelectionAt(DocIterator(cur), sel_len, !opt.forward); bv->processUpdateFlags(Update::Force); + return 1; } @@ -1253,19 +3687,22 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) DocIterator cur; int match_len = 0; - if (opt.search.empty()) { - bv->message(_("Search text is empty!")); + // e.g., when invoking word-findadv from mini-buffer wither with + // wrong options syntax or before ever opening advanced F&R pane + if (theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0) return false; - } try { MatchStringAdv matchAdv(bv->buffer(), opt); - findAdvReplace(bv, opt, matchAdv); + int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos(); + if (length > 0) + bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward); + num_replaced += findAdvReplace(bv, opt, matchAdv); cur = bv->cursor(); if (opt.forward) - match_len = findForwardAdv(cur, matchAdv); + match_len = findForwardAdv(cur, matchAdv); else - match_len = findBackwardsAdv(cur, matchAdv); + match_len = findBackwardsAdv(cur, matchAdv); } catch (...) { // This may only be raised by lyx::regex() bv->message(_("Invalid regular expression!")); @@ -1273,11 +3710,31 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) } if (match_len == 0) { - bv->message(_("Match not found!")); + if (num_replaced > 0) { + switch (num_replaced) + { + case 1: + bv->message(_("One match has been replaced.")); + break; + case 2: + bv->message(_("Two matches have been replaced.")); + break; + default: + bv->message(bformat(_("%1$d matches have been replaced."), num_replaced)); + break; + } + num_replaced = 0; + } + else { + bv->message(_("Match not found.")); + } return false; } - bv->message(_("Match found!")); + if (num_replaced > 0) + bv->message(_("Match has been replaced.")); + else + bv->message(_("Match found.")); LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len); bv->putSelectionAt(cur, match_len, !opt.forward); @@ -1288,22 +3745,24 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt) { - os << to_utf8(opt.search) << "\nEOSS\n" + os << to_utf8(opt.find_buf_name) << "\nEOSS\n" << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' << opt.expandmacros << ' ' << opt.ignoreformat << ' ' - << opt.regexp << ' ' - << to_utf8(opt.replace) << "\nEOSS\n" + << opt.replace_all << ' ' + << to_utf8(opt.repl_buf_name) << "\nEOSS\n" << opt.keep_case << ' ' - << int(opt.scope); + << int(opt.scope) << ' ' + << int(opt.restr); LYXERR(Debug::FIND, "built: " << os.str()); return os; } + istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) { LYXERR(Debug::FIND, "parsing"); @@ -1312,35 +3771,39 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) getline(is, line); while (line != "EOSS") { if (! s.empty()) - s = s + "\n"; + s = s + "\n"; s = s + line; if (is.eof()) // Tolerate malformed request - break; + break; getline(is, line); } - LYXERR(Debug::FIND, "searching for: '" << s << "'"); - opt.search = from_utf8(s); - is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.regexp; + LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); + opt.find_buf_name = from_utf8(s); + is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all; is.get(); // Waste space before replace string s = ""; getline(is, line); while (line != "EOSS") { if (! s.empty()) - s = s + "\n"; + s = s + "\n"; s = s + line; if (is.eof()) // Tolerate malformed request - break; + break; getline(is, line); } + LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); + opt.repl_buf_name = from_utf8(s); is >> opt.keep_case; int i; is >> i; opt.scope = FindAndReplaceOptions::SearchScope(i); + is >> i; + opt.restr = FindAndReplaceOptions::SearchRestriction(i); + LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' - << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.regexp << ' ' << opt.keep_case); - LYXERR(Debug::FIND, "replacing with: '" << s << "'"); - opt.replace = from_utf8(s); + << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' ' + << opt.scope << ' ' << opt.restr); return is; } -} // lyx namespace +} // namespace lyx