X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxfind.cpp;h=6e3b324bfa66be867f2bee4d4292bfd6b2ee3422;hb=26ba2a65838731ce639a09539f617cb0f0be3b22;hp=9fbc13f25a77b7b83d24d587c025842eb1421bc4;hpb=9d6b71c6b3db62f0d1fe6338379c2bf226bedfa5;p=lyx.git diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 9fbc13f25a..6e3b324bfa 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -8,16 +8,17 @@ * \author Jürgen Vigna * \author Alfredo Braunstein * \author Tommaso Cucinotta + * \author Kornel Benko * * Full author contact details are available in file CREDITS. */ #include +#include #include "lyxfind.h" #include "Buffer.h" -#include "buffer_funcs.h" #include "BufferList.h" #include "BufferParams.h" #include "BufferView.h" @@ -29,102 +30,126 @@ #include "output_latex.h" #include "OutputParams.h" #include "Paragraph.h" -#include "ParIterator.h" -#include "TexRow.h" #include "Text.h" +#include "Encoding.h" +#include "Language.h" #include "frontends/Application.h" #include "frontends/alert.h" #include "mathed/InsetMath.h" -#include "mathed/InsetMathGrid.h" #include "mathed/InsetMathHull.h" #include "mathed/MathData.h" #include "mathed/MathStream.h" #include "mathed/MathSupport.h" -#include "support/convert.h" #include "support/debug.h" #include "support/docstream.h" #include "support/FileName.h" #include "support/gettext.h" #include "support/lassert.h" #include "support/lstrings.h" - -#include "support/regex.h" #include "support/textutils.h" -#include + +#include +#include + +//#define ResultsDebug +#define USE_QT_FOR_SEARCH +#if defined(USE_QT_FOR_SEARCH) + #include + #define QTSEARCH 1 +#else + #define QTSEARCH 0 +#endif using namespace std; using namespace lyx::support; namespace lyx { +typedef unordered_map AccentsMap; +typedef unordered_map::const_iterator AccentsIterator; +static AccentsMap accents = unordered_map(); // Helper class for deciding what should be ignored class IgnoreFormats { public: /// - IgnoreFormats() - : ignoreFamily_(false), ignoreSeries_(false), - ignoreShape_(false), ignoreUnderline_(false), - ignoreMarkUp_(false), ignoreStrikeOut_(false), - ignoreSectioning_(false), ignoreFrontMatter_(false), - ignoreColor_(false), ignoreLanguage_(false) {} + IgnoreFormats() = default; + /// + bool getFamily() const { return ignoreFamily_; } + /// + bool getSeries() const { return ignoreSeries_; } + /// + bool getShape() const { return ignoreShape_; } + /// + bool getSize() const { return ignoreSize_; } /// - bool getFamily() { return ignoreFamily_; }; + bool getUnderline() const { return ignoreUnderline_; } /// - bool getSeries() { return ignoreSeries_; }; + bool getMarkUp() const { return ignoreMarkUp_; } /// - bool getShape() { return ignoreShape_; }; + bool getStrikeOut() const { return ignoreStrikeOut_; } /// - bool getUnderline() { return ignoreUnderline_; }; + bool getSectioning() const { return ignoreSectioning_; } /// - bool getMarkUp() { return ignoreMarkUp_; }; + bool getFrontMatter() const { return ignoreFrontMatter_; } /// - bool getStrikeOut() { return ignoreStrikeOut_; }; + bool getColor() const { return ignoreColor_; } /// - bool getSectioning() { return ignoreSectioning_; }; + bool getLanguage() const { return ignoreLanguage_; } /// - bool getFrontMatter() { return ignoreFrontMatter_; }; + bool getDeleted() const { return ignoreDeleted_; } /// - bool getColor() { return ignoreColor_; }; + void setIgnoreDeleted(bool value); /// - bool getLanguage() { return ignoreLanguage_; }; + bool getNonContent() const { return searchNonContent_; } /// - void setIgnoreFormat(string type, bool value); + void setIgnoreFormat(string const & type, bool value, bool fromUser = true); private: /// - bool ignoreFamily_; + bool ignoreFamily_ = false; /// - bool ignoreSeries_; + bool ignoreSeries_ = false; /// - bool ignoreShape_; + bool ignoreShape_ = false; /// - bool ignoreUnderline_; + bool ignoreSize_ = true; /// - bool ignoreMarkUp_; + bool ignoreUnderline_ = false; /// - bool ignoreStrikeOut_; + bool ignoreMarkUp_ = false; /// - bool ignoreSectioning_; + bool ignoreStrikeOut_ = false; /// - bool ignoreFrontMatter_; + bool ignoreSectioning_ = false; /// - bool ignoreColor_; + bool ignoreFrontMatter_ = false; /// - bool ignoreLanguage_; + bool ignoreColor_ = false; + /// + bool ignoreLanguage_ = false; + bool userSelectedIgnoreLanguage_ = false; + /// + bool ignoreDeleted_ = true; + /// + bool searchNonContent_ = true; }; - -void IgnoreFormats::setIgnoreFormat(string type, bool value) +void IgnoreFormats::setIgnoreFormat(string const & type, bool value, bool fromUser) { if (type == "color") { ignoreColor_ = value; } else if (type == "language") { - ignoreLanguage_ = value; + if (fromUser) { + userSelectedIgnoreLanguage_ = value; + ignoreLanguage_ = value; + } + else + ignoreLanguage_ = (value || userSelectedIgnoreLanguage_); } else if (type == "sectioning") { ignoreSectioning_ = value; @@ -141,6 +166,9 @@ void IgnoreFormats::setIgnoreFormat(string type, bool value) else if (type == "shape") { ignoreShape_ = value; } + else if (type == "size") { + ignoreSize_ = value; + } else if (type == "family") { ignoreFamily_ = value; } @@ -153,35 +181,41 @@ void IgnoreFormats::setIgnoreFormat(string type, bool value) else if (type == "strike") { ignoreStrikeOut_ = value; } + else if (type == "deleted") { + ignoreDeleted_ = value; + } + else if (type == "non-output-content") { + searchNonContent_ = !value; + } } // The global variable that can be changed from outside IgnoreFormats ignoreFormats; -void setIgnoreFormat(string type, bool value) +void setIgnoreFormat(string const & type, bool value, bool fromUser) { - ignoreFormats.setIgnoreFormat(type, value); + ignoreFormats.setIgnoreFormat(type, value, fromUser); } namespace { -bool parse_bool(docstring & howto) +bool parse_bool(docstring & howto, bool const defvalue = false) { if (howto.empty()) - return false; + return defvalue; docstring var; howto = split(howto, var, ' '); return var == "1"; } -class MatchString : public binary_function +class MatchString { public: - MatchString(docstring const & str, bool cs, bool mw) - : str(str), case_sens(cs), whole_words(mw) + MatchString(docstring const & s, bool cs, bool mw) + : str(s), case_sens(cs), whole_words(mw) {} // returns true if the specified string is at the specified position @@ -201,24 +235,33 @@ private: }; -int findForward(DocIterator & cur, MatchString const & match, - bool find_del = true) +int findForward(DocIterator & cur, DocIterator const endcur, + MatchString const & match, + bool find_del = true, bool onlysel = false) { - for (; cur; cur.forwardChar()) + for (; cur; cur.forwardChar()) { + if (onlysel && endcur.pit() == cur.pit() + && endcur.idx() == cur.idx() && endcur.pos() < cur.pos()) + break; if (cur.inTexted()) { int len = match(cur.paragraph(), cur.pos(), find_del); if (len > 0) return len; } + } return 0; } -int findBackwards(DocIterator & cur, MatchString const & match, - bool find_del = true) +int findBackwards(DocIterator & cur, DocIterator const endcur, + MatchString const & match, + bool find_del = true, bool onlysel = false) { while (cur) { cur.backwardChar(); + if (onlysel && endcur.pit() == cur.pit() + && endcur.idx() == cur.idx() && endcur.pos() > cur.pos()) + break; if (cur.inTexted()) { int len = match(cur.paragraph(), cur.pos(), find_del); if (len > 0) @@ -238,38 +281,89 @@ bool searchAllowed(docstring const & str) return true; } +} // namespace + bool findOne(BufferView * bv, docstring const & searchstr, bool case_sens, bool whole, bool forward, - bool find_del = true, bool check_wrap = false) + bool find_del, bool check_wrap, bool const auto_wrap, + bool instant, bool onlysel) { + bool const had_selection = bv->cursor().selection(); + + // Clean up previous selections with empty searchstr on instant + if (searchstr.empty() && instant) { + if (had_selection) { + bv->setCursor(bv->cursor().selectionBegin()); + bv->clearSelection(); + } + return true; + } + if (!searchAllowed(searchstr)) return false; + DocIterator const startcur = bv->cursor().selectionBegin(); + DocIterator const endcur = bv->cursor().selectionEnd(); + + if (onlysel && had_selection) { + docstring const matchstring = bv->cursor().selectionAsString(false); + docstring const lcmatchsting = support::lowercase(matchstring); + if (matchstring == searchstr || (!case_sens && lcmatchsting == lowercase(searchstr))) { + docstring q = _("The search string matches the selection, and search is limited to selection.\n" + "Continue search outside?"); + int search_answer = frontend::Alert::prompt(_("Search outside selection?"), + q, 0, 1, _("&Yes"), _("&No")); + if (search_answer == 0) { + bv->clearSelection(); + if (findOne(bv, searchstr, case_sens, whole, forward, + find_del, check_wrap, auto_wrap, false, false)) + return true; + } + return false; + } + } + DocIterator cur = forward - ? bv->cursor().selectionEnd() - : bv->cursor().selectionBegin(); + ? ((instant || onlysel) ? bv->cursor().selectionBegin() : bv->cursor().selectionEnd()) + : ((instant || onlysel) ? bv->cursor().selectionEnd() : bv->cursor().selectionBegin()); MatchString const match(searchstr, case_sens, whole); int match_len = forward - ? findForward(cur, match, find_del) - : findBackwards(cur, match, find_del); + ? findForward(cur, endcur, match, find_del, onlysel) + : findBackwards(cur, startcur, match, find_del, onlysel); if (match_len > 0) bv->putSelectionAt(cur, match_len, !forward); - else if (check_wrap) { - DocIterator cur_orig(bv->cursor()); - docstring q; - if (forward) - q = _("End of file reached while searching forward.\n" - "Continue searching from the beginning?"); - else - q = _("Beginning of file reached while searching backward.\n" - "Continue searching from the end?"); - int wrap_answer = frontend::Alert::prompt(_("Wrap search?"), + else if (onlysel && had_selection) { + docstring q = _("The search string was not found within the selection.\n" + "Continue search outside?"); + int search_answer = frontend::Alert::prompt(_("Search outside selection?"), q, 0, 1, _("&Yes"), _("&No")); - if (wrap_answer == 0) { + if (search_answer == 0) { + bv->clearSelection(); + if (findOne(bv, searchstr, case_sens, whole, forward, + find_del, check_wrap, auto_wrap, false, false)) + return true; + } + return false; + } + else if (check_wrap) { + bool wrap = auto_wrap; + if (!auto_wrap) { + docstring q; + if (forward) + q = _("End of file reached while searching forward.\n" + "Continue searching from the beginning?"); + else + q = _("Beginning of file reached while searching backward.\n" + "Continue searching from the end?"); + int wrap_answer = frontend::Alert::prompt(_("Wrap search?"), + q, 0, 1, _("&Yes"), _("&No")); + wrap = wrap_answer == 0; + } + if (wrap) { if (forward) { bv->cursor().clear(); bv->cursor().push_back(CursorSlice(bv->buffer().inset())); @@ -277,11 +371,24 @@ bool findOne(BufferView * bv, docstring const & searchstr, bv->cursor().setCursor(doc_iterator_end(&bv->buffer())); bv->cursor().backwardPos(); } + if (auto_wrap) { + docstring const msg = forward + ? _("Search reached end of document, continuing from beginning.") + : _("Search reached beginning of document, continuing from end."); + bv->message(msg); + } bv->clearSelection(); - if (findOne(bv, searchstr, case_sens, whole, forward, find_del, false)) + if (findOne(bv, searchstr, case_sens, whole, forward, + find_del, false, false, false, false)) return true; } - bv->cursor().setCursor(cur_orig); + bv->setCursor(startcur); + + // restore original selection + if (had_selection) { + bv->cursor().resetAnchor(); + bv->setSelection(startcur, endcur); + } return false; } @@ -289,16 +396,20 @@ bool findOne(BufferView * bv, docstring const & searchstr, } +namespace { + int replaceAll(BufferView * bv, docstring const & searchstr, docstring const & replacestr, - bool case_sens, bool whole) + bool case_sens, bool whole, bool onlysel) { Buffer & buf = bv->buffer(); if (!searchAllowed(searchstr) || buf.isReadonly()) return 0; - DocIterator cur_orig(bv->cursor()); + DocIterator startcur = bv->cursor().selectionBegin(); + DocIterator endcur = bv->cursor().selectionEnd(); + bool const had_selection = bv->cursor().selection(); MatchString const match(searchstr, case_sens, whole); int num = 0; @@ -308,29 +419,53 @@ int replaceAll(BufferView * bv, Cursor cur(*bv); cur.setCursor(doc_iterator_begin(&buf)); - int match_len = findForward(cur, match, false); + int match_len = findForward(cur, endcur, match, false, onlysel); while (match_len > 0) { // Backup current cursor position and font. pos_type const pos = cur.pos(); Font const font = cur.paragraph().getFontSettings(buf.params(), pos); cur.recordUndo(); - int striked = ssize - + int ct_deleted_text = ssize - cur.paragraph().eraseChars(pos, pos + match_len, buf.params().track_changes); cur.paragraph().insert(pos, replacestr, font, Change(buf.params().track_changes ? Change::INSERTED : Change::UNCHANGED)); - for (int i = 0; i < rsize + striked; ++i) - cur.forwardChar(); + for (int i = 0; i < rsize + ct_deleted_text + && cur.pos() < cur.lastpos(); ++i) + cur.forwardPos(); + if (onlysel && cur.pit() == endcur.pit() && cur.idx() == endcur.idx()) { + // Adjust end of selection for replace-all in selection + if (rsize > ssize) { + int const offset = rsize - ssize; + for (int i = 0; i < offset + ct_deleted_text + && endcur.pos() < endcur.lastpos(); ++i) + endcur.forwardPos(); + } else { + int const offset = ssize - rsize; + for (int i = 0; i < offset && endcur.pos() > 0; ++i) + endcur.backwardPos(); + for (int i = 0; i < ct_deleted_text + && endcur.pos() < endcur.lastpos(); ++i) + endcur.forwardPos(); + } + } ++num; - match_len = findForward(cur, match, false); + match_len = findForward(cur, endcur, match, false, onlysel); } bv->putSelectionAt(doc_iterator_begin(&buf), 0, false); - cur_orig.fixIfBroken(); - bv->setCursor(cur_orig); + startcur.fixIfBroken(); + bv->setCursor(startcur); + + // Reset selection, accounting for changes in selection + if (had_selection) { + endcur.fixIfBroken(); + bv->cursor().resetAnchor(); + bv->setSelection(startcur, endcur); + } return num; } @@ -353,14 +488,15 @@ int replaceAll(BufferView * bv, // whether anything at all was done. pair replaceOne(BufferView * bv, docstring searchstr, docstring const & replacestr, bool case_sens, - bool whole, bool forward, bool findnext) + bool whole, bool forward, bool findnext, bool wrap, + bool onlysel) { Cursor & cur = bv->cursor(); - bool found = false; - if (!cur.selection()) { + if (!cur.selection() || onlysel) { // no selection, non-empty search string: find it if (!searchstr.empty()) { - found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); + bool const found = findOne(bv, searchstr, case_sens, whole, + forward, true, findnext, wrap, false, onlysel); return make_pair(found, 0); } // empty search string @@ -371,7 +507,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, // This causes a minor bug as undo will restore this selection, // which the user did not create (#8986). cur.innerText()->selectWord(cur, WHOLE_WORD); - searchstr = cur.selectionAsString(false); + searchstr = cur.selectionAsString(false, true); } // if we still don't have a search string, report the error @@ -380,7 +516,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, return make_pair(false, 0); bool have_selection = cur.selection(); - docstring const selected = cur.selectionAsString(false); + docstring const selected = cur.selectionAsString(false, true); bool match = case_sens ? searchstr == selected @@ -389,7 +525,8 @@ pair replaceOne(BufferView * bv, docstring searchstr, // no selection or current selection is not search word: // just find the search word if (!have_selection || !match) { - found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); + bool const found = findOne(bv, searchstr, case_sens, whole, forward, + true, findnext, wrap, false, onlysel); return make_pair(found, 0); } @@ -405,7 +542,8 @@ pair replaceOne(BufferView * bv, docstring searchstr, cur.pos() = cur.lastpos()); } if (findnext) - findOne(bv, searchstr, case_sens, whole, forward, false, findnext); + findOne(bv, searchstr, case_sens, whole, + forward, false, findnext, wrap, false, onlysel); return make_pair(true, 1); } @@ -414,13 +552,18 @@ pair replaceOne(BufferView * bv, docstring searchstr, docstring const find2string(docstring const & search, - bool casesensitive, bool matchword, bool forward) + bool casesensitive, bool matchword, + bool forward, bool wrap, bool instant, + bool onlysel) { odocstringstream ss; ss << search << '\n' << int(casesensitive) << ' ' << int(matchword) << ' ' - << int(forward); + << int(forward) << ' ' + << int(wrap) << ' ' + << int(instant) << ' ' + << int(onlysel); return ss.str(); } @@ -428,7 +571,8 @@ docstring const find2string(docstring const & search, docstring const replace2string(docstring const & replace, docstring const & search, bool casesensitive, bool matchword, - bool all, bool forward, bool findnext) + bool all, bool forward, bool findnext, + bool wrap, bool onlysel) { odocstringstream ss; ss << replace << '\n' @@ -437,34 +581,60 @@ docstring const replace2string(docstring const & replace, << int(matchword) << ' ' << int(all) << ' ' << int(forward) << ' ' - << int(findnext); + << int(findnext) << ' ' + << int(wrap) << ' ' + << int(onlysel); return ss.str(); } -bool lyxfind(BufferView * bv, FuncRequest const & ev) +docstring const string2find(docstring const & argument, + bool &casesensitive, + bool &matchword, + bool &forward, + bool &wrap, + bool &instant, + bool &onlysel) { - if (!bv || ev.action() != LFUN_WORD_FIND) - return false; - - //lyxerr << "find called, cmd: " << ev << endl; - // data is of the form // " - // " + // " docstring search; - docstring howto = split(ev.argument(), search, '\n'); + docstring howto = split(argument, search, '\n'); + + casesensitive = parse_bool(howto); + matchword = parse_bool(howto); + forward = parse_bool(howto, true); + wrap = parse_bool(howto); + instant = parse_bool(howto); + onlysel = parse_bool(howto); + + return search; +} - bool casesensitive = parse_bool(howto); - bool matchword = parse_bool(howto); - bool forward = parse_bool(howto); - return findOne(bv, search, casesensitive, matchword, forward, true, true); +bool lyxfind(BufferView * bv, FuncRequest const & ev) +{ + if (!bv || ev.action() != LFUN_WORD_FIND) + return false; + + //lyxerr << "find called, cmd: " << ev << endl; + bool casesensitive; + bool matchword; + bool forward; + bool wrap; + bool instant; + bool onlysel; + + docstring search = string2find(ev.argument(), casesensitive, + matchword, forward, wrap, instant, onlysel); + + return findOne(bv, search, casesensitive, matchword, forward, + false, true, wrap, instant, onlysel); } -bool lyxreplace(BufferView * bv, - FuncRequest const & ev, bool has_deleted) +bool lyxreplace(BufferView * bv, FuncRequest const & ev) { if (!bv || ev.action() != LFUN_WORD_REPLACE) return false; @@ -472,7 +642,7 @@ bool lyxreplace(BufferView * bv, // data is of the form // " // - // " + // " docstring search; docstring rplc; docstring howto = split(ev.argument(), rplc, '\n'); @@ -481,45 +651,48 @@ bool lyxreplace(BufferView * bv, bool casesensitive = parse_bool(howto); bool matchword = parse_bool(howto); bool all = parse_bool(howto); - bool forward = parse_bool(howto); - bool findnext = howto.empty() ? true : parse_bool(howto); + bool forward = parse_bool(howto, true); + bool findnext = parse_bool(howto, true); + bool wrap = parse_bool(howto); + bool onlysel = parse_bool(howto); + + if (!bv->cursor().selection()) + // only selection only makes sense with selection + onlysel = false; bool update = false; - if (!has_deleted) { - int replace_count = 0; - if (all) { - replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); - update = replace_count > 0; - } else { - pair rv = - replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); - update = rv.first; - replace_count = rv.second; - } + int replace_count = 0; + if (all) { + replace_count = replaceAll(bv, search, rplc, casesensitive, + matchword, onlysel); + update = replace_count > 0; + } else { + pair rv = + replaceOne(bv, search, rplc, casesensitive, matchword, + forward, findnext, wrap, onlysel); + update = rv.first; + replace_count = rv.second; + } - Buffer const & buf = bv->buffer(); - if (!update) { - // emit message signal. + Buffer const & buf = bv->buffer(); + if (!update) { + // emit message signal. + if (onlysel) + buf.message(_("String not found in selection.")); + else buf.message(_("String not found.")); + } else { + if (replace_count == 0) { + buf.message(_("String found.")); + } else if (replace_count == 1) { + buf.message(_("String has been replaced.")); } else { - if (replace_count == 0) { - buf.message(_("String found.")); - } else if (replace_count == 1) { - buf.message(_("String has been replaced.")); - } else { - docstring const str = - bformat(_("%1$d strings have been replaced."), replace_count); - buf.message(str); - } + docstring const str = onlysel + ? bformat(_("%1$d strings have been replaced in the selection."), replace_count) + : bformat(_("%1$d strings have been replaced."), replace_count); + buf.message(str); } - } else if (findnext) { - // if we have deleted characters, we do not replace at all, but - // rather search for the next occurence - if (findOne(bv, search, casesensitive, matchword, forward, true, findnext)) - update = true; - else - bv->message(_("String not found.")); } return update; } @@ -643,139 +816,237 @@ namespace { typedef vector > Escapes; -/// A map of symbols and their escaped equivalent needed within a regex. -/// @note Beware of order -Escapes const & get_regexp_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("$", "_x_$")); - escape_map.push_back(P("{", "_x_{")); - escape_map.push_back(P("}", "_x_}")); - escape_map.push_back(P("[", "_x_[")); - escape_map.push_back(P("]", "_x_]")); - escape_map.push_back(P("(", "_x_(")); - escape_map.push_back(P(")", "_x_)")); - escape_map.push_back(P("+", "_x_+")); - escape_map.push_back(P("*", "_x_*")); - escape_map.push_back(P(".", "_x_.")); - escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)")); - escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)")); - escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)")); - escape_map.push_back(P("_x_", "\\")); - } - return escape_map; -} - -/// A map of lyx escaped strings and their unescaped equivalent. -Escapes const & get_lyx_unescapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\%", "%")); - escape_map.push_back(P("\\mathcircumflex ", "^")); - escape_map.push_back(P("\\mathcircumflex", "^")); - escape_map.push_back(P("\\backslash ", "\\")); - escape_map.push_back(P("\\backslash", "\\")); - escape_map.push_back(P("\\\\{", "_x_<")); - escape_map.push_back(P("\\\\}", "_x_>")); - escape_map.push_back(P("\\sim ", "~")); - escape_map.push_back(P("\\sim", "~")); - } - return escape_map; -} - -/// A map of escapes turning a regexp matching text to one matching latex. -Escapes const & get_regexp_latex_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)")); - escape_map.push_back(P("( 0) { + if (count > 1) + return "\\s{" + std::to_string(count) + "}"; + else + return "\\s"; + } + return ""; +} + +string string2regex(string in) { - LYXERR(Debug::FIND, "Escaping: '" << s << "'"); - Escapes::const_iterator it; - for (it = escape_map.begin(); it != escape_map.end(); ++it) { -// LYXERR(Debug::FIND, "Escaping " << it->first << " as " << it->second); - unsigned int pos = 0; - while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) { - s.replace(pos, it->first.length(), it->second); - LYXERR(Debug::FIND, "After escape: " << s); - pos += it->second.length(); -// LYXERR(Debug::FIND, "pos: " << pos); + static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\$\\])" }; + string tempx = std::regex_replace(in, specialChars, R"(\$&)" ); + // Special handling for ' ' + string temp(""); + int blanks = 0; + for (unsigned i = 0; i < tempx.size(); i++) { + if (tempx[i] == ' ' || tempx[i] == '~' ) { + // normal blanks + blanks++; + } + else if (tempx[i] == '\302' && tempx[i+1] == '\240') { + // Normal Space + blanks++; + i++; + } + else if (tempx[i] == '\342') { + if (tempx[i+1] == '\200') { + if ((tempx[i+2] == '\257') + || (tempx[i+2] == '\203') + || (tempx[i+2] == '\202')) { + // Non-breaking Thin (1/6 em) + // Quad(1 em), (Double quad counts as 2 blanks) + // Half Quad + blanks++; + i += 2; + } + else if (tempx[i+2] == '\213') { + // Ignoring parts of Medium and Thick + i += 2; + continue; + } + else if ((tempx[i+2] == '\204') || (tempx[i+2] == '\205')) { + // Thick + // Medium + blanks++; + i += 2; + } + } + else if (tempx[i+1] == '\201') { + if (tempx[i+2] == '\240') { + // Ignoring parts of half quad + i += 2; + continue; + } + } + else if ((tempx[i+1] == '\220') && (tempx[i+2] == '\243')) { + // Visible space + blanks++; + i += 2; + } + } + else { + if (blanks > 0) { + temp += getRegexSpaceCount(blanks); + } + temp += tempx[i]; + blanks = 0; } } - LYXERR(Debug::FIND, "Escaped : '" << s << "'"); - return s; + if (blanks > 0) { + temp += getRegexSpaceCount(blanks); + } + + string temp2(""); + size_t lastpos = 0; + size_t fl_pos = 0; + int offset = 1; + while (fl_pos < temp.size()) { + fl_pos = temp.find("\\\\foreignlanguage", lastpos + offset); + if (fl_pos == string::npos) + break; + offset = 16; + temp2 += temp.substr(lastpos, fl_pos - lastpos); + temp2 += "\\n"; + lastpos = fl_pos; + } + if (lastpos == 0) + return(temp); + if (lastpos < temp.size()) { + temp2 += temp.substr(lastpos, temp.size() - lastpos); + } + return temp2; } +static void buildAccentsMap(); + +string correctRegex(string t, bool withformat) +{ + /* Convert \backslash => \ + * and \{, \}, \[, \] => {, }, [, ] + */ + string s(""); + static std::regex wordre("(\\\\)*(\\\\(( |[A-Za-z]+|[\\{\\}%])( |\\{\\})?|[\\[\\]\\{\\}]))"); + static std::regex protectedSpace { R"(~)" }; + size_t lastpos = 0; + smatch sub; + bool backslashed = false; + if (accents.empty()) + buildAccentsMap(); + + //LYXERR0("correctRegex input '" << t << "'"); + int skip = 0; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + string replace; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else { + if (sub.str(4) == "backslash") { + replace = string("\\"); + { + // transforms '\backslash \{' into '\{' + string next = t.substr(sub.position(2) + sub.str(2).length(), 2); + if ((next == "\\{") || (next == "\\}") || (next == "\\ ")) { + replace = ""; + backslashed = true; + } + else if (withformat && next[0] == '$') { + replace = accents["lyxdollar"]; + skip = 1; // Skip following '$' + } + } + } + else if (sub.str(4) == "mathcircumflex") + replace = "^"; + else if ((sub.str(4) == "negthinspace") || (sub.str(4) == "negmedspace") || (sub.str(4) == "negthickspace")) { + replace = accents[sub.str(4)+"{}"]; + } + else if (backslashed) { + backslashed = false; + if (withformat) { + if (sub.str(3) == "{") + replace = accents["braceleft"]; + else if (sub.str(3) == "}") + replace = accents["braceright"]; + else if (sub.str(3) == " ") + replace = "\\ "; + else { + // else part should not exist + LASSERT(0, /**/); + } + } + else { + if (sub.str(3) == "{") + replace = "\\{"; + else if (sub.str(3) == "}") + replace = "\\}"; + else if (sub.str(3) == " ") + replace = "\\ "; + else { + // else part should not exist + LASSERT(0, /**/); + } + } + } + else if (sub.str(4) == "{") // transforms '\{' into '{' + replace = "{"; + else if (sub.str(4) == "}") + replace = "}"; + else if (sub.str(4) == "%") + replace = "%"; + else if (sub.str(4) == " ") + replace = " "; + else { + AccentsIterator it_ac = accents.find(sub.str(4)); + if (it_ac == accents.end()) { + replace = sub.str(2); + } + else { + replace = it_ac->second; + } + } + } + if (lastpos < (size_t) sub.position(2)) + s += std::regex_replace(t.substr(lastpos, sub.position(2) - lastpos), protectedSpace, R"( )"); + s += replace; + lastpos = sub.position(2) + sub.length(2) + skip; + skip = 0; + } + if (lastpos == 0) + s = std::regex_replace(t, protectedSpace, R"( )"); + else if (lastpos < t.length()) + s += std::regex_replace(t.substr(lastpos, t.length() - lastpos), protectedSpace, R"( )"); + // Handle quotes in regex + // substitute all '„', '“', '»', '«' with '"' + // and all '‚', '‘', '›', '‹' with "\'" + static std::regex plainquotes { R"(„|“|»|«)" }; + static std::regex innerquotes { R"(‚|‘|›|‹)" }; + t = std::regex_replace(s, plainquotes, R"(")"); + s = std::regex_replace(t, innerquotes, R"(')"); + //LYXERR0("correctRegex output '" << s << "'"); + return s; +} /// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string), /// while outside apply get_lyx_unescapes()+get_regexp_escapes(). /// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well. -string escape_for_regex(string s, bool match_latex) -{ - size_t pos = 0; - while (pos < s.size()) { - size_t new_pos = s.find("\\regexp{", pos); - if (new_pos == string::npos) - new_pos = s.size(); - string t; - if (new_pos > pos) { - LYXERR(Debug::FIND, "new_pos: " << new_pos); - t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes()); - LYXERR(Debug::FIND, "t [lyx]: " << t); - t = apply_escapes(t, get_regexp_escapes()); - LYXERR(Debug::FIND, "t [rxp]: " << t); - s.replace(pos, new_pos - pos, t); - new_pos = pos + t.size(); - LYXERR(Debug::FIND, "Regexp after escaping: " << s); - LYXERR(Debug::FIND, "new_pos: " << new_pos); - if (new_pos == s.size()) - break; +string escape_for_regex(string s, bool withformat) +{ + size_t lastpos = 0; + string result = ""; + while (lastpos < s.size()) { + size_t regex_pos = s.find("\\regexp{", lastpos); + if (regex_pos == string::npos) { + regex_pos = s.size(); } - // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes) - size_t end_pos = s.find("\\endregexp{}}", new_pos + 8); - LYXERR(Debug::FIND, "end_pos: " << end_pos); - t = s.substr(new_pos + 8, end_pos - (new_pos + 8)); - LYXERR(Debug::FIND, "t in regexp : " << t); - t = apply_escapes(t, get_lyx_unescapes()); - LYXERR(Debug::FIND, "t in regexp [lyx]: " << t); - if (match_latex) { - t = apply_escapes(t, get_regexp_latex_escapes()); - LYXERR(Debug::FIND, "t in regexp [ltx]: " << t); - } - if (end_pos == s.size()) { - s.replace(new_pos, end_pos - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s); - break; + if (regex_pos > lastpos) { + result += string2regex(s.substr(lastpos, regex_pos-lastpos)); + lastpos = regex_pos; + if (lastpos == s.size()) + break; } - s.replace(new_pos, end_pos + 13 - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s); - pos = new_pos + t.size(); - LYXERR(Debug::FIND, "pos: " << pos); + size_t end_pos = s.find("\\endregexp", regex_pos + 8); + result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat); + lastpos = end_pos + 13; } - return s; + return result; } @@ -783,84 +1054,76 @@ string escape_for_regex(string s, bool match_latex) bool regex_replace(string const & s, string & t, string const & searchstr, string const & replacestr) { - lyx::regex e(searchstr, regex_constants::ECMAScript); + regex e(searchstr, regex_constants::ECMAScript); ostringstream oss; ostream_iterator it(oss); - lyx::regex_replace(it, s.begin(), s.end(), e, replacestr); + regex_replace(it, s.begin(), s.end(), e, replacestr); // tolerate t and s be references to the same variable bool rv = (s != oss.str()); t = oss.str(); return rv; } - -/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces. - ** - ** Verify that closed braces exactly match open braces. This avoids that, for example, - ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'. - ** - ** @param unmatched - ** Number of open braces that must remain open at the end for the verification to succeed. - **/ -bool braces_match(string::const_iterator const & beg, - string::const_iterator const & end, - int unmatched = 0) -{ - int open_pars = 0; - string::const_iterator it = beg; - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'"); - for (; it != end; ++it) { - // Skip escaped braces in the count - if (*it == '\\') { - ++it; - if (it == end) - break; - } else if (*it == '{') { - ++open_pars; - } else if (*it == '}') { - if (open_pars == 0) { - LYXERR(Debug::FIND, "Found unmatched closed brace"); - return false; - } else - --open_pars; - } - } - if (open_pars != unmatched) { - LYXERR(Debug::FIND, "Found " << open_pars - << " instead of " << unmatched - << " unmatched open braces at the end of count"); - return false; - } - LYXERR(Debug::FIND, "Braces match as expected"); - return true; -} - - class MatchResult { public: + enum range { + newIsTooFar, + newIsBetter, + newIsInvalid + }; int match_len; + int match_prefix; int match2end; int pos; - MatchResult(): match_len(0),match2end(0), pos(0) {}; + int leadsize; + int pos_len; + int searched_size; + vector result = vector (); + MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {} }; +static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres) +{ + if (newres.match2end < oldres.match2end) + return MatchResult::newIsTooFar; + if (newres.match_len < oldres.match_len) + return MatchResult::newIsTooFar; + + if (newres.match_len == oldres.match_len) { + if (newres.match2end == oldres.match2end) + return MatchResult::newIsBetter; + } + return MatchResult::newIsInvalid; +} + /** The class performing a match between a position in the document and the FindAdvOptions. **/ + class MatchStringAdv { public: - MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt); + MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt); /** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv ** constructor as opt.search, under the opt.* options settings. ** ** @param at_begin - ** If set, then match is searched only against beginning of text starting at cur. - ** If unset, then match is searched anywhere in text starting at cur. + ** If set to MatchStringAdv::MatchFromStart, + ** then match is searched only against beginning of text starting at cur. + ** Otherwise the match is searched anywhere in text starting at cur. ** ** @return ** The length of the matching text, or zero if no match was found. **/ - MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; + enum matchType { + MatchAnyPlace, + MatchFromStart + }; + string matchTypeAsString(matchType const x) const { return (x == MatchFromStart ? "MatchFromStart" : "MatchAnyPlace"); } + MatchResult operator()(DocIterator const & cur, int len, matchType at_begin) const; +#if QTSEARCH + bool regexIsValid; + string regexError; +#endif public: /// buffer @@ -872,7 +1135,8 @@ public: private: /// Auxiliary find method (does not account for opt.matchword) - MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const; + MatchResult findAux(DocIterator const & cur, int len, matchType at_begin) const; + void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = ""); /** Normalize a stringified or latexified LyX paragraph. ** @@ -887,46 +1151,130 @@ private: ** @todo Normalization should also expand macros, if the corresponding ** search option was checked. **/ - string normalize(docstring const & s, bool hack_braces) const; + string convertLF2Space(docstring const & s, bool ignore_fomat) const; // normalized string to search string par_as_string; // regular expression to use for searching - lyx::regex regexp; - // same as regexp, but prefixed with a ".*?" - lyx::regex regexp2; + // regexp2 is same as regexp, but prefixed with a ".*?" +#if QTSEARCH + QRegularExpression regexp; + QRegularExpression regexp2; +#else + regex regexp; + regex regexp2; +#endif // leading format material as string string lead_as_string; // par_as_string after removal of lead_as_string string par_as_string_nolead; // unmatched open braces in the search string/regexp - int open_braces; + int open_braces = 0; // number of (.*?) subexpressions added at end of search regexp for closing // environments, math mode, styles, etc... - int close_wildcards; + int close_wildcards = 0; +public: // Are we searching with regular expressions ? - bool use_regexp; + bool use_regexp = false; + static int valid_matches; + static vector matches; + void FillResults(MatchResult &found_mr); }; +int MatchStringAdv::valid_matches = 0; +vector MatchStringAdv::matches = vector (10); + +void MatchStringAdv::FillResults(MatchResult &found_mr) +{ + if (found_mr.match_len > 0) { + valid_matches = found_mr.result.size(); + for (size_t i = 0; i < found_mr.result.size(); i++) + matches[i] = found_mr.result[i]; + } else + valid_matches = 0; +} + +static void setFindParams(OutputParams &runparams) +{ + runparams.flavor = Flavor::XeTeX; + //runparams.use_polyglossia = true; + runparams.linelen = 10000; //lyxrc.plaintext_linelen; + // No side effect of file copying and image conversion + runparams.dryrun = true; +} static docstring buffer_to_latex(Buffer & buffer) { - OutputParams runparams(&buffer.params().encoding()); + //OutputParams runparams(&buffer.params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); odocstringstream ods; otexstream os(ods); runparams.nice = true; - runparams.flavor = OutputParams::LATEX; - runparams.linelen = 10000; //lyxrc.plaintext_linelen; - // No side effect of file copying and image conversion - runparams.dryrun = true; - runparams.for_search = true; + setFindParams(runparams); + if (ignoreFormats.getDeleted()) + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); + else + runparams.find_set_feature(OutputParams::SearchWithDeleted); + if (ignoreFormats.getNonContent()) { + runparams.find_add_feature(OutputParams::SearchNonOutput); + } pit_type const endpit = buffer.paragraphs().size(); for (pit_type pit = 0; pit != endpit; ++pit) { - TeXOnePar(buffer, buffer.text(), pit, os, runparams); - LYXERR(Debug::FIND, "searchString up to here: " << ods.str()); + TeXOnePar(buffer, buffer.text(), pit, os, runparams, string(), -1, -1, true); + LYXERR(Debug::FINDVERBOSE, "searchString up to here: " << ods.str()); } return ods.str(); } +static string latexNamesToUtf8(docstring strIn, bool withformat) +{ + string addtmp = to_utf8(strIn); + static regex const rmAcc("(\\\\)*(" + "\\\\([A-Za-z]+\\{.\\})" // e.g. "ddot{A}" == sub.str(3) + "|\\\\([A-Za-z]+)( |\\{\\})?" // e.g. "LyX", "LyX{}", "LyX " == sub.str(4) + ")" + ); + size_t lastpos = 0; + smatch sub; + string replace; + string add(""); + if (accents.empty()) + buildAccentsMap(); + for (sregex_iterator it_add(addtmp.begin(), addtmp.end(), rmAcc), end; it_add != end; ++it_add) { + sub = *it_add; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else { + string key; + if (sub.length(3) > 0) + key = sub.str(3); + else + key = sub.str(4); + AccentsIterator it_ac = accents.find(key); + if (it_ac == accents.end()) { + replace = sub.str(2); + } + else { + replace = it_ac->second; + } + } + if (lastpos < (size_t) sub.position(2)) + add += addtmp.substr(lastpos, sub.position(2) - lastpos); + add += replace; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + add = addtmp; + else if (addtmp.length() > lastpos) + add += addtmp.substr(lastpos, addtmp.length() - lastpos); + if (!withformat) { + static std::regex repltilde { R"(~)" }; + add = std::regex_replace(add, repltilde, accents["lyxtilde"]); + } + LYXERR(Debug::FINDVERBOSE, "Adding to search string: '" + << add << "'"); + return add; +} static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions const & opt) { @@ -934,23 +1282,34 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co if (!opt.ignoreformat) { str = buffer_to_latex(buffer); } else { - OutputParams runparams(&buffer.params().encoding()); + // OutputParams runparams(&buffer.params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = true; - runparams.flavor = OutputParams::LATEX; - runparams.linelen = 10000; //lyxrc.plaintext_linelen; - runparams.dryrun = true; - runparams.for_search = true; + setFindParams(runparams); + int option = AS_STR_INSETS |AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); + } + else { + runparams.find_set_feature(OutputParams::SearchWithDeleted); + } + if (ignoreFormats.getNonContent()) { + runparams.find_add_feature(OutputParams::SearchNonOutput); + } + string t(""); for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { Paragraph const & par = buffer.paragraphs().at(pit); - LYXERR(Debug::FIND, "Adding to search string: '" - << par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, - &runparams) - << "'"); - str += par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, - &runparams); + string add = latexNamesToUtf8(par.asString(pos_type(0), par.size(), + option, + &runparams), !opt.ignoreformat); + LYXERR(Debug::FINDVERBOSE, "Adding to search string: '" + << add << "'"); + t += add; } + // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts + while (regex_replace(t, t, "\\\\(text|lyxmathsym|ensuremath)\\{([^\\}]*)\\}", "$2")); + str = from_utf8(t); } return str; } @@ -963,14 +1322,20 @@ static size_t identifyLeading(string const & s) // @TODO Support \item[text] // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to search for colored text too - while (regex_replace(t, t, REGEX_BOS "\\\\(((footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|emph|noun|minisec|text(bf|md|sl|sf|it|tt))|((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "") - || regex_replace(t, t, REGEX_BOS "\\$", "") - || regex_replace(t, t, REGEX_BOS "\\\\\\[ ", "") - || regex_replace(t, t, REGEX_BOS " ?\\\\item\\{[a-z]+\\}", "") - || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\} ", "")) + while (regex_replace(t, t, "^\\\\((" + "(author|title|subtitle|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|" + "lyxaddress|lyxrightaddress|" + "footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|" + "emph|noun|minisec|text(bf|md|sl|sf|it|tt))|" + "((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|" + "(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "") + || regex_replace(t, t, "^\\$", "") + || regex_replace(t, t, "^\\\\\\[", "") + || regex_replace(t, t, "^ ?\\\\item\\{[a-z]+\\}", "") + || regex_replace(t, t, "^\\\\begin\\{[a-zA-Z_]*\\*?\\}", "")) ; - LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); - return s.find(t); + LYXERR(Debug::FINDVERBOSE, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); + return s.size() - t.size(); } /* @@ -1020,169 +1385,185 @@ static Features identifyFeatures(string const & s) } } } - return(info); + return info; } /* * defines values features of a key "\\[a-z]+{" */ class KeyInfo { - public: - enum KeyType { - /* Char type with content discarded - * like \hspace{1cm} */ - noContent, - /* Char, like \backslash */ - isChar, - /* \part, \section*, ... */ - isSectioning, - /* title, author etc */ - isTitle, - /* \foreignlanguage{ngerman}, ... */ - isMain, - /* inside \code{} - * to discard language in content */ - noMain, - isRegex, - /* \begin{eqnarray}...\end{eqnarray}, ... $...$ */ - isMath, - /* fonts, colors, markups, ... */ - isStandard, - /* footnotesize, ... large, ... - * Ignore all of them */ - isSize, - invalid, - /* inputencoding, ... - * Discard also content, because they do not help in search */ - doRemove, - /* twocolumns, ... - * like remove, but also all arguments */ - removeWithArg, - /* item, listitem */ - isList, - /* tex, latex, ... like isChar */ - isIgnored, - /* like \lettrine[lines=5]{}{} */ - cleanToStart, - /* End of arguments marker for lettrine, - * so that they can be ignored */ - endArguments - }; - KeyInfo() - : keytype(invalid), - head(""), - _tokensize(-1), - _tokenstart(-1), - _dataStart(-1), - _dataEnd(-1), - parenthesiscount(1), - disabled(false), - used(false) - {}; - KeyInfo(KeyType type, int parcount, bool disable) - : keytype(type), - _tokensize(-1), - _tokenstart(-1), - _dataStart(-1), - _dataEnd(-1), - parenthesiscount(parcount), - disabled(disable), - used(false) {}; - KeyType keytype; - string head; - int _tokensize; - int _tokenstart; - int _dataStart; - int _dataEnd; - int parenthesiscount; - bool disabled; - bool used; /* by pattern */ +public: + enum KeyType { + /* Char type with content discarded + * like \hspace{1cm} */ + noContent, + /* Char, like \backslash */ + isChar, + /* replace starting backslash with '#' */ + isText, + /* \part, \section*, ... */ + isSectioning, + /* title, author etc */ + isTitle, + /* \foreignlanguage{ngerman}, ... */ + isMain, + /* inside \code{} + * to discard language in content */ + noMain, + isRegex, + /* \begin{eqnarray}...\end{eqnarray}, ... $...$ */ + isMath, + /* fonts, colors, markups, ... */ + isStandard, + /* footnotesize, ... large, ... + * Ignore all of them */ + isSize, + invalid, + /* inputencoding, ... + * Discard also content, because they do not help in search */ + doRemove, + /* twocolumns, ... + * like remove, but also all arguments */ + removeWithArg, + /* item, listitem */ + isList, + /* tex, latex, ... like isChar */ + isIgnored, + /* like \lettrine[lines=5]{}{} */ + cleanToStart, + // like isStandard, but always remove head + headRemove, + /* End of arguments marker for lettrine, + * so that they can be ignored */ + endArguments + }; + KeyInfo() = default; + KeyInfo(KeyType type, int parcount, bool disable) + : keytype(type), + parenthesiscount(parcount), + disabled(disable) {} + KeyType keytype = invalid; + string head; + int _tokensize = -1; + int _tokenstart = -1; + int _dataStart = -1; + int _dataEnd = -1; + int parenthesiscount = 1; + bool disabled = false; + bool used = false; /* by pattern */ }; class Border { - public: - Border(int l=0, int u=0) : low(l), upper(u) {}; - int low; - int upper; +public: + Border(int l=0, int u=0) : low(l), upper(u) {} + int low; + int upper; }; #define MAXOPENED 30 class Intervall { - bool isPatternString; - public: - explicit Intervall(bool isPattern) : - isPatternString(isPattern), - ignoreidx(-1), - actualdeptindex(0) { depts[0] = 0; closes[0] = 0;}; - string par; - int ignoreidx; - int depts[MAXOPENED]; - int closes[MAXOPENED]; - int actualdeptindex; - Border borders[2*MAXOPENED]; - int previousNotIgnored(int); - int nextNotIgnored(int); - void handleOpenP(int i); - void handleCloseP(int i, bool closingAllowed); - void resetOpenedP(int openPos); - void addIntervall(int upper); - void addIntervall(int low, int upper); /* if explicit */ - void setForDefaultLang(KeyInfo &defLang); - int findclosing(int start, int end, char up, char down, int repeat); - void handleParentheses(int lastpos, bool closingAllowed); - bool hasTitle; - int isOpeningPar(int pos); - string titleValue; - void output(ostringstream &os, int lastpos); - // string show(int lastpos); + bool isPatternString_; +public: + explicit Intervall(bool isPattern, string const & p) + : isPatternString_(isPattern), par(p), ignoreidx(-1), + actualdeptindex(0), hasTitle(false), langcount(0) + { + depts[0] = 0; + closes[0] = 0; + } + + string par; + int ignoreidx; + static vector borders; + static vector depts; + static vector closes; + int actualdeptindex; + int previousNotIgnored(int) const; + int nextNotIgnored(int) const; + void handleOpenP(int i); + void handleCloseP(int i, bool closingAllowed); + void resetOpenedP(int openPos); + void addIntervall(int upper); + void addIntervall(int low, int upper); /* if explicit */ + void removeAccents(); + void setForDefaultLang(KeyInfo const & defLang) const; + int findclosing(int start, int end, char up, char down, int repeat); + void removeInvalidClosings(void); + void handleParentheses(int lastpos, bool closingAllowed); + bool hasTitle; + // Number of disabled language specs up + // to current position in actual interval + int langcount; + int isOpeningPar(int pos) const; + string titleValue; + void output(ostringstream &os, int lastpos); + // string show(int lastpos); }; -int Intervall::isOpeningPar(int pos) +vector Intervall::borders = vector(30); +vector Intervall::depts = vector(30); +vector Intervall::closes = vector(30); + +int Intervall::isOpeningPar(int pos) const { - if ((pos < 0) || (size_t(pos) >= par.size())) - return 0; - if (par[pos] != '{') - return 0; - if (size_t(pos) + 2 >= par.size()) - return 1; - if (par[pos+2] != '}') - return 1; - if (par[pos+1] == '[' || par[pos+1] == ']') - return 3; - return 1; + if ((pos < 0) || (size_t(pos) >= par.size())) + return 0; + if (par[pos] != '{') + return 0; + if (size_t(pos) + 2 >= par.size()) + return 1; + if (par[pos+2] != '}') + return 1; + if (par[pos+1] == '[' || par[pos+1] == ']') + return 3; + return 1; } -void Intervall::setForDefaultLang(KeyInfo &defLang) +void Intervall::setForDefaultLang(KeyInfo const & defLang) const { - // Enable the use of first token again - if (ignoreidx >= 0) { - int value = defLang._tokenstart + defLang._tokensize; - if (value > 0) { - if (borders[0].low < value) - borders[0].low = value; - if (borders[0].upper < value) - borders[0].upper = value; - } - } + // Enable the use of first token again + if (ignoreidx >= 0) { + int value = defLang._tokenstart + defLang._tokensize; + int borderidx = 0; + if (hasTitle) + borderidx = 1; + if (value > 0) { + if (borders[borderidx].low < value) + borders[borderidx].low = value; + if (borders[borderidx].upper < value) + borders[borderidx].upper = value; + } + } } +#if 0 +// Not needed, because dpts and closes are now dynamically expanded static void checkDepthIndex(int val) { - static int maxdepthidx = MAXOPENED-2; - if (val > maxdepthidx) { - maxdepthidx = val; - LYXERR0("maxdepthidx now " << val); - } + static int maxdepthidx = MAXOPENED-2; + static int lastmaxdepth = 0; + if (val > lastmaxdepth) { + LYXERR(Debug::INFO, "Depth reached " << val); + lastmaxdepth = val; + } + if (val > maxdepthidx) { + maxdepthidx = val; + LYXERR(Debug::INFO, "maxdepthidx now " << val); + } } +#endif +#if 0 +// Not needed, because borders are now dynamically expanded static void checkIgnoreIdx(int val) { - static int maxignoreidx = 2*MAXOPENED - 4; - if (val > maxignoreidx) { - maxignoreidx = val; - LYXERR0("maxignoreidx now " << val); - } + static int lastmaxignore = -1; + if ((lastmaxignore < val) && (size_t(val+1) >= borders.size())) { + LYXERR(Debug::INFO, "IgnoreIdx reached " << val); + lastmaxignore = val; + } } +#endif /* * Expand the region of ignored parts of the input latex string @@ -1190,732 +1571,1370 @@ static void checkIgnoreIdx(int val) */ void Intervall::addIntervall(int low, int upper) { - int idx; - if (low == upper) return; - for (idx = ignoreidx+1; idx > 0; --idx) { - if (low > borders[idx-1].upper) { - break; - } - } - Border br(low, upper); - if (idx > ignoreidx) { - borders[idx] = br; - ignoreidx = idx; - checkIgnoreIdx(ignoreidx); - return; - } - else { - // Expand only if one of the new bound is inside the interwall - // We know here that br.low > borders[idx-1].upper - if (br.upper < borders[idx].low) { - // We have to insert at this pos - for (int i = ignoreidx+1; i > idx; --i) { - borders[i] = borders[i-1]; - } - borders[idx] = br; - ignoreidx += 1; - checkIgnoreIdx(ignoreidx); - return; - } - // Here we know, that we are overlapping - if (br.low > borders[idx].low) - br.low = borders[idx].low; - // check what has to be concatenated - int count = 0; - for (int i = idx; i <= ignoreidx; i++) { - if (br.upper >= borders[i].low) { - count++; - if (br.upper < borders[i].upper) - br.upper = borders[i].upper; - } - else { - break; - } - } - // count should be >= 1 here - borders[idx] = br; - if (count > 1) { - for (int i = idx + count; i <= ignoreidx; i++) { - borders[i-count+1] = borders[i]; - } - ignoreidx -= count - 1; - return; - } - } + int idx; + if (low == upper) return; + for (idx = ignoreidx+1; idx > 0; --idx) { + if (low > borders[idx-1].upper) { + break; + } + } + Border br(low, upper); + if (idx > ignoreidx) { + if (borders.size() <= size_t(idx)) { + borders.push_back(br); + } + else { + borders[idx] = br; + } + ignoreidx = idx; + // checkIgnoreIdx(ignoreidx); + return; + } + else { + // Expand only if one of the new bound is inside the interwall + // We know here that br.low > borders[idx-1].upper + if (br.upper < borders[idx].low) { + // We have to insert at this pos + if (size_t(ignoreidx+1) >= borders.size()) { + borders.push_back(borders[ignoreidx]); + } + else { + borders[ignoreidx+1] = borders[ignoreidx]; + } + for (int i = ignoreidx; i > idx; --i) { + borders[i] = borders[i-1]; + } + borders[idx] = br; + ignoreidx += 1; + // checkIgnoreIdx(ignoreidx); + return; + } + // Here we know, that we are overlapping + if (br.low > borders[idx].low) + br.low = borders[idx].low; + // check what has to be concatenated + int count = 0; + for (int i = idx; i <= ignoreidx; i++) { + if (br.upper >= borders[i].low) { + count++; + if (br.upper < borders[i].upper) + br.upper = borders[i].upper; + } + else { + break; + } + } + // count should be >= 1 here + borders[idx] = br; + if (count > 1) { + for (int i = idx + count; i <= ignoreidx; i++) { + borders[i-count+1] = borders[i]; + } + ignoreidx -= count - 1; + return; + } + } } -void Intervall::handleOpenP(int i) +static void buildaccent(string n, string param, string values) { - actualdeptindex++; - depts[actualdeptindex] = i+1; - closes[actualdeptindex] = -1; - checkDepthIndex(actualdeptindex); + stringstream s(n); + string name; + const char delim = '|'; + while (getline(s, name, delim)) { + size_t start = 0; + for (char c : param) { + string key = name + "{" + c + "}"; + // get the corresponding utf8-value + if ((values[start] & 0xc0) != 0xc0) { + // should not happen, utf8 encoding starts at least with 11xxxxxx + // but value for '\dot{i}' is 'i', which is ascii + if ((values[start] & 0x80) == 0) { + // is ascii + accents[key] = values.substr(start, 1); + // LYXERR(Debug::INFO, "" << key << "=" << accents[key]); + } + start++; + continue; + } + for (int j = 1; ;j++) { + if (start + j >= values.size()) { + accents[key] = values.substr(start, j); + start = values.size() - 1; + break; + } + else if ((values[start+j] & 0xc0) != 0x80) { + // This is the first byte of following utf8 char + accents[key] = values.substr(start, j); + start += j; + // LYXERR(Debug::INFO, "" << key << "=" << accents[key]); + break; + } + } + } + } } -void Intervall::handleCloseP(int i, bool closingAllowed) +// Helper function +static string getutf8(unsigned uchar) { - if (actualdeptindex <= 0) { - if (! closingAllowed) - LYXERR(Debug::FIND, "Bad closing parenthesis in latex"); /* should not happen, but the latex input may be wrong */ - // if we are at the very end - addIntervall(i, i+1); - } - else { - closes[actualdeptindex] = i+1; - actualdeptindex--; - } -} - +#define maxc 5 + string ret = string(); + char c[maxc] = {0}; + if (uchar <= 0x7f) { + c[maxc-1] = uchar & 0x7f; + } + else { + unsigned char rest = 0x40; + unsigned char first = 0x80; + int start = maxc-1; + for (int i = start; i >=0; --i) { + if (uchar < rest) { + c[i] = first + uchar; + break; + } + c[i] = 0x80 | (uchar & 0x3f); + uchar >>= 6; + rest >>= 1; + first >>= 1; + first |= 0x80; + } + } + for (int i = 0; i < maxc; i++) { + if (c[i] == 0) continue; + ret += c[i]; + } + return(ret); +} + +static void addAccents(string latex_in, string unicode_out) +{ + latex_in = latex_in.substr(1); + AccentsIterator it_ac = accents.find(latex_in); + if (it_ac == accents.end()) { + accents[latex_in] = unicode_out; + } + else { + LYXERR0("Key " << latex_in << " already set"); + } +} + +void static fillMissingUnicodesymbols() +{ + addAccents("\\pounds", getutf8(0x00a3)); + addAccents("\\textsterling", getutf8(0x00a3)); + addAccents("\\textyen", getutf8(0x00a5)); + addAccents("\\yen", getutf8(0x00a5)); + addAccents("\\textsection", getutf8(0x00a7)); + addAccents("\\mathsection", getutf8(0x00a7)); + addAccents("\\textcopyright", getutf8(0x00a9)); + addAccents("\\copyright", getutf8(0x00a9)); + addAccents("\\textlnot", getutf8(0x00ac)); + addAccents("\\neg", getutf8(0x00ac)); + addAccents("\\textregistered", getutf8(0x00ae)); + addAccents("\\circledR", getutf8(0x00ae)); + addAccents("\\textpm", getutf8(0x00b1)); + addAccents("\\pm", getutf8(0x00b1)); + addAccents("\\textparagraph", getutf8(0x00b6)); + addAccents("\\mathparagraph", getutf8(0x00b6)); + addAccents("\\textperiodcentered", getutf8(0x00b7)); + addAccents("\\texttimes", getutf8(0x00d7)); + addAccents("\\times", getutf8(0x00d7)); + addAccents("\\O", getutf8(0x00d8)); + addAccents("\\dh", getutf8(0x00f0)); + addAccents("\\eth", getutf8(0x00f0)); + addAccents("\\textdiv", getutf8(0x00f7)); + addAccents("\\div", getutf8(0x00f7)); + addAccents("\\o", getutf8(0x00f8)); + addAccents("\\textcrlambda", getutf8(0x019b)); + addAccents("\\j", getutf8(0x0237)); + addAccents("\\textrevepsilon", getutf8(0x025c)); + addAccents("\\textbaru", getutf8(0x0289)); + addAccents("\\textquoteleft", getutf8(0x02bb)); + addAccents("\\textGamma", getutf8(0x0393)); + addAccents("\\Gamma", getutf8(0x0393)); + addAccents("\\textDelta", getutf8(0x0394)); + addAccents("\\Delta", getutf8(0x0394)); + addAccents("\\textTheta", getutf8(0x0398)); + addAccents("\\Theta", getutf8(0x0398)); + addAccents("\\textLambda", getutf8(0x039b)); + addAccents("\\Lambda", getutf8(0x039b)); + addAccents("\\textXi", getutf8(0x039e)); + addAccents("\\Xi", getutf8(0x039e)); + addAccents("\\textPi", getutf8(0x03a0)); + addAccents("\\Pi", getutf8(0x03a0)); + addAccents("\\textSigma", getutf8(0x03a3)); + addAccents("\\Sigma", getutf8(0x03a3)); + addAccents("\\textUpsilon", getutf8(0x03a5)); + addAccents("\\Upsilon", getutf8(0x03a5)); + addAccents("\\textPhi", getutf8(0x03a6)); + addAccents("\\Phi", getutf8(0x03a6)); + addAccents("\\textPsi", getutf8(0x03a8)); + addAccents("\\Psi", getutf8(0x03a8)); + addAccents("\\textOmega", getutf8(0x03a9)); + addAccents("\\Omega", getutf8(0x03a9)); + addAccents("\\textalpha", getutf8(0x03b1)); + addAccents("\\alpha", getutf8(0x03b1)); + addAccents("\\textbeta", getutf8(0x03b2)); + addAccents("\\beta", getutf8(0x03b2)); + addAccents("\\textgamma", getutf8(0x03b3)); + addAccents("\\gamma", getutf8(0x03b3)); + addAccents("\\textdelta", getutf8(0x03b4)); + addAccents("\\delta", getutf8(0x03b4)); + addAccents("\\textepsilon", getutf8(0x03b5)); + addAccents("\\varepsilon", getutf8(0x03b5)); + addAccents("\\textzeta", getutf8(0x03b6)); + addAccents("\\zeta", getutf8(0x03b6)); + addAccents("\\texteta", getutf8(0x03b7)); + addAccents("\\eta", getutf8(0x03b7)); + addAccents("\\texttheta", getutf8(0x03b8)); + addAccents("\\theta", getutf8(0x03b8)); + addAccents("\\textiota", getutf8(0x03b9)); + addAccents("\\iota", getutf8(0x03b9)); + addAccents("\\textkappa", getutf8(0x03ba)); + addAccents("\\kappa", getutf8(0x03ba)); + addAccents("\\textlambda", getutf8(0x03bb)); + addAccents("\\lambda", getutf8(0x03bb)); + addAccents("\\textmu", getutf8(0x03bc)); + addAccents("\\mu", getutf8(0x03bc)); + addAccents("\\textnu", getutf8(0x03bd)); + addAccents("\\nu", getutf8(0x03bd)); + addAccents("\\textxi", getutf8(0x03be)); + addAccents("\\xi", getutf8(0x03be)); + addAccents("\\textpi", getutf8(0x03c0)); + addAccents("\\pi", getutf8(0x03c0)); + addAccents("\\textrho", getutf8(0x03c1)); + addAccents("\\rho", getutf8(0x03c1)); + addAccents("\\textfinalsigma", getutf8(0x03c2)); + addAccents("\\varsigma", getutf8(0x03c2)); + addAccents("\\textsigma", getutf8(0x03c3)); + addAccents("\\sigma", getutf8(0x03c3)); + addAccents("\\texttau", getutf8(0x03c4)); + addAccents("\\tau", getutf8(0x03c4)); + addAccents("\\textupsilon", getutf8(0x03c5)); + addAccents("\\upsilon", getutf8(0x03c5)); + addAccents("\\textphi", getutf8(0x03c6)); + addAccents("\\varphi", getutf8(0x03c6)); + addAccents("\\textchi", getutf8(0x03c7)); + addAccents("\\chi", getutf8(0x03c7)); + addAccents("\\textpsi", getutf8(0x03c8)); + addAccents("\\psi", getutf8(0x03c8)); + addAccents("\\textomega", getutf8(0x03c9)); + addAccents("\\omega", getutf8(0x03c9)); + addAccents("\\textdigamma", getutf8(0x03dd)); + addAccents("\\digamma", getutf8(0x03dd)); + addAccents("\\hebalef", getutf8(0x05d0)); + addAccents("\\aleph", getutf8(0x05d0)); + addAccents("\\hebbet", getutf8(0x05d1)); + addAccents("\\beth", getutf8(0x05d1)); + addAccents("\\hebgimel", getutf8(0x05d2)); + addAccents("\\gimel", getutf8(0x05d2)); + addAccents("\\hebdalet", getutf8(0x05d3)); + addAccents("\\daleth", getutf8(0x05d3)); + addAccents("\\hebhe", getutf8(0x05d4)); + addAccents("\\hebvav", getutf8(0x05d5)); + addAccents("\\hebzayin", getutf8(0x05d6)); + addAccents("\\hebhet", getutf8(0x05d7)); + addAccents("\\hebtet", getutf8(0x05d8)); + addAccents("\\hebyod", getutf8(0x05d9)); + addAccents("\\hebfinalkaf", getutf8(0x05da)); + addAccents("\\hebkaf", getutf8(0x05db)); + addAccents("\\heblamed", getutf8(0x05dc)); + addAccents("\\hebfinalmem", getutf8(0x05dd)); + addAccents("\\hebmem", getutf8(0x05de)); + addAccents("\\hebfinalnun", getutf8(0x05df)); + addAccents("\\hebnun", getutf8(0x05e0)); + addAccents("\\hebsamekh", getutf8(0x05e1)); + addAccents("\\hebayin", getutf8(0x05e2)); + addAccents("\\hebfinalpe", getutf8(0x05e3)); + addAccents("\\hebpe", getutf8(0x05e4)); + addAccents("\\hebfinaltsadi", getutf8(0x05e5)); + addAccents("\\hebtsadi", getutf8(0x05e6)); + addAccents("\\hebqof", getutf8(0x05e7)); + addAccents("\\hebresh", getutf8(0x05e8)); + addAccents("\\hebshin", getutf8(0x05e9)); + addAccents("\\hebtav", getutf8(0x05ea)); + + // Thai characters + addAccents("\\thaiKoKai", getutf8(0x0e01)); + addAccents("\\thaiKhoKhai", getutf8(0x0e02)); + addAccents("\\thaiKhoKhuat", getutf8(0x0e03)); + addAccents("\\thaiKhoKhwai", getutf8(0x0e04)); + addAccents("\\thaiKhoKhon", getutf8(0x0e05)); + addAccents("\\thaiKhoRakhang", getutf8(0x0e06)); + addAccents("\\thaiNgoNgu", getutf8(0x0e07)); + addAccents("\\thaiChoChan", getutf8(0x0e08)); + addAccents("\\thaiChoChing", getutf8(0x0e09)); + addAccents("\\thaiChoChang", getutf8(0x0e0a)); + addAccents("\\thaiSoSo", getutf8(0x0e0b)); + addAccents("\\thaiChoChoe", getutf8(0x0e0c)); + addAccents("\\thaiYoYing", getutf8(0x0e0d)); + addAccents("\\thaiDoChada", getutf8(0x0e0e)); + addAccents("\\thaiToPatak", getutf8(0x0e0f)); + addAccents("\\thaiThoThan", getutf8(0x0e10)); + addAccents("\\thaiThoNangmontho", getutf8(0x0e11)); + addAccents("\\thaiThoPhuthao", getutf8(0x0e12)); + addAccents("\\thaiNoNen", getutf8(0x0e13)); + addAccents("\\thaiDoDek", getutf8(0x0e14)); + addAccents("\\thaiToTao", getutf8(0x0e15)); + addAccents("\\thaiThoThung", getutf8(0x0e16)); + addAccents("\\thaiThoThahan", getutf8(0x0e17)); + addAccents("\\thaiThoThong", getutf8(0x0e18)); + addAccents("\\thaiNoNu", getutf8(0x0e19)); + addAccents("\\thaiBoBaimai", getutf8(0x0e1a)); + addAccents("\\thaiPoPla", getutf8(0x0e1b)); + addAccents("\\thaiPhoPhung", getutf8(0x0e1c)); + addAccents("\\thaiFoFa", getutf8(0x0e1d)); + addAccents("\\thaiPhoPhan", getutf8(0x0e1e)); + addAccents("\\thaiFoFan", getutf8(0x0e1f)); + addAccents("\\thaiPhoSamphao", getutf8(0x0e20)); + addAccents("\\thaiMoMa", getutf8(0x0e21)); + addAccents("\\thaiYoYak", getutf8(0x0e22)); + addAccents("\\thaiRoRua", getutf8(0x0e23)); + addAccents("\\thaiRu", getutf8(0x0e24)); + addAccents("\\thaiLoLing", getutf8(0x0e25)); + addAccents("\\thaiLu", getutf8(0x0e26)); + addAccents("\\thaiWoWaen", getutf8(0x0e27)); + addAccents("\\thaiSoSala", getutf8(0x0e28)); + addAccents("\\thaiSoRusi", getutf8(0x0e29)); + addAccents("\\thaiSoSua", getutf8(0x0e2a)); + addAccents("\\thaiHoHip", getutf8(0x0e2b)); + addAccents("\\thaiLoChula", getutf8(0x0e2c)); + addAccents("\\thaiOAng", getutf8(0x0e2d)); + addAccents("\\thaiHoNokhuk", getutf8(0x0e2e)); + addAccents("\\thaiPaiyannoi", getutf8(0x0e2f)); + addAccents("\\thaiSaraA", getutf8(0x0e30)); + addAccents("\\thaiMaiHanakat", getutf8(0x0e31)); + addAccents("\\thaiSaraAa", getutf8(0x0e32)); + addAccents("\\thaiSaraAm", getutf8(0x0e33)); + addAccents("\\thaiSaraI", getutf8(0x0e34)); + addAccents("\\thaiSaraIi", getutf8(0x0e35)); + addAccents("\\thaiSaraUe", getutf8(0x0e36)); + addAccents("\\thaiSaraUee", getutf8(0x0e37)); + addAccents("\\thaiSaraU", getutf8(0x0e38)); + addAccents("\\thaiSaraUu", getutf8(0x0e39)); + addAccents("\\thaiPhinthu", getutf8(0x0e3a)); + addAccents("\\thaiSaraE", getutf8(0x0e40)); + addAccents("\\thaiSaraAe", getutf8(0x0e41)); + addAccents("\\thaiSaraO", getutf8(0x0e42)); + addAccents("\\thaiSaraAiMaimuan", getutf8(0x0e43)); + addAccents("\\thaiSaraAiMaimalai", getutf8(0x0e44)); + addAccents("\\thaiLakkhangyao", getutf8(0x0e45)); + addAccents("\\thaiMaiyamok", getutf8(0x0e46)); + addAccents("\\thaiMaitaikhu", getutf8(0x0e47)); + addAccents("\\thaiMaiEk", getutf8(0x0e48)); + addAccents("\\thaiMaiTho", getutf8(0x0e49)); + addAccents("\\thaiMaiTri", getutf8(0x0e4a)); + addAccents("\\thaiMaiChattawa", getutf8(0x0e4b)); + addAccents("\\thaiThanthakhat", getutf8(0x0e4c)); + addAccents("\\thaiNikhahit", getutf8(0x0e4d)); + addAccents("\\thaiYamakkan", getutf8(0x0e4e)); + addAccents("\\thaiFongman", getutf8(0x0e4f)); + addAccents("\\thaizero", getutf8(0x0e50)); + addAccents("\\thaione", getutf8(0x0e51)); + addAccents("\\thaitwo", getutf8(0x0e52)); + addAccents("\\thaithree", getutf8(0x0e53)); + addAccents("\\thaifour", getutf8(0x0e54)); + addAccents("\\thaifive", getutf8(0x0e55)); + addAccents("\\thaisix", getutf8(0x0e56)); + addAccents("\\thaiseven", getutf8(0x0e57)); + addAccents("\\thaieight", getutf8(0x0e58)); + addAccents("\\thainine", getutf8(0x0e59)); + addAccents("\\thaiAngkhankhu", getutf8(0x0e5a)); + addAccents("\\thaiKhomut", getutf8(0x0e5b)); + addAccents("\\dag", getutf8(0x2020)); + addAccents("\\dagger", getutf8(0x2020)); + addAccents("\\textdagger", getutf8(0x2020)); + addAccents("\\ddag", getutf8(0x2021)); + addAccents("\\ddagger", getutf8(0x2021)); + addAccents("\\textdaggerdbl", getutf8(0x2021)); + addAccents("\\textbullet", getutf8(0x2022)); + addAccents("\\bullet", getutf8(0x2022)); + addAccents("\\dots", getutf8(0x2026)); + addAccents("\\ldots", getutf8(0x2026)); + addAccents("\\textellipsis", getutf8(0x2026)); + addAccents("\\textasciiacute", getutf8(0x2032)); + addAccents("\\prime", getutf8(0x2032)); + addAccents("\\textacutedbl", getutf8(0x2033)); + addAccents("\\dprime", getutf8(0x2033)); + addAccents("\\textasciigrave", getutf8(0x2035)); + addAccents("\\backprime", getutf8(0x2035)); + addAccents("\\textsubcircum{ }", getutf8(0x2038)); + addAccents("\\caretinsert", getutf8(0x2038)); + addAccents("\\textasteriskcentered", getutf8(0x204e)); + addAccents("\\ast", getutf8(0x204e)); + addAccents("\\textmho", getutf8(0x2127)); + addAccents("\\mho", getutf8(0x2127)); + addAccents("\\textleftarrow", getutf8(0x2190)); + addAccents("\\leftarrow", getutf8(0x2190)); + addAccents("\\textuparrow", getutf8(0x2191)); + addAccents("\\uparrow", getutf8(0x2191)); + addAccents("\\textrightarrow", getutf8(0x2192)); + addAccents("\\rightarrow", getutf8(0x2192)); + addAccents("\\textdownarrow", getutf8(0x2193)); + addAccents("\\downarrow", getutf8(0x2193)); + addAccents("\\textglobrise", getutf8(0x2197)); + addAccents("\\nearrow", getutf8(0x2197)); + addAccents("\\textglobfall", getutf8(0x2198)); + addAccents("\\searrow", getutf8(0x2198)); + addAccents("\\textsurd", getutf8(0x221a)); + addAccents("\\surd", getutf8(0x221a)); + addAccents("\\textbigcircle", getutf8(0x25ef)); + addAccents("\\bigcirc", getutf8(0x25ef)); + addAccents("\\FiveStar", getutf8(0x2605)); + addAccents("\\bigstar", getutf8(0x2605)); + addAccents("\\FiveStarOpen", getutf8(0x2606)); + addAccents("\\bigwhitestar", getutf8(0x2606)); + addAccents("\\Checkmark", getutf8(0x2713)); + addAccents("\\checkmark", getutf8(0x2713)); + addAccents("\\CrossMaltese", getutf8(0x2720)); + addAccents("\\maltese", getutf8(0x2720)); + addAccents("\\textlangle", getutf8(0x27e8)); + addAccents("\\langle", getutf8(0x27e8)); + addAccents("\\textrangle", getutf8(0x27e9)); + addAccents("\\rangle", getutf8(0x27e9)); +} + +static void buildAccentsMap() +{ + accents["imath"] = "ı"; + accents["i"] = "ı"; + accents["jmath"] = "È·"; + accents["cdot"] = "·"; + accents["textasciicircum"] = "^"; + accents["mathcircumflex"] = "^"; + accents["guillemotright"] = "»"; + accents["guillemotleft"] = "«"; + accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15 + accents["thinspace"] = getutf8(0xf0002); // and used _only_ by findadv + accents["negthinspace{}"]= getutf8(0xf0003); // to omit backslashed latex macros + accents["medspace"] = getutf8(0xf0004); // See https://en.wikipedia.org/wiki/Private_Use_Areas + accents["negmedspace{}"] = getutf8(0xf0005); + accents["thickspace"] = getutf8(0xf0006); + accents["negthickspace{}"]= getutf8(0xf0007); + accents["lyx"] = getutf8(0xf0010); // Used logos + accents["LyX"] = getutf8(0xf0010); + accents["tex"] = getutf8(0xf0011); + accents["TeX"] = getutf8(0xf0011); + accents["latex"] = getutf8(0xf0012); + accents["LaTeX"] = getutf8(0xf0012); + accents["latexe"] = getutf8(0xf0013); + accents["LaTeXe"] = getutf8(0xf0013); + accents["lyxarrow"] = getutf8(0xf0020); + accents["braceleft"] = getutf8(0xf0030); + accents["braceright"] = getutf8(0xf0031); + accents["lyxtilde"] = getutf8(0xf0032); + accents["sim"] = getutf8(0xf0032); + accents["lyxdollar"] = getutf8(0xf0033); + accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash + accents["backslash LyX"] = getutf8(0xf0010); + accents["backslash tex"] = getutf8(0xf0011); + accents["backslash TeX"] = getutf8(0xf0011); + accents["backslash latex"] = getutf8(0xf0012); + accents["backslash LaTeX"] = getutf8(0xf0012); + accents["backslash latexe"] = getutf8(0xf0013); + accents["backslash LaTeXe"] = getutf8(0xf0013); + accents["backslash lyxarrow"] = getutf8(0xf0020); + accents["ddot{\\imath}"] = "ï"; + buildaccent("ddot", "aAeEhHiIoOtuUwWxXyY", + "äÄëËḧḦïÏöÖẗüÜẅẄẍẌÿŸ"); // umlaut + buildaccent("dot|.", "aAbBcCdDeEfFGghHIimMnNoOpPrRsStTwWxXyYzZ", + "ȧȦḃḂċĊḋḊėĖḟḞĠġḣḢİİṁṀṅṄȯȮṗṖṙṘṡṠṫṪẇẆẋẊẏẎżŻ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'Ä°' + accents["acute{\\imath}"] = "í"; + buildaccent("acute", "aAcCeEgGkKlLmMoOnNpPrRsSuUwWyYzZiI", + "áÁćĆéÉǵǴḱḰĺĹḿḾóÓńŃṕṔŕŔśŚúÚẃẂýÝźŹíÍ"); + buildaccent("dacute|H|h", "oOuU", "őŐűŰ"); // double acute + buildaccent("mathring|r", "aAuUwy", + "åÅůŮẘẙ"); // ring + accents["check{\\imath}"] = "ǐ"; + accents["check{\\jmath}"] = "Ç°"; + buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ", + "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ"); // caron + accents["hat{\\imath}"] = "î"; + accents["hat{\\jmath}"] = "ĵ"; + buildaccent("hat|^", "aAcCeEgGhHiIjJoOsSuUwWyYzZ", + "âÂĉĈêÊĝĜĥĤîÎĵĴôÔŝŜûÛŵŴŷŶẑẐ"); // circ + accents["bar{\\imath}"] = "Ä«"; + buildaccent("bar|=", "aAeEiIoOuUyY", + "āĀēĒīĪōŌūŪȳȲ"); // macron + accents["tilde{\\imath}"] = "Ä©"; + buildaccent("tilde", "aAeEiInNoOuUvVyY", + "ãÃẽẼĩĨñÑõÕũŨṽṼỹỸ"); // tilde + accents["breve{\\imath}"] = "Ä­"; + buildaccent("breve|u", "aAeEgGiIoOuU", + "ăĂĕĔğĞĭĬŏŎŭŬ"); // breve + accents["grave{\\imath}"] = "ì"; + buildaccent("grave|`", "aAeEiIoOuUnNwWyY", + "àÀèÈìÌòÒùÙǹǸẁẀỳỲ"); // grave + buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy", + "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ"); // dot below + buildaccent("ogonek|k", "AaEeIiUuOo", + "ĄąĘęĮįŲųǪǫ"); // ogonek + buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh", + "ÇçĢģĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla + buildaccent("subring|textsubring", "Aa", + "Ḁḁ"); // subring + buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu", + "ḒḓḘḙḼḽṊṋṰṱṶṷ"); // subcircum + buildaccent("subtilde|textsubtilde", "EeIiUu", + "ḚḛḬḭṴṵ"); // subtilde + accents["dgrave{\\imath}"] = "ȉ"; + accents["textdoublegrave{\\i}"] = "ȉ"; + buildaccent("dgrave|textdoublegrave", "AaEeIiOoRrUu", + "ȀȁȄȅȈȉȌȍȐȑȔȕ"); // double grave + accents["rcap{\\imath}"] = "ȋ"; + accents["textroundcap{\\i}"] = "ȋ"; + buildaccent("rcap|textroundcap", "AaEeIiOoRrUu", + "ȂȃȆȇȊȋȎȏȒȓȖȗ"); // inverted breve + buildaccent("slashed", "oO", + "øØ"); // slashed + fillMissingUnicodesymbols(); // Add some still not handled entries contained in 'unicodesynbols' + // LYXERR0("Number of accents " << accents.size()); +} + +/* + * Created accents in math or regexp environment + * are macros, but we need the utf8 equivalent + */ +void Intervall::removeAccents() +{ + if (accents.empty()) + buildAccentsMap(); + static regex const accre("\\\\(" + "([\\S]|[A-Za-z]+)\\{[^\\\\\\{\\}]*\\}" + "|([\\S]|[A-Za-z]+)\\{\\\\[ij](math)?\\}" + "|(" + "(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))" + "|[A-Za-z]+" + ")" + "(?![a-zA-Z]))"); + smatch sub; + for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { + sub = *itacc; + string key = sub.str(1); + AccentsIterator it_ac = accents.find(key); + if (it_ac != accents.end()) { + string val = it_ac->second; + size_t pos = sub.position(size_t(0)); + for (size_t i = 0; i < val.size(); i++) { + par[pos+i] = val[i]; + } + // Remove possibly following space too + if (par[pos+sub.str(0).size()] == ' ') + addIntervall(pos+val.size(), pos + sub.str(0).size()+1); + else + addIntervall(pos+val.size(), pos + sub.str(0).size()); + for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) { + // remove traces of any remaining chars + par[i] = ' '; + } + } + else { + LYXERR(Debug::INFO, "Not added accent for \"" << key << "\""); + } + } +} + +void Intervall::handleOpenP(int i) +{ + actualdeptindex++; + if ((size_t) actualdeptindex >= depts.size()) { + depts.resize(actualdeptindex + 30); + closes.resize(actualdeptindex + 30); + } + depts[actualdeptindex] = i+1; + closes[actualdeptindex] = -1; + // checkDepthIndex(actualdeptindex); +} + +void Intervall::handleCloseP(int i, bool closingAllowed) +{ + if (actualdeptindex <= 0) { + if (! closingAllowed) + LYXERR(Debug::FINDVERBOSE, "Bad closing parenthesis in latex"); /* should not happen, but the latex input may be wrong */ + // if we are at the very end + addIntervall(i, i+1); + } + else { + closes[actualdeptindex] = i+1; + actualdeptindex--; + } +} + void Intervall::resetOpenedP(int openPos) { - // Used as initializer for foreignlanguage entry - actualdeptindex = 1; - depts[1] = openPos+1; - closes[1] = -1; + // Used as initializer for foreignlanguage entry + actualdeptindex = 1; + depts[1] = openPos+1; + closes[1] = -1; } -int Intervall::previousNotIgnored(int start) +int Intervall::previousNotIgnored(int start) const { - int idx = 0; /* int intervalls */ - for (idx = ignoreidx; idx >= 0; --idx) { - if (start > borders[idx].upper) - return start; - if (start >= borders[idx].low) - start = borders[idx].low-1; - } - return start; + int idx = 0; /* int intervalls */ + for (idx = ignoreidx; idx >= 0; --idx) { + if (start > borders[idx].upper) + return start; + if (start >= borders[idx].low) + start = borders[idx].low-1; + } + return start; } -int Intervall::nextNotIgnored(int start) +int Intervall::nextNotIgnored(int start) const { - int idx = 0; /* int intervalls */ - for (idx = 0; idx <= ignoreidx; idx++) { - if (start < borders[idx].low) - return start; - if (start < borders[idx].upper) - start = borders[idx].upper; - } - return start; + int idx = 0; /* int intervalls */ + for (idx = 0; idx <= ignoreidx; idx++) { + if (start < borders[idx].low) + return start; + if (start < borders[idx].upper) + start = borders[idx].upper; + } + return start; } -typedef map KeysMap; +typedef unordered_map KeysMap; +typedef unordered_map::const_iterator KeysIterator; typedef vector< KeyInfo> Entries; -static KeysMap keys = map(); +static KeysMap keys = unordered_map(); class LatexInfo { - private: - int entidx; - Entries entries; - Intervall interval; - void buildKeys(bool); - void buildEntries(bool); - void makeKey(const string &, KeyInfo, bool isPatternString); - void processRegion(int start, int region_end); /* remove {} parts */ - void removeHead(KeyInfo&, int count=0); +private: + int entidx_; + Entries entries_; + Intervall interval_; + void buildKeys(bool); + void buildEntries(bool); + void makeKey(const string &, KeyInfo, bool isPatternString); + void processRegion(int start, int region_end); /* remove {} parts */ + void removeHead(KeyInfo const &, int count=0); - public: - LatexInfo(string par, bool isPatternString) : entidx(-1), interval(isPatternString) { - interval.par = par; - interval.hasTitle = false; - interval.titleValue = ""; - buildKeys(isPatternString); - entries = vector(); - buildEntries(isPatternString); - }; - int getFirstKey() { - entidx = 0; - if (entries.empty()) { - return (-1); - } - if (entries[0].keytype == KeyInfo::isTitle) { - if (! entries[0].disabled) { - interval.hasTitle = true; - interval.titleValue = entries[0].head; - } - else { - interval.hasTitle = false; - interval.titleValue = ""; - } - removeHead(entries[0]); - if (entries.size() > 1) - return (1); - else - return (-1); - } - return 0; - }; - int getNextKey() { - entidx++; - if (int(entries.size()) > entidx) { - return entidx; - } - else { - return (-1); - } - }; - bool setNextKey(int idx) { - if ((idx == entidx) && (entidx >= 0)) { - entidx--; - return true; - } - else - return false; - }; - int find(int start, KeyInfo::KeyType keytype) { - if (start < 0) - return (-1); - int tmpIdx = start; - while (tmpIdx < int(entries.size())) { - if (entries[tmpIdx].keytype == keytype) - return tmpIdx; - tmpIdx++; - } - return(-1); - }; - int process(ostringstream &os, KeyInfo &actual); - int dispatch(ostringstream &os, int previousStart, KeyInfo &actual); - // string show(int lastpos) { return interval.show(lastpos);}; - int nextNotIgnored(int start) { return interval.nextNotIgnored(start);}; - KeyInfo &getKeyInfo(int keyinfo) { - static KeyInfo invalidInfo = KeyInfo(); - if ((keyinfo < 0) || ( keyinfo >= int(entries.size()))) - return invalidInfo; - else - return entries[keyinfo]; - }; - void setForDefaultLang(KeyInfo &defLang) {interval.setForDefaultLang(defLang);}; - void addIntervall(int low, int up) { interval.addIntervall(low, up); }; +public: + LatexInfo(string const & par, bool isPatternString) + : entidx_(-1), interval_(isPatternString, par) + { + buildKeys(isPatternString); + entries_ = vector(); + buildEntries(isPatternString); + } + int getFirstKey() { + entidx_ = 0; + if (entries_.empty()) { + return -1; + } + if (entries_[0].keytype == KeyInfo::isTitle) { + interval_.hasTitle = true; + if (! entries_[0].disabled) { + interval_.titleValue = entries_[0].head; + } + else { + interval_.titleValue = ""; + } + removeHead(entries_[0]); + if (entries_.size() > 1) + return 1; + else + return -1; + } + return 0; + } + int getNextKey() { + entidx_++; + if (int(entries_.size()) > entidx_) { + return entidx_; + } + else { + return -1; + } + } + bool setNextKey(int idx) { + if ((idx == entidx_) && (entidx_ >= 0)) { + entidx_--; + return true; + } + else + return false; + } + int find(int start, KeyInfo::KeyType keytype) const { + if (start < 0) + return -1; + int tmpIdx = start; + while (tmpIdx < int(entries_.size())) { + if (entries_[tmpIdx].keytype == keytype) + return tmpIdx; + tmpIdx++; + } + return -1; + } + int process(ostringstream & os, KeyInfo const & actual); + int dispatch(ostringstream & os, int previousStart, KeyInfo & actual); + // string show(int lastpos) { return interval.show(lastpos);} + int nextNotIgnored(int start) { return interval_.nextNotIgnored(start);} + KeyInfo &getKeyInfo(int keyinfo) { + static KeyInfo invalidInfo = KeyInfo(); + if ((keyinfo < 0) || ( keyinfo >= int(entries_.size()))) + return invalidInfo; + else + return entries_[keyinfo]; + } + void setForDefaultLang(KeyInfo const & defLang) {interval_.setForDefaultLang(defLang);} + void addIntervall(int low, int up) { interval_.addIntervall(low, up); } }; int Intervall::findclosing(int start, int end, char up = '{', char down = '}', int repeat = 1) { - int skip = 0; - int depth = 0; - repeat--; - for (int i = start; i < end; i += 1 + skip) { - char c; - c = par[i]; - skip = 0; - if (c == '\\') skip = 1; - else if (c == up) { - depth++; - } - else if (c == down) { - if (depth == 0) { - if ((repeat <= 0) || (par[i+1] != up)) - return i; - } - --depth; - } - } - return end; + int skip = 0; + int depth = 0; + for (int i = start; i < end; i += 1 + skip) { + char c; + c = par[i]; + skip = 0; + if (c == '\\') skip = 1; + else if (c == up) { + depth++; + } + else if (c == down) { + if (depth == 0) { + repeat--; + if ((repeat <= 0) || (par[i+1] != up)) + return i; + } + --depth; + } + } + return end; } +void Intervall::removeInvalidClosings(void) +{ + // this can happen, if there are deleted parts + int skip = 0; + int depth = 0; + for (unsigned i = 0; i < par.size(); i += 1 + skip) { + char c = par[i]; + skip = 0; + if (c == '\\') skip = 1; + else if (c == '{') + depth++; + else if (c == '}') { + if (depth == 0) { + addIntervall(i, i+1); + LYXERR(Debug::FINDVERBOSE, "removed invalid closing '}' at " << i); + } + else + --depth; + } + } +} class MathInfo { - class MathEntry { - public: - string wait; - size_t mathEnd; - size_t mathStart; - size_t mathSize; - }; - size_t actualIdx; - vector entries; - public: - MathInfo() { - actualIdx = 0; - } - void insert(string wait, size_t start, size_t end) { - MathEntry m = MathEntry(); - m.wait = wait; - m.mathStart = start; - m.mathEnd = end; - m.mathSize = end - start; - entries.push_back(m); - } - bool empty() { return entries.empty(); }; - size_t getEndPos() { - if (entries.empty() || (actualIdx >= entries.size())) { - return 0; - } - return entries[actualIdx].mathEnd; - } - size_t getStartPos() { - if (entries.empty() || (actualIdx >= entries.size())) { - return 100000; /* definitely enough? */ - } - return entries[actualIdx].mathStart; - } - size_t getFirstPos() { - actualIdx = 0; - return getStartPos(); - } - size_t getSize() { - if (entries.empty() || (actualIdx >= entries.size())) { - return size_t(0); - } - return entries[actualIdx].mathSize; - } - void incrEntry() { actualIdx++; }; + class MathEntry { + public: + string wait; + size_t mathEnd; + size_t mathpostfixsize; + size_t mathStart; + size_t mathprefixsize; + size_t mathSize; + }; + size_t actualIdx_; + vector entries_; +public: + MathInfo() { + actualIdx_ = 0; + } + void insert(string const & wait, size_t start, size_t prefixsize, size_t end, size_t postfixsize) { + MathEntry m = MathEntry(); + m.wait = wait; + m.mathStart = start; + m.mathprefixsize = prefixsize; + m.mathEnd = end + postfixsize; + m.mathpostfixsize = postfixsize; + m.mathSize = m.mathEnd - m.mathStart; + entries_.push_back(m); + } + bool empty() const { return entries_.empty(); } + size_t getEndPos() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return 0; + } + return entries_[actualIdx_].mathEnd; + } + size_t getStartPos() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return 100000; /* definitely enough? */ + } + return entries_[actualIdx_].mathStart; + } + size_t getPrefixSize() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return 0; + } + return entries_[actualIdx_].mathprefixsize; + } + size_t getPostfixSize() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return 0; + } + return entries_[actualIdx_].mathpostfixsize; + } + size_t getFirstPos() { + actualIdx_ = 0; + return getStartPos(); + } + size_t getSize() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { + return size_t(0); + } + return entries_[actualIdx_].mathSize; + } + void incrEntry() { actualIdx_++; } }; void LatexInfo::buildEntries(bool isPatternString) { - static regex const rmath("\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\}"); - static regex const rkeys("\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?))"); - static bool disableLanguageOverride = false; - smatch sub, submath; - bool evaluatingRegexp = false; - MathInfo mi; - bool evaluatingMath = false; - bool evaluatingCode = false; - size_t codeEnd = 0; - bool evaluatingOptional = false; - size_t optionalEnd = 0; - int codeStart = -1; - KeyInfo found; - bool math_end_waiting = false; - size_t math_pos = 10000; - string math_end; - - for (sregex_iterator itmath(interval.par.begin(), interval.par.end(), rmath), end; itmath != end; ++itmath) { - submath = *itmath; - if (math_end_waiting) { - size_t pos = submath.position(size_t(0)); - if ((math_end == "$") && - (submath.str(0) == "$") && - (interval.par[pos-1] != '\\')) { - mi.insert("$", math_pos, pos + 1); - math_end_waiting = false; - } - else if ((math_end == "\\]") && - (submath.str(0) == "\\]")) { - mi.insert("\\]", math_pos, pos + 2); - math_end_waiting = false; - } - else if ((submath.str(1).compare("end") == 0) && - (submath.str(2).compare(math_end) == 0)) { - mi.insert(math_end, math_pos, pos + submath.str(0).length()); - math_end_waiting = false; - } - else - continue; - } - else { - if (submath.str(1).compare("begin") == 0) { - math_end_waiting = true; - math_end = submath.str(2); - math_pos = submath.position(size_t(0)); - } - else if (submath.str(0).compare("\\[") == 0) { - math_end_waiting = true; - math_end = "\\]"; - math_pos = submath.position(size_t(0)); - } - else if (submath.str(0) == "$") { - size_t pos = submath.position(size_t(0)); - if ((pos == 0) || (interval.par[pos-1] != '\\')) { - math_end_waiting = true; - math_end = "$"; - math_pos = pos; - } - } - } - } - // Ignore language if there is math somewhere in pattern-string - if (isPatternString) { - if (! mi.empty()) { - // Disable language - keys["foreignlanguage"].disabled = true; - disableLanguageOverride = true; - } - else - disableLanguageOverride = false; - } - else { - if (disableLanguageOverride) { - keys["foreignlanguage"].disabled = true; - } - } - math_pos = mi.getFirstPos(); - for (sregex_iterator it(interval.par.begin(), interval.par.end(), rkeys), end; it != end; ++it) { - sub = *it; - string key = sub.str(3); - if (key == "") { - if (sub.str(0)[0] == '\\') - key = sub.str(0)[1]; - else { - key = sub.str(0); - if (key == "$") { - size_t k_pos = sub.position(size_t(0)); - if ((k_pos > 0) && (interval.par[k_pos - 1] == '\\')) { - // Escaped '$', ignoring - continue; - } - } - } - }; - if (evaluatingRegexp) { - if (sub.str(1).compare("endregexp") == 0) { - evaluatingRegexp = false; - // found._tokenstart already set - found._dataEnd = sub.position(size_t(0)) + 13; - found._dataStart = found._dataEnd; - found._tokensize = found._dataEnd - found._tokenstart; - found.parenthesiscount = 0; - found.head = interval.par.substr(found._tokenstart, found._tokensize); - } - else - continue; - } - else { - if (evaluatingMath) { - if (size_t(sub.position(size_t(0))) < mi.getEndPos()) - continue; - evaluatingMath = false; - mi.incrEntry(); - math_pos = mi.getStartPos(); - } - if (keys.find(key) == keys.end()) { - found = KeyInfo(KeyInfo::isStandard, 0, true); - if (isPatternString) { - found.keytype = KeyInfo::isChar; - found.disabled = false; - found.used = true; - } - keys[key] = found; - } - else - found = keys[key]; - if (key.compare("regexp") == 0) { - evaluatingRegexp = true; - found._tokenstart = sub.position(size_t(0)); - found._tokensize = 0; - continue; - } - } - // Handle the other params of key - if (found.keytype == KeyInfo::isIgnored) - continue; - else if (found.keytype == KeyInfo::isMath) { - if (size_t(sub.position(size_t(0))) == math_pos) { - found = keys[key]; - found._tokenstart = sub.position(size_t(0)); - found._tokensize = mi.getSize(); - found._dataEnd = found._tokenstart + found._tokensize; - found._dataStart = found._dataEnd; - found.parenthesiscount = 0; - found.head = interval.par.substr(found._tokenstart, found._tokensize); - evaluatingMath = true; - } - else { - // begin|end of unknown env, discard - // First handle tables - // longtable|tabular - bool discardComment; - found = keys[key]; - found.keytype = KeyInfo::doRemove; - if ((sub.str(5).compare("longtable") == 0) || - (sub.str(5).compare("tabular") == 0)) { - discardComment = true; /* '%' */ - } - else { - discardComment = false; - static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$"); - smatch sub2; - string token = sub.str(5); - if (regex_match(token, sub2, removeArgs)) { - found.keytype = KeyInfo::removeWithArg; - } - } - // discard spaces before pos(0) - int pos = sub.position(size_t(0)); - int count; - for (count = 0; pos - count > 0; count++) { - char c = interval.par[pos-count-1]; - if (discardComment) { - if ((c != ' ') && (c != '%')) - break; - } - else if (c != ' ') - break; - } - found._tokenstart = pos - count; - if (sub.str(1).compare(0, 5, "begin") == 0) { - size_t pos1 = pos + sub.str(0).length(); - if (sub.str(5).compare("cjk") == 0) { - pos1 = interval.findclosing(pos1+1, interval.par.length()) + 1; - if ((interval.par[pos1] == '{') && (interval.par[pos1+1] == '}')) - pos1 += 2; - found.keytype = KeyInfo::isMain; - found._dataStart = pos1; - found._dataEnd = interval.par.length(); - found.disabled = keys["foreignlanguage"].disabled; - found.used = keys["foreignlanguage"].used; - found._tokensize = pos1 - found._tokenstart; - found.head = interval.par.substr(found._tokenstart, found._tokensize); - } - else { - // Swallow possible optional params - while (interval.par[pos1] == '[') { - pos1 = interval.findclosing(pos1+1, interval.par.length(), '[', ']')+1; - } - // Swallow also the eventual parameter - if (interval.par[pos1] == '{') { - found._dataEnd = interval.findclosing(pos1+1, interval.par.length()) + 1; - } - else { - found._dataEnd = pos1; - } - found._dataStart = found._dataEnd; - found._tokensize = count + found._dataEnd - pos; - found.parenthesiscount = 0; - found.head = interval.par.substr(found._tokenstart, found._tokensize); - found.disabled = true; - } - } - else { - // Handle "\end{...}" - found._dataStart = pos + sub.str(0).length(); - found._dataEnd = found._dataStart; - found._tokensize = count + found._dataEnd - pos; - found.parenthesiscount = 0; - found.head = interval.par.substr(found._tokenstart, found._tokensize); - found.disabled = true; - } - } - } - else if (found.keytype != KeyInfo::isRegex) { - found._tokenstart = sub.position(size_t(0)); - if (found.parenthesiscount == 0) { - // Probably to be discarded - size_t following_pos = sub.position(size_t(0)) + sub.str(3).length() + 1; - char following = interval.par[following_pos]; - if (following == ' ') - found.head = "\\" + sub.str(3) + " "; - else if (following == '=') { - // like \uldepth=1000pt - found.head = sub.str(0); - } - else - found.head = "\\" + key; - found._tokensize = found.head.length(); - found._dataEnd = found._tokenstart + found._tokensize; - found._dataStart = found._dataEnd; - } - else { - int params = found._tokenstart + key.length() + 1; - if (evaluatingOptional) { - if (size_t(found._tokenstart) > optionalEnd) { - evaluatingOptional = false; - } - else { - found.disabled = true; - } - } - int optend = params; - while (interval.par[optend] == '[') { - // discard optional parameters - optend = interval.findclosing(optend+1, interval.par.length(), '[', ']') + 1; - } - if (optend > params) { - key += interval.par.substr(params, optend-params); - evaluatingOptional = true; - optionalEnd = optend; - } - string token = sub.str(5); - int closings = found.parenthesiscount; - if (found.parenthesiscount == 1) { - found.head = "\\" + key + "{"; - } - else if (found.parenthesiscount > 1) { - if (token != "") { - found.head = sub.str(0) + "{"; - closings = found.parenthesiscount - 1; - } - else { - found.head = "\\" + key + "{"; - } - } - found._tokensize = found.head.length(); - found._dataStart = found._tokenstart + found.head.length(); - if (interval.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) { - found._dataStart += 15; - } - size_t endpos = interval.findclosing(found._dataStart, interval.par.length(), '{', '}', closings); - if (found.keytype == KeyInfo::isList) { - // Check if it really is list env - static regex const listre("^([a-z]+)$"); - smatch sub2; - if (!regex_match(token, sub2, listre)) { - // Change the key of this entry. It is not in a list/item environment - found.keytype = KeyInfo::endArguments; - } - } - if (found.keytype == KeyInfo::noMain) { - evaluatingCode = true; - codeEnd = endpos; - codeStart = found._dataStart; - } - else if (evaluatingCode) { - if (size_t(found._dataStart) > codeEnd) - evaluatingCode = false; - else if (found.keytype == KeyInfo::isMain) { - // Disable this key, treate it as standard - found.keytype = KeyInfo::isStandard; - found.disabled = true; - if ((codeEnd == interval.par.length()) && - (found._tokenstart == codeStart)) { - // trickery, because the code inset starts - // with \selectlanguage ... - codeEnd = endpos; - if (entries.size() > 1) { - entries[entries.size()-1]._dataEnd = codeEnd; - } - } - } - } - if ((endpos == interval.par.length()) && - (found.keytype == KeyInfo::doRemove)) { - // Missing closing => error in latex-input? - // therefore do not delete remaining data - found._dataStart -= 1; - found._dataEnd = found._dataStart; - } - else - found._dataEnd = endpos; - } - if (isPatternString) { - keys[key].used = true; - } - } - entries.push_back(found); - } + static regex const rmath("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|x?x?alignat)\\*?\\})(\\{[0-9]+\\})?)"); + static regex const rkeys("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?)))"); + static bool disableLanguageOverride = false; + smatch sub, submath; + bool evaluatingRegexp = false; + MathInfo mi; + bool evaluatingMath = false; + bool evaluatingCode = false; + size_t codeEnd = 0; + bool evaluatingOptional = false; + size_t optionalEnd = 0; + int codeStart = -1; + KeyInfo found; + bool math_end_waiting = false; + size_t math_pos = 10000; + size_t math_prefix_size = 1; + string math_end; + static vector usedText = vector(); + static bool removeMathHull = false; + + interval_.removeAccents(); + interval_.removeInvalidClosings(); + + for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) { + submath = *itmath; + if ((submath.position(2) - submath.position(0)) %2 == 1) { + // prefixed by odd count of '\\' + continue; + } + if (math_end_waiting) { + size_t pos = submath.position(size_t(2)); + if ((math_end == "$") && + (submath.str(2) == "$")) { + mi.insert("$", math_pos, 1, pos, 1); + math_end_waiting = false; + } + else if ((math_end == "\\]") && + (submath.str(2) == "\\]")) { + mi.insert("\\]", math_pos, 2, pos, 2); + math_end_waiting = false; + } + else if ((submath.str(3).compare("end") == 0) && + (submath.str(5).compare(math_end) == 0)) { + mi.insert(math_end, math_pos, math_prefix_size, pos, submath.str(2).length()); + math_end_waiting = false; + } + else + continue; + } + else { + if (submath.str(3).compare("begin") == 0) { + math_end_waiting = true; + math_end = submath.str(5); + math_pos = submath.position(size_t(2)); + math_prefix_size = submath.str(2).length(); + } + else if (submath.str(2).compare("\\[") == 0) { + math_end_waiting = true; + math_end = "\\]"; + math_pos = submath.position(size_t(2)); + } + else if (submath.str(2) == "$") { + size_t pos = submath.position(size_t(2)); + math_end_waiting = true; + math_end = "$"; + math_pos = pos; + } + } + } + // Ignore language if there is math somewhere in pattern-string + if (isPatternString) { + for (auto const & s: usedText) { + // Remove entries created in previous search runs + keys.erase(s); + } + usedText = vector(); + if (! mi.empty()) { + // Disable language + keys["foreignlanguage"].disabled = true; + disableLanguageOverride = true; + removeMathHull = false; + } + else { + removeMathHull = true; // used later if not isPatternString + disableLanguageOverride = false; + } + } + else { + if (disableLanguageOverride) { + keys["foreignlanguage"].disabled = true; + } + } + math_pos = mi.getFirstPos(); + for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) %2 == 1) { + // prefixed by odd count of '\\' + continue; + } + string key = sub.str(5); + if (key == "") { + if (sub.str(2)[0] == '\\') + key = sub.str(2)[1]; + else { + key = sub.str(2); + } + } + KeysIterator it_key = keys.find(key); + if (it_key != keys.end()) { + if (it_key->second.keytype == KeyInfo::headRemove) { + KeyInfo found1 = it_key->second; + found1.disabled = true; + found1.head = "\\" + key + "{"; + found1._tokenstart = sub.position(size_t(2)); + found1._tokensize = found1.head.length(); + found1._dataStart = found1._tokenstart + found1.head.length(); + int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1); + found1._dataEnd = endpos; + removeHead(found1); + continue; + } + } + if (evaluatingRegexp) { + if (sub.str(3).compare("endregexp") == 0) { + evaluatingRegexp = false; + // found._tokenstart already set + found._dataEnd = sub.position(size_t(2)) + 13; + found._dataStart = found._dataEnd; + found._tokensize = found._dataEnd - found._tokenstart; + found.parenthesiscount = 0; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + } + else { + continue; + } + } + else { + if (evaluatingMath) { + if (size_t(sub.position(size_t(2))) < mi.getEndPos()) + continue; + evaluatingMath = false; + mi.incrEntry(); + math_pos = mi.getStartPos(); + } + if (it_key == keys.end()) { + found = KeyInfo(KeyInfo::isStandard, 0, true); + LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text"); + found = KeyInfo(KeyInfo::isText, 0, false); + if (isPatternString) { + found.keytype = KeyInfo::isChar; + found.disabled = false; + found.used = true; + } + keys[key] = found; + usedText.push_back(key); + } + else + found = keys[key]; + if (key.compare("regexp") == 0) { + evaluatingRegexp = true; + found._tokenstart = sub.position(size_t(2)); + found._tokensize = 0; + continue; + } + } + // Handle the other params of key + if (found.keytype == KeyInfo::isIgnored) + continue; + else if (found.keytype == KeyInfo::isMath) { + if (size_t(sub.position(size_t(2))) == math_pos) { + found = keys[key]; + found._tokenstart = sub.position(size_t(2)); + found._tokensize = mi.getSize(); + found._dataEnd = found._tokenstart + found._tokensize; + found._dataStart = found._dataEnd; + found.parenthesiscount = 0; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + if (removeMathHull) { + interval_.addIntervall(found._tokenstart, found._tokenstart + mi.getPrefixSize()); + interval_.addIntervall(found._dataEnd - mi.getPostfixSize(), found._dataEnd); + } + else { + // Treate all math constructs as simple math + interval_.par[found._tokenstart] = '$'; + interval_.par[found._dataEnd - mi.getPostfixSize()] = '$'; + interval_.addIntervall(found._tokenstart + 1, found._tokenstart + mi.getPrefixSize()); + interval_.addIntervall(found._dataEnd - mi.getPostfixSize() + 1, found._dataEnd); + } + evaluatingMath = true; + } + else { + // begin|end of unknown env, discard + // First handle tables + // longtable|tabular + bool discardComment; + found = keys[key]; + found.keytype = KeyInfo::doRemove; + if ((sub.str(7).compare("longtable") == 0) || + (sub.str(7).compare("tabular") == 0)) { + discardComment = true; /* '%' */ + } + else { + discardComment = false; + static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$"); + smatch sub2; + string token = sub.str(7); + if (regex_match(token, sub2, removeArgs)) { + found.keytype = KeyInfo::removeWithArg; + } + } + // discard spaces before pos(2) + int pos = sub.position(size_t(2)); + int count; + for (count = 0; pos - count > 0; count++) { + char c = interval_.par[pos-count-1]; + if (discardComment) { + if ((c != ' ') && (c != '%')) + break; + } + else if (c != ' ') + break; + } + found._tokenstart = pos - count; + if (sub.str(3).compare(0, 5, "begin") == 0) { + size_t pos1 = pos + sub.str(2).length(); + if (sub.str(7).compare("cjk") == 0) { + pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1; + if ((interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}')) + pos1 += 2; + found.keytype = KeyInfo::isMain; + found._dataStart = pos1; + found._dataEnd = interval_.par.length(); + found.disabled = keys["foreignlanguage"].disabled; + found.used = keys["foreignlanguage"].used; + found._tokensize = pos1 - found._tokenstart; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + } + else { + // Swallow possible optional params + while (interval_.par[pos1] == '[') { + pos1 = interval_.findclosing(pos1+1, interval_.par.length(), '[', ']')+1; + } + // Swallow also the eventual parameter + if (interval_.par[pos1] == '{') { + found._dataEnd = interval_.findclosing(pos1+1, interval_.par.length()) + 1; + } + else { + found._dataEnd = pos1; + } + found._dataStart = found._dataEnd; + found._tokensize = count + found._dataEnd - pos; + found.parenthesiscount = 0; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + found.disabled = true; + } + } + else { + // Handle "\end{...}" + found._dataStart = pos + sub.str(2).length(); + found._dataEnd = found._dataStart; + found._tokensize = count + found._dataEnd - pos; + found.parenthesiscount = 0; + found.head = interval_.par.substr(found._tokenstart, found._tokensize); + found.disabled = true; + } + } + } + else if (found.keytype != KeyInfo::isRegex) { + found._tokenstart = sub.position(size_t(2)); + if (found.parenthesiscount == 0) { + // Probably to be discarded + size_t following_pos = sub.position(size_t(2)) + sub.str(5).length() + 1; + char following = interval_.par[following_pos]; + if (following == ' ') + found.head = "\\" + sub.str(5) + " "; + else if (following == '=') { + // like \uldepth=1000pt + found.head = sub.str(2); + } + else + found.head = "\\" + key; + found._tokensize = found.head.length(); + found._dataEnd = found._tokenstart + found._tokensize; + found._dataStart = found._dataEnd; + } + else { + int params = found._tokenstart + key.length() + 1; + if (evaluatingOptional) { + if (size_t(found._tokenstart) > optionalEnd) { + evaluatingOptional = false; + } + else { + found.disabled = true; + } + } + int optend = params; + while (interval_.par[optend] == '[') { + // discard optional parameters + optend = interval_.findclosing(optend+1, interval_.par.length(), '[', ']') + 1; + } + if (optend > params) { + key += interval_.par.substr(params, optend-params); + evaluatingOptional = true; + optionalEnd = optend; + if (found.keytype == KeyInfo::isSectioning) { + // Remove optional values (but still keep in header) + interval_.addIntervall(params, optend); + } + } + string token = sub.str(7); + int closings; + if (interval_.par[optend] != '{') { + closings = 0; + found.parenthesiscount = 0; + found.head = "\\" + key; + } + else + closings = found.parenthesiscount; + if (found.parenthesiscount == 1) { + found.head = "\\" + key + "{"; + } + else if (found.parenthesiscount > 1) { + if (token != "") { + found.head = sub.str(2) + "{"; + closings = found.parenthesiscount - 1; + } + else { + found.head = "\\" + key + "{"; + } + } + found._tokensize = found.head.length(); + found._dataStart = found._tokenstart + found.head.length(); + if (found.keytype == KeyInfo::doRemove) { + if (closings > 0) { + size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); + if (endpar >= interval_.par.length()) + found._dataStart = interval_.par.length(); + else + found._dataStart = endpar; + found._tokensize = found._dataStart - found._tokenstart; + } + else { + found._dataStart = found._tokenstart + found._tokensize; + } + closings = 0; + } + if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) { + found._dataStart += 15; + } + size_t endpos; + if (closings < 1) + endpos = found._dataStart - 1; + else + endpos = interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); + if (found.keytype == KeyInfo::isList) { + // Check if it really is list env + static regex const listre("^([a-z]+)$"); + smatch sub2; + if (!regex_match(token, sub2, listre)) { + // Change the key of this entry. It is not in a list/item environment + found.keytype = KeyInfo::endArguments; + } + } + if (found.keytype == KeyInfo::noMain) { + evaluatingCode = true; + codeEnd = endpos; + codeStart = found._dataStart; + } + else if (evaluatingCode) { + if (size_t(found._dataStart) > codeEnd) + evaluatingCode = false; + else if (found.keytype == KeyInfo::isMain) { + // Disable this key, treate it as standard + found.keytype = KeyInfo::isStandard; + found.disabled = true; + if ((codeEnd +1 >= interval_.par.length()) && + (found._tokenstart == codeStart)) { + // trickery, because the code inset starts + // with \selectlanguage ... + codeEnd = endpos; + if (entries_.size() > 1) { + entries_[entries_.size()-1]._dataEnd = codeEnd; + } + } + } + } + if ((endpos == interval_.par.length()) && + (found.keytype == KeyInfo::doRemove)) { + // Missing closing => error in latex-input? + // therefore do not delete remaining data + found._dataStart -= 1; + found._dataEnd = found._dataStart; + } + else + found._dataEnd = endpos; + } + if (isPatternString) { + keys[key].used = true; + } + } + entries_.push_back(found); + } } void LatexInfo::makeKey(const string &keysstring, KeyInfo keyI, bool isPatternString) { - stringstream s(keysstring); - string key; - const char delim = '|'; - while (getline(s, key, delim)) { - KeyInfo keyII(keyI); - if (isPatternString) { - keyII.used = false; - } - else if ( !keys[key].used) - keyII.disabled = true; - keys[key] = keyII; - } + stringstream s(keysstring); + string key; + const char delim = '|'; + while (getline(s, key, delim)) { + KeyInfo keyII(keyI); + if (isPatternString) { + keyII.used = false; + } + else if ( !keys[key].used) + keyII.disabled = true; + keys[key] = keyII; + } } void LatexInfo::buildKeys(bool isPatternString) { - static bool keysBuilt = false; - if (keysBuilt && !isPatternString) return; - - // Known standard keys with 1 parameter. - // Split is done, if not at start of region - makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString); - makeKey("textbf", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getSeries()), isPatternString); - makeKey("textit|textsc|textsl", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getShape()), isPatternString); - makeKey("uuline|uline|uwave", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getUnderline()), isPatternString); - makeKey("emph|noun", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getMarkUp()), isPatternString); - makeKey("sout|xout", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getStrikeOut()), isPatternString); - - makeKey("section|subsection|subsubsection|paragraph|subparagraph|minisec", - KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); - makeKey("section*|subsection*|subsubsection*|paragraph*", - KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); - makeKey("part|part*|chapter|chapter*", KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); - makeKey("title|subtitle|author|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|lyxaddress|lyxrightaddress", KeyInfo(KeyInfo::isTitle, 1, ignoreFormats.getFrontMatter()), isPatternString); - // Regex - makeKey("regexp", KeyInfo(KeyInfo::isRegex, 1, false), isPatternString); - - // Split is done, if not at start of region - makeKey("textcolor", KeyInfo(KeyInfo::isStandard, 2, ignoreFormats.getColor()), isPatternString); - makeKey("latexenvironment", KeyInfo(KeyInfo::isStandard, 2, false), isPatternString); - - // Split is done always. - makeKey("foreignlanguage", KeyInfo(KeyInfo::isMain, 2, ignoreFormats.getLanguage()), isPatternString); - - // Known charaters - // No split - makeKey("backslash|textbackslash|slash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - // Spaces - makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - // Skip - // makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - // Custom space/skip, remove the content (== length value) - makeKey("vspace|vspace*|hspace|hspace*|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString); - // Found in fr/UserGuide.lyx - makeKey("og|fg", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - // quotes - makeKey("textquotedbl|quotesinglbase|lyxarrow", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("textquotedblleft|textquotedblright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - // Known macros to remove (including their parameter) - // No split - makeKey("input|inputencoding|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString); - makeKey("addtocounter|setlength", KeyInfo(KeyInfo::noContent, 2, true), isPatternString); - // handle like standard keys with 1 parameter. - makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString); - - // Macros to remove, but let the parameter survive - // No split - makeKey("menuitem|textmd|textrm", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); - - // Remove language spec from content of these insets - makeKey("code", KeyInfo(KeyInfo::noMain, 1, false), isPatternString); - - // Same effect as previous, parameter will survive (because there is no one anyway) - // No split - makeKey("noindent|textcompwordmark|maketitle", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); - // Remove table decorations - makeKey("hline|tabularnewline|toprule|bottomrule|midrule", KeyInfo(KeyInfo::doRemove, 0, true), isPatternString); - // Discard shape-header. - // For footnote or shortcut too, because of lang settings - // and wrong handling if used 'KeyInfo::noMain' - makeKey("circlepar|diamondpar|heartpar|nutpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); - makeKey("trianglerightpar|hexagonpar|starpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); - makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); - makeKey("triangleleftpar|shapepar|dropuppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); - makeKey("hphantom|vphantom|footnote|shortcut|include|includegraphics", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); - makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString); - // like ('tiny{}' or '\tiny ' ... ) - makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString); - - // Survives, like known character - makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); - makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString); - - makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); - makeKey("[|]", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); - makeKey("$", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); - - makeKey("par|uldepth|ULdepth|protect|nobreakdash|medskip|relax", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); - // Remove RTL/LTR marker - makeKey("l|r|textlr|textfr|textar|beginl|endl", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); - makeKey("lettrine", KeyInfo(KeyInfo::cleanToStart, 0, true), isPatternString); - makeKey("lyxslide", KeyInfo(KeyInfo::isSectioning, 1, true), isPatternString); - makeKey("endarguments", KeyInfo(KeyInfo::endArguments, 0, true), isPatternString); - makeKey("twocolumn", KeyInfo(KeyInfo::removeWithArg, 2, true), isPatternString); - makeKey("tnotetext|ead|fntext|cortext|address", KeyInfo(KeyInfo::removeWithArg, 0, true), isPatternString); - makeKey("lyxend", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); - if (isPatternString) { - // Allow the first searched string to rebuild the keys too - keysBuilt = false; - } - else { - // no need to rebuild again - keysBuilt = true; - } + static bool keysBuilt = false; + if (keysBuilt && !isPatternString) return; + + // Keys to ignore in any case + makeKey("text|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true); + makeKey("nonumber|notag", KeyInfo(KeyInfo::headRemove, 0, true), true); + // Known standard keys with 1 parameter. + // Split is done, if not at start of region + makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString); + makeKey("textbf", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getSeries()), isPatternString); + makeKey("textit|textsc|textsl", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getShape()), isPatternString); + makeKey("uuline|uline|uwave", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getUnderline()), isPatternString); + makeKey("emph|noun", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getMarkUp()), isPatternString); + makeKey("sout|xout", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getStrikeOut()), isPatternString); + + makeKey("section|subsection|subsubsection|paragraph|subparagraph|minisec", + KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); + makeKey("section*|subsection*|subsubsection*|paragraph*", + KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); + makeKey("part|part*|chapter|chapter*", KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString); + makeKey("title|subtitle|author|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|lyxaddress|lyxrightaddress", KeyInfo(KeyInfo::isTitle, 1, ignoreFormats.getFrontMatter()), isPatternString); + // Regex + makeKey("regexp", KeyInfo(KeyInfo::isRegex, 1, false), isPatternString); + + // Split is done, if not at start of region + makeKey("textcolor", KeyInfo(KeyInfo::isStandard, 2, ignoreFormats.getColor()), isPatternString); + makeKey("latexenvironment", KeyInfo(KeyInfo::isStandard, 2, false), isPatternString); + + // Split is done always. + makeKey("foreignlanguage", KeyInfo(KeyInfo::isMain, 2, ignoreFormats.getLanguage()), isPatternString); + + // Known charaters + // No split + makeKey("backslash|textbackslash|slash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // Spaces + makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("thickspace|medspace|thinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // Skip + // makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // Custom space/skip, remove the content (== length value) + makeKey("vspace|vspace*|hspace|hspace*|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString); + // Found in fr/UserGuide.lyx + makeKey("og|fg", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // quotes + makeKey("textquotedbl|quotesinglbase|lyxarrow", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("textquotedblleft|textquotedblright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // Known macros to remove (including their parameter) + // No split + makeKey("input|inputencoding|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString); + makeKey("addtocounter|setlength", KeyInfo(KeyInfo::noContent, 2, true), isPatternString); + // handle like standard keys with 1 parameter. + makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString); + + if (ignoreFormats.getDeleted()) { + // Ignore deleted text + makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString); + } + else { + // but preserve added text + makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString); + } + makeKey("lyxadded", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString); + + // Macros to remove, but let the parameter survive + // No split + makeKey("menuitem|textmd|textrm", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + + // Remove language spec from content of these insets + makeKey("code", KeyInfo(KeyInfo::noMain, 1, false), isPatternString); + + // Same effect as previous, parameter will survive (because there is no one anyway) + // No split + makeKey("noindent|textcompwordmark|maketitle", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); + // Remove table decorations + makeKey("hline|tabularnewline|toprule|bottomrule|midrule", KeyInfo(KeyInfo::doRemove, 0, true), isPatternString); + // Discard shape-header. + // For footnote or shortcut too, because of lang settings + // and wrong handling if used 'KeyInfo::noMain' + makeKey("circlepar|diamondpar|heartpar|nutpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("trianglerightpar|hexagonpar|starpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("triangleleftpar|shapepar|dropuppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("hphantom|vphantom|note|footnote|shortcut|include|includegraphics", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); + makeKey("textgreek|textcyrillic", KeyInfo(KeyInfo::isStandard, 1, true), false); + makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString); + // like ('tiny{}' or '\tiny ' ... ) + makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, ignoreFormats.getSize()), isPatternString); + + // Survives, like known character + // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("tableofcontents", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString); + + makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); + makeKey("[|]", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); + makeKey("$", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); + + makeKey("par|uldepth|ULdepth|protect|nobreakdash|medskip|relax", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); + // Remove RTL/LTR marker + makeKey("l|r|textlr|textfr|textar|beginl|endl", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); + makeKey("lettrine", KeyInfo(KeyInfo::cleanToStart, 0, true), isPatternString); + makeKey("lyxslide", KeyInfo(KeyInfo::isSectioning, 1, true), isPatternString); + makeKey("endarguments", KeyInfo(KeyInfo::endArguments, 0, true), isPatternString); + makeKey("twocolumn", KeyInfo(KeyInfo::removeWithArg, 2, true), isPatternString); + makeKey("tnotetext|ead|fntext|cortext|address", KeyInfo(KeyInfo::removeWithArg, 0, true), isPatternString); + makeKey("lyxend", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString); + if (isPatternString) { + // Allow the first searched string to rebuild the keys too + keysBuilt = false; + } + else { + // no need to rebuild again + keysBuilt = true; + } } /* @@ -1924,514 +2943,557 @@ void LatexInfo::buildKeys(bool isPatternString) */ void Intervall::handleParentheses(int lastpos, bool closingAllowed) { - int skip = 0; - for (int i = depts[actualdeptindex]; i < lastpos; i+= 1 + skip) { - char c; - c = par[i]; - skip = 0; - if (c == '\\') skip = 1; - else if (c == '{') { - handleOpenP(i); - } - else if (c == '}') { - handleCloseP(i, closingAllowed); - } - } + int skip = 0; + for (int i = depts[actualdeptindex]; i < lastpos; i+= 1 + skip) { + char c; + c = par[i]; + skip = 0; + if (c == '\\') skip = 1; + else if (c == '{') { + handleOpenP(i); + } + else if (c == '}') { + handleCloseP(i, closingAllowed); + } + } } #if (0) string Intervall::show(int lastpos) { - int idx = 0; /* int intervalls */ - string s; - int i = 0; - for (idx = 0; idx <= ignoreidx; idx++) { - while (i < lastpos) { - int printsize; - if (i <= borders[idx].low) { - if (borders[idx].low > lastpos) - printsize = lastpos - i; - else - printsize = borders[idx].low - i; - s += par.substr(i, printsize); - i += printsize; - if (i >= borders[idx].low) - i = borders[idx].upper; - } - else { - i = borders[idx].upper; - break; - } - } - } - if (lastpos > i) { - s += par.substr(i, lastpos-i); - } - return (s); + int idx = 0; /* int intervalls */ + string s; + int i = 0; + if ((unsigned) lastpos > par.size()) + lastpos = par.size(); + for (idx = 0; idx <= ignoreidx; idx++) { + while (i < lastpos) { + int printsize; + if (i <= borders[idx].low) { + if (borders[idx].low > lastpos) + printsize = lastpos - i; + else + printsize = borders[idx].low - i; + s += par.substr(i, printsize); + i += printsize; + if (i >= borders[idx].low) + i = borders[idx].upper; + } + else { + i = borders[idx].upper; + break; + } + } + } + if (lastpos > i) { + s += par.substr(i, lastpos-i); + } + return s; } #endif void Intervall::output(ostringstream &os, int lastpos) { - // get number of chars to output - int idx = 0; /* int intervalls */ - int i = 0; - int printed = 0; - string startTitle = titleValue; - for (idx = 0; idx <= ignoreidx; idx++) { - if (i < lastpos) { - if (i <= borders[idx].low) { - int printsize; - if (borders[idx].low > lastpos) - printsize = lastpos - i; - else - printsize = borders[idx].low - i; - if (printsize > 0) { - os << startTitle << par.substr(i, printsize); - i += printsize; - printed += printsize; - startTitle = ""; - } - handleParentheses(i, false); - if (i >= borders[idx].low) - i = borders[idx].upper; - } - else { - i = borders[idx].upper; - } - } - else - break; - } - if (lastpos > i) { - os << startTitle << par.substr(i, lastpos-i); - printed += lastpos-i; - } - handleParentheses(lastpos, false); - for (int i = actualdeptindex; i > 0; --i) { - os << "}"; - } - if (hasTitle && (printed > 0)) - os << "}"; - if (! isPatternString) - os << "\n"; - handleParentheses(lastpos, true); /* extra closings '}' allowed here */ + // get number of chars to output + int idx = 0; /* int intervalls */ + int i = 0; + int printed = 0; + string startTitle = titleValue; + for (idx = 0; idx <= ignoreidx; idx++) { + if (i < lastpos) { + if (i <= borders[idx].low) { + int printsize; + if (borders[idx].low > lastpos) + printsize = lastpos - i; + else + printsize = borders[idx].low - i; + if (printsize > 0) { + os << startTitle << par.substr(i, printsize); + i += printsize; + printed += printsize; + startTitle = ""; + } + handleParentheses(i, false); + if (i >= borders[idx].low) + i = borders[idx].upper; + } + else { + i = borders[idx].upper; + } + } + else + break; + } + if (lastpos > i) { + os << startTitle << par.substr(i, lastpos-i); + printed += lastpos-i; + } + handleParentheses(lastpos, false); + int startindex; + if (keys["foreignlanguage"].disabled) + startindex = actualdeptindex-langcount; + else + startindex = actualdeptindex; + for (int i = startindex; i > 0; --i) { + os << "}"; + } + if (hasTitle && (printed > 0)) + os << "}"; + if (! isPatternString_) + os << "\n"; + handleParentheses(lastpos, true); /* extra closings '}' allowed here */ } void LatexInfo::processRegion(int start, int region_end) { - while (start < region_end) { /* Let {[} and {]} survive */ - int cnt = interval.isOpeningPar(start); - if (cnt == 1) { - // Closing is allowed past the region - int closing = interval.findclosing(start+1, interval.par.length()); - interval.addIntervall(start, start+1); - interval.addIntervall(closing, closing+1); - } - else if (cnt == 3) - start += 2; - start = interval.nextNotIgnored(start+1); - } -} - -void LatexInfo::removeHead(KeyInfo &actual, int count) -{ - if (actual.parenthesiscount == 0) { - // "{\tiny{} ...}" ==> "{{} ...}" - interval.addIntervall(actual._tokenstart-count, actual._tokenstart + actual._tokensize); - } - else { - // Remove header hull, that is "\url{abcd}" ==> "abcd" - interval.addIntervall(actual._tokenstart - count, actual._dataStart); - interval.addIntervall(actual._dataEnd, actual._dataEnd+1); - } + while (start < region_end) { /* Let {[} and {]} survive */ + int cnt = interval_.isOpeningPar(start); + if (cnt == 1) { + // Closing is allowed past the region + int closing = interval_.findclosing(start+1, interval_.par.length()); + interval_.addIntervall(start, start+1); + interval_.addIntervall(closing, closing+1); + } + else if (cnt == 3) + start += 2; + start = interval_.nextNotIgnored(start+1); + } +} + +void LatexInfo::removeHead(KeyInfo const & actual, int count) +{ + if (actual.parenthesiscount == 0) { + // "{\tiny{} ...}" ==> "{{} ...}" + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart + actual._tokensize); + } + else { + // Remove header hull, that is "\url{abcd}" ==> "abcd" + interval_.addIntervall(actual._tokenstart - count, actual._dataStart); + interval_.addIntervall(actual._dataEnd, actual._dataEnd+1); + } } int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) { - int nextKeyIdx = 0; - switch (actual.keytype) - { - case KeyInfo::isTitle: { - removeHead(actual); - nextKeyIdx = getNextKey(); - break; - } - case KeyInfo::cleanToStart: { - actual._dataEnd = actual._dataStart; - nextKeyIdx = getNextKey(); - // Search for end of arguments - int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); - if (tmpIdx > 0) { - for (int i = nextKeyIdx; i <= tmpIdx; i++) { - entries[i].disabled = true; - } - actual._dataEnd = entries[tmpIdx]._dataEnd; - } - while (interval.par[actual._dataEnd] == ' ') - actual._dataEnd++; - interval.addIntervall(0, actual._dataEnd+1); - interval.actualdeptindex = 0; - interval.depts[0] = actual._dataEnd+1; - interval.closes[0] = -1; - break; - } - case KeyInfo::noContent: { /* char like "\hspace{2cm}" */ - if (actual.disabled) - interval.addIntervall(actual._tokenstart, actual._dataEnd); - else - interval.addIntervall(actual._dataStart, actual._dataEnd); - } - // fall through - case KeyInfo::isChar: { - nextKeyIdx = getNextKey(); - break; - } - case KeyInfo::isSize: { - if (actual.disabled || (interval.par[actual._dataStart] != '{') || (interval.par[actual._dataStart-1] == ' ')) { - processRegion(actual._dataEnd, actual._dataEnd+1); /* remove possibly following {} */ - interval.addIntervall(actual._tokenstart, actual._dataEnd+1); - nextKeyIdx = getNextKey(); - } else { - // Here _dataStart points to '{', so correct it - actual._dataStart += 1; - actual._tokensize += 1; - actual.parenthesiscount = 1; - if (interval.par[actual._dataStart] == '}') { - // Determine the end if used like '{\tiny{}...}' - actual._dataEnd = interval.findclosing(actual._dataStart+1, interval.par.length()) + 1; - interval.addIntervall(actual._dataStart, actual._dataStart+1); - } - else { - // Determine the end if used like '\tiny{...}' - actual._dataEnd = interval.findclosing(actual._dataStart, interval.par.length()) + 1; - } - // Split on this key if not at start - int start = interval.nextNotIgnored(previousStart); - if (start < actual._tokenstart) { - interval.output(os, actual._tokenstart); - interval.addIntervall(start, actual._tokenstart); - } - // discard entry if at end of actual - nextKeyIdx = process(os, actual); - } - break; - } - case KeyInfo::endArguments: - // Remove trailing '{}' too - actual._dataStart += 1; - actual._dataEnd += 1; - interval.addIntervall(actual._tokenstart, actual._dataEnd+1); - nextKeyIdx = getNextKey(); - break; - case KeyInfo::noMain: - // fall through - case KeyInfo::isStandard: { - if (actual.disabled) { - removeHead(actual); - processRegion(actual._dataStart, actual._dataStart+1); - nextKeyIdx = getNextKey(); - } else { - // Split on this key if not at datastart of calling entry - int start = interval.nextNotIgnored(previousStart); - if (start < actual._tokenstart) { - interval.output(os, actual._tokenstart); - interval.addIntervall(start, actual._tokenstart); - } - // discard entry if at end of actual - nextKeyIdx = process(os, actual); - } - break; - } - case KeyInfo::removeWithArg: { - nextKeyIdx = getNextKey(); - // Search for end of arguments - int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); - if (tmpIdx > 0) { - for (int i = nextKeyIdx; i <= tmpIdx; i++) { - entries[i].disabled = true; - } - actual._dataEnd = entries[tmpIdx]._dataEnd; - } - interval.addIntervall(actual._tokenstart, actual._dataEnd+1); - break; - } - case KeyInfo::doRemove: { - // Remove the key with all parameters and following spaces - size_t pos; - for (pos = actual._dataEnd+1; pos < interval.par.length(); pos++) { - if ((interval.par[pos] != ' ') && (interval.par[pos] != '%')) - break; - } - interval.addIntervall(actual._tokenstart, pos); - nextKeyIdx = getNextKey(); - break; - } - case KeyInfo::isList: { - // Discard space before _tokenstart - int count; - for (count = 0; count < actual._tokenstart; count++) { - if (interval.par[actual._tokenstart-count-1] != ' ') - break; - } - nextKeyIdx = getNextKey(); - int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); - if (tmpIdx > 0) { - // Special case: \item is not a list, but a command (like in Style Author_Biography in maa-monthly.layout) - // with arguments - // How else can we catch this one? - for (int i = nextKeyIdx; i <= tmpIdx; i++) { - entries[i].disabled = true; - } - actual._dataEnd = entries[tmpIdx]._dataEnd; - } - else if (nextKeyIdx > 0) { - // Ignore any lang entries inside data region - for (int i = nextKeyIdx; i < int(entries.size()) && entries[i]._tokenstart < actual._dataEnd; i++) { - if (entries[i].keytype == KeyInfo::isMain) - entries[i].disabled = true; - } - } - if (actual.disabled) { - interval.addIntervall(actual._tokenstart-count, actual._dataEnd+1); - } - else { - interval.addIntervall(actual._tokenstart-count, actual._tokenstart); - } - if (interval.par[actual._dataEnd+1] == '[') { - int posdown = interval.findclosing(actual._dataEnd+2, interval.par.length(), '[', ']'); - if ((interval.par[actual._dataEnd+2] == '{') && - (interval.par[posdown-1] == '}')) { - interval.addIntervall(actual._dataEnd+1,actual._dataEnd+3); - interval.addIntervall(posdown-1, posdown+1); - } - else { - interval.addIntervall(actual._dataEnd+1, actual._dataEnd+2); - interval.addIntervall(posdown, posdown+1); - } - int blk = interval.nextNotIgnored(actual._dataEnd+1); - if (blk > posdown) { - // Discard at most 1 space after empty item - int count; - for (count = 0; count < 1; count++) { - if (interval.par[blk+count] != ' ') - break; - } - if (count > 0) - interval.addIntervall(blk, blk+count); - } - } - break; - } - case KeyInfo::isSectioning: { - // Discard spaces before _tokenstart - int count; - int val = actual._tokenstart; - for (count = 0; count < actual._tokenstart;) { - val = interval.previousNotIgnored(val-1); - if (interval.par[val] != ' ') - break; - else { - count = actual._tokenstart - val; - } - } - if (actual.disabled) { - removeHead(actual, count); - nextKeyIdx = getNextKey(); - } else { - interval.addIntervall(actual._tokenstart-count, actual._tokenstart); - nextKeyIdx = process(os, actual); - } - break; - } - case KeyInfo::isMath: { - // Same as regex, use the content unchanged - nextKeyIdx = getNextKey(); - break; - } - case KeyInfo::isRegex: { - // DO NOT SPLIT ON REGEX - // Do not disable - nextKeyIdx = getNextKey(); - break; - } - case KeyInfo::isIgnored: { - // Treat like a character for now - nextKeyIdx = getNextKey(); - break; - } - case KeyInfo::isMain: { - if (interval.par.substr(actual._dataStart, 2) == "% ") - interval.addIntervall(actual._dataStart, actual._dataStart+2); - if (actual._tokenstart > 0) { - int prev = interval.previousNotIgnored(actual._tokenstart - 1); - if ((prev >= 0) && interval.par[prev] == '%') - interval.addIntervall(prev, prev+1); - } - if (actual.disabled) { - removeHead(actual); - if ((interval.par.substr(actual._dataStart, 3) == " \\[") || - (interval.par.substr(actual._dataStart, 8) == " \\begin{")) { - // Discard also the space before math-equation - interval.addIntervall(actual._dataStart, actual._dataStart+1); - } - nextKeyIdx = getNextKey(); - // interval.resetOpenedP(actual._dataStart-1); - } - else { - if (actual._tokenstart < 26) { - // for the first (and maybe dummy) language - interval.setForDefaultLang(actual); - } - interval.resetOpenedP(actual._dataStart-1); - } - break; - } - case KeyInfo::invalid: - // This cannot happen, already handled - // fall through - default: { - // LYXERR0("Unhandled keytype"); - nextKeyIdx = getNextKey(); - break; - } - } - return nextKeyIdx; -} - -int LatexInfo::process(ostringstream &os, KeyInfo &actual ) -{ - int end = interval.nextNotIgnored(actual._dataEnd); - int oldStart = actual._dataStart; - int nextKeyIdx = getNextKey(); - while (true) { - if ((nextKeyIdx < 0) || - (entries[nextKeyIdx]._tokenstart >= actual._dataEnd) || - (entries[nextKeyIdx].keytype == KeyInfo::invalid)) { - if (oldStart <= end) { - processRegion(oldStart, end); - oldStart = end+1; - } - break; - } - KeyInfo &nextKey = getKeyInfo(nextKeyIdx); - - if ((nextKey.keytype == KeyInfo::isMain) && !nextKey.disabled) { - (void) dispatch(os, actual._dataStart, nextKey); - end = nextKey._tokenstart; - break; - } - processRegion(oldStart, nextKey._tokenstart); - nextKeyIdx = dispatch(os, actual._dataStart, nextKey); - - oldStart = nextKey._dataEnd+1; - } - // now nextKey is either invalid or is outside of actual._dataEnd - // output the remaining and discard myself - if (oldStart <= end) { - processRegion(oldStart, end); - } - if (interval.par[end] == '}') { - end += 1; - // This is the normal case. - // But if using the firstlanguage, the closing may be missing - } - // get minimum of 'end' and 'actual._dataEnd' in case that the nextKey.keytype was 'KeyInfo::isMain' - int output_end; - if (actual._dataEnd < end) - output_end = interval.nextNotIgnored(actual._dataEnd); - else - output_end = interval.nextNotIgnored(end); - if ((actual.keytype == KeyInfo::isMain) && actual.disabled) { - interval.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize); - } - // Remove possible empty data - int dstart = interval.nextNotIgnored(actual._dataStart); - while (interval.isOpeningPar(dstart) == 1) { - interval.addIntervall(dstart, dstart+1); - int dend = interval.findclosing(dstart+1, output_end); - interval.addIntervall(dend, dend+1); - dstart = interval.nextNotIgnored(dstart+1); - } - if (dstart < output_end) - interval.output(os, output_end); - interval.addIntervall(actual._tokenstart, end); - return nextKeyIdx; + int nextKeyIdx = 0; + switch (actual.keytype) + { + case KeyInfo::isTitle: { + removeHead(actual); + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::cleanToStart: { + actual._dataEnd = actual._dataStart; + nextKeyIdx = getNextKey(); + // Search for end of arguments + int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); + if (tmpIdx > 0) { + for (int i = nextKeyIdx; i <= tmpIdx; i++) { + entries_[i].disabled = true; + } + actual._dataEnd = entries_[tmpIdx]._dataEnd; + } + while (interval_.par[actual._dataEnd] == ' ') + actual._dataEnd++; + interval_.addIntervall(0, actual._dataEnd+1); + interval_.actualdeptindex = 0; + interval_.depts[0] = actual._dataEnd+1; + interval_.closes[0] = -1; + break; + } + case KeyInfo::isText: + interval_.par[actual._tokenstart] = '#'; + //interval_.addIntervall(actual._tokenstart, actual._tokenstart+1); + nextKeyIdx = getNextKey(); + break; + case KeyInfo::noContent: { /* char like "\hspace{2cm}" */ + if (actual.disabled) + interval_.addIntervall(actual._tokenstart, actual._dataEnd); + else + interval_.addIntervall(actual._dataStart, actual._dataEnd); + } + // fall through + case KeyInfo::isChar: { + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isSize: { + if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) { + if (actual.parenthesiscount == 0) + interval_.addIntervall(actual._tokenstart, actual._dataEnd); + else { + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + } + nextKeyIdx = getNextKey(); + } else { + // Here _dataStart points to '{', so correct it + actual._dataStart += 1; + actual._tokensize += 1; + actual.parenthesiscount = 1; + if (interval_.par[actual._dataStart] == '}') { + // Determine the end if used like '{\tiny{}...}' + actual._dataEnd = interval_.findclosing(actual._dataStart+1, interval_.par.length()) + 1; + interval_.addIntervall(actual._dataStart, actual._dataStart+1); + } + else { + // Determine the end if used like '\tiny{...}' + actual._dataEnd = interval_.findclosing(actual._dataStart, interval_.par.length()) + 1; + } + // Split on this key if not at start + int start = interval_.nextNotIgnored(previousStart); + if (start < actual._tokenstart) { + interval_.output(os, actual._tokenstart); + interval_.addIntervall(start, actual._tokenstart); + } + // discard entry if at end of actual + nextKeyIdx = process(os, actual); + } + break; + } + case KeyInfo::endArguments: { + // Remove trailing '{}' too + actual._dataStart += 1; + actual._dataEnd += 1; + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::noMain: + // fall through + case KeyInfo::isStandard: { + if (actual.disabled) { + removeHead(actual); + processRegion(actual._dataStart, actual._dataStart+1); + nextKeyIdx = getNextKey(); + } else { + // Split on this key if not at datastart of calling entry + int start = interval_.nextNotIgnored(previousStart); + if (start < actual._tokenstart) { + interval_.output(os, actual._tokenstart); + interval_.addIntervall(start, actual._tokenstart); + } + // discard entry if at end of actual + nextKeyIdx = process(os, actual); + } + break; + } + case KeyInfo::removeWithArg: { + nextKeyIdx = getNextKey(); + // Search for end of arguments + int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); + if (tmpIdx > 0) { + for (int i = nextKeyIdx; i <= tmpIdx; i++) { + entries_[i].disabled = true; + } + actual._dataEnd = entries_[tmpIdx]._dataEnd; + } + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + break; + } + case KeyInfo::doRemove: { + // Remove the key with all parameters and following spaces + size_t pos; + size_t start; + if (interval_.par[actual._dataEnd-1] == ' ' || interval_.par[actual._dataEnd-1] == '}') + start = actual._dataEnd; + else + start = actual._dataEnd+1; + for (pos = start; pos < interval_.par.length(); pos++) { + if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%')) + break; + } + // Remove also enclosing parentheses [] and {} + int numpars = 0; + int spaces = 0; + while (actual._tokenstart > numpars) { + if (pos+numpars >= interval_.par.size()) + break; + else if (interval_.par[pos+numpars] == ']' && interval_.par[actual._tokenstart-numpars-1] == '[') + numpars++; + else if (interval_.par[pos+numpars] == '}' && interval_.par[actual._tokenstart-numpars-1] == '{') + numpars++; + else + break; + } + if (numpars > 0) { + if (interval_.par[pos+numpars] == ' ') + spaces++; + } + + interval_.addIntervall(actual._tokenstart-numpars, pos+numpars+spaces); + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isList: { + // Discard space before _tokenstart + int count; + for (count = 0; count < actual._tokenstart; count++) { + if (interval_.par[actual._tokenstart-count-1] != ' ') + break; + } + nextKeyIdx = getNextKey(); + int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); + if (tmpIdx > 0) { + // Special case: \item is not a list, but a command (like in Style Author_Biography in maa-monthly.layout) + // with arguments + // How else can we catch this one? + for (int i = nextKeyIdx; i <= tmpIdx; i++) { + entries_[i].disabled = true; + } + actual._dataEnd = entries_[tmpIdx]._dataEnd; + } + else if (nextKeyIdx > 0) { + // Ignore any lang entries inside data region + for (int i = nextKeyIdx; i < int(entries_.size()) && entries_[i]._tokenstart < actual._dataEnd; i++) { + if (entries_[i].keytype == KeyInfo::isMain) + entries_[i].disabled = true; + } + } + if (actual.disabled) { + interval_.addIntervall(actual._tokenstart-count, actual._dataEnd+1); + } + else { + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart); + } + if (interval_.par[actual._dataEnd+1] == '[') { + int posdown = interval_.findclosing(actual._dataEnd+2, interval_.par.length(), '[', ']'); + if ((interval_.par[actual._dataEnd+2] == '{') && + (interval_.par[posdown-1] == '}')) { + interval_.addIntervall(actual._dataEnd+1,actual._dataEnd+3); + interval_.addIntervall(posdown-1, posdown+1); + } + else { + interval_.addIntervall(actual._dataEnd+1, actual._dataEnd+2); + interval_.addIntervall(posdown, posdown+1); + } + int blk = interval_.nextNotIgnored(actual._dataEnd+1); + if (blk > posdown) { + // Discard at most 1 space after empty item + int count; + for (count = 0; count < 1; count++) { + if (interval_.par[blk+count] != ' ') + break; + } + if (count > 0) + interval_.addIntervall(blk, blk+count); + } + } + break; + } + case KeyInfo::isSectioning: { + // Discard spaces before _tokenstart + int count; + int val = actual._tokenstart; + for (count = 0; count < actual._tokenstart;) { + val = interval_.previousNotIgnored(val-1); + if (val < 0 || interval_.par[val] != ' ') + break; + else { + count = actual._tokenstart - val; + } + } + if (actual.disabled) { + removeHead(actual, count); + nextKeyIdx = getNextKey(); + } else { + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart); + nextKeyIdx = process(os, actual); + } + break; + } + case KeyInfo::isMath: { + // Same as regex, use the content unchanged + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isRegex: { + // DO NOT SPLIT ON REGEX + // Do not disable + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isIgnored: { + // Treat like a character for now + nextKeyIdx = getNextKey(); + break; + } + case KeyInfo::isMain: { + if (interval_.par.substr(actual._dataStart, 2) == "% ") + interval_.addIntervall(actual._dataStart, actual._dataStart+2); + if (actual._tokenstart > 0) { + int prev = interval_.previousNotIgnored(actual._tokenstart - 1); + if ((prev >= 0) && interval_.par[prev] == '%') + interval_.addIntervall(prev, prev+1); + } + if (actual.disabled) { + removeHead(actual); + interval_.langcount++; + if ((interval_.par.substr(actual._dataStart, 3) == " \\[") || + (interval_.par.substr(actual._dataStart, 8) == " \\begin{")) { + // Discard also the space before math-equation + interval_.addIntervall(actual._dataStart, actual._dataStart+1); + } + nextKeyIdx = getNextKey(); + // interval.resetOpenedP(actual._dataStart-1); + } + else { + if (actual._tokenstart < 26) { + // for the first (and maybe dummy) language + interval_.setForDefaultLang(actual); + } + interval_.resetOpenedP(actual._dataStart-1); + } + break; + } + case KeyInfo::invalid: + case KeyInfo::headRemove: + // These two cases cannot happen, already handled + // fall through + default: { + // LYXERR(Debug::INFO, "Unhandled keytype"); + nextKeyIdx = getNextKey(); + break; + } + } + return nextKeyIdx; +} + +int LatexInfo::process(ostringstream & os, KeyInfo const & actual ) +{ + int end = interval_.nextNotIgnored(actual._dataEnd); + int oldStart = actual._dataStart; + int nextKeyIdx = getNextKey(); + while (true) { + if ((nextKeyIdx < 0) || + (entries_[nextKeyIdx]._tokenstart >= actual._dataEnd) || + (entries_[nextKeyIdx].keytype == KeyInfo::invalid)) { + if (oldStart <= end) { + processRegion(oldStart, end); + oldStart = end+1; + } + break; + } + KeyInfo &nextKey = getKeyInfo(nextKeyIdx); + + if ((nextKey.keytype == KeyInfo::isMain) && !nextKey.disabled) { + (void) dispatch(os, actual._dataStart, nextKey); + end = nextKey._tokenstart; + break; + } + processRegion(oldStart, nextKey._tokenstart); + nextKeyIdx = dispatch(os, actual._dataStart, nextKey); + + oldStart = nextKey._dataEnd+1; + } + // now nextKey is either invalid or is outside of actual._dataEnd + // output the remaining and discard myself + if (oldStart <= end) { + processRegion(oldStart, end); + } + if (interval_.par.size() > (size_t) end && interval_.par[end] == '}') { + end += 1; + // This is the normal case. + // But if using the firstlanguage, the closing may be missing + } + // get minimum of 'end' and 'actual._dataEnd' in case that the nextKey.keytype was 'KeyInfo::isMain' + int output_end; + if (actual._dataEnd < end) + output_end = interval_.nextNotIgnored(actual._dataEnd); + else if (interval_.par.size() > (size_t) end) + output_end = interval_.nextNotIgnored(end); + else + output_end = interval_.par.size(); + if ((actual.keytype == KeyInfo::isMain) && actual.disabled) { + interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize); + } + // Remove possible empty data + int dstart = interval_.nextNotIgnored(actual._dataStart); + while (interval_.isOpeningPar(dstart) == 1) { + interval_.addIntervall(dstart, dstart+1); + int dend = interval_.findclosing(dstart+1, output_end); + interval_.addIntervall(dend, dend+1); + dstart = interval_.nextNotIgnored(dstart+1); + } + if (dstart < output_end) + interval_.output(os, output_end); + if (nextKeyIdx < 0) + interval_.addIntervall(0, end); + else + interval_.addIntervall(actual._tokenstart, end); + return nextKeyIdx; } string splitOnKnownMacros(string par, bool isPatternString) { - ostringstream os; - LatexInfo li(par, isPatternString); - // LYXERR0("Berfore split: " << par); - KeyInfo DummyKey = KeyInfo(KeyInfo::KeyType::isMain, 2, true); - DummyKey.head = ""; - DummyKey._tokensize = 0; - DummyKey._dataStart = 0; - DummyKey._dataEnd = par.length(); - DummyKey.disabled = true; - int firstkeyIdx = li.getFirstKey(); - string s; - if (firstkeyIdx >= 0) { - KeyInfo firstKey = li.getKeyInfo(firstkeyIdx); - DummyKey._tokenstart = firstKey._tokenstart; - int nextkeyIdx; - if ((firstKey.keytype != KeyInfo::isMain) || firstKey.disabled) { - // Use dummy firstKey - firstKey = DummyKey; - (void) li.setNextKey(firstkeyIdx); - } - else { - if (par.substr(firstKey._dataStart, 2) == "% ") - li.addIntervall(firstKey._dataStart, firstKey._dataStart+2); - } - nextkeyIdx = li.process(os, firstKey); - while (nextkeyIdx >= 0) { - // Check for a possible gap between the last - // entry and this one - int datastart = li.nextNotIgnored(firstKey._dataStart); - KeyInfo &nextKey = li.getKeyInfo(nextkeyIdx); - if ((nextKey._tokenstart > datastart)) { - // Handle the gap - firstKey._dataStart = datastart; - firstKey._dataEnd = par.length(); - (void) li.setNextKey(nextkeyIdx); - // Fake the last opened parenthesis - li.setForDefaultLang(firstKey); - nextkeyIdx = li.process(os, firstKey); - } - else { - if (nextKey.keytype != KeyInfo::isMain) { - firstKey._dataStart = datastart; - firstKey._dataEnd = nextKey._dataEnd+1; - (void) li.setNextKey(nextkeyIdx); - li.setForDefaultLang(firstKey); - nextkeyIdx = li.process(os, firstKey); - } - else { - nextkeyIdx = li.process(os, nextKey); - } - } - } - // Handle the remaining - firstKey._dataStart = li.nextNotIgnored(firstKey._dataStart); - firstKey._dataEnd = par.length(); - // Check if ! empty - if ((firstKey._dataStart < firstKey._dataEnd) && - (par[firstKey._dataStart] != '}')) { - li.setForDefaultLang(firstKey); - (void) li.process(os, firstKey); - } - s = os.str(); - if (s.empty()) { - // return string definitelly impossible to match - s = "\\foreignlanguage{ignore}{ }"; - } - } - else - s = par; /* no known macros found */ - // LYXERR0("After split: " << s); - return s; + ostringstream os; + LatexInfo li(par, isPatternString); + // LYXERR(Debug::INFO, "Berfore split: " << par); + KeyInfo DummyKey = KeyInfo(KeyInfo::KeyType::isMain, 2, true); + DummyKey.head = ""; + DummyKey._tokensize = 0; + DummyKey._dataStart = 0; + DummyKey._dataEnd = par.length(); + DummyKey.disabled = true; + int firstkeyIdx = li.getFirstKey(); + string s; + if (firstkeyIdx >= 0) { + KeyInfo firstKey = li.getKeyInfo(firstkeyIdx); + DummyKey._tokenstart = firstKey._tokenstart; + int nextkeyIdx; + if ((firstKey.keytype != KeyInfo::isMain) || firstKey.disabled) { + // Use dummy firstKey + firstKey = DummyKey; + (void) li.setNextKey(firstkeyIdx); + } + else { + if (par.substr(firstKey._dataStart, 2) == "% ") + li.addIntervall(firstKey._dataStart, firstKey._dataStart+2); + } + nextkeyIdx = li.process(os, firstKey); + while (nextkeyIdx >= 0) { + // Check for a possible gap between the last + // entry and this one + int datastart = li.nextNotIgnored(firstKey._dataStart); + KeyInfo &nextKey = li.getKeyInfo(nextkeyIdx); + if ((nextKey._tokenstart > datastart)) { + // Handle the gap + firstKey._dataStart = datastart; + firstKey._dataEnd = par.length(); + (void) li.setNextKey(nextkeyIdx); + // Fake the last opened parenthesis + li.setForDefaultLang(firstKey); + nextkeyIdx = li.process(os, firstKey); + } + else { + if (nextKey.keytype != KeyInfo::isMain) { + firstKey._dataStart = datastart; + firstKey._dataEnd = nextKey._dataEnd+1; + (void) li.setNextKey(nextkeyIdx); + li.setForDefaultLang(firstKey); + nextkeyIdx = li.process(os, firstKey); + } + else { + nextkeyIdx = li.process(os, nextKey); + } + } + } + // Handle the remaining + firstKey._dataStart = li.nextNotIgnored(firstKey._dataStart); + firstKey._dataEnd = par.length(); + // Check if ! empty + if ((firstKey._dataStart < firstKey._dataEnd) && + (par[firstKey._dataStart] != '}')) { + li.setForDefaultLang(firstKey); + (void) li.process(os, firstKey); + } + s = os.str(); + // return string definitelly impossible to match, but should be known + } + else + s = par; /* no known macros found */ + // LYXERR(Debug::INFO, "After split: " << s); + return s; } /* @@ -2439,7 +3501,7 @@ string splitOnKnownMacros(string par, bool isPatternString) * Resulting modified string is set to "", if * the searched tex does not contain all the features in the search pattern */ -static string correctlanguagesetting(string par, bool isPatternString, bool withformat) +static string correctlanguagesetting(string par, bool isPatternString, bool withformat, lyx::Buffer *pbuf = nullptr) { static Features regex_f; static int missed = 0; @@ -2450,17 +3512,35 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with while ((parlen > 0) && (par[parlen-1] == '\n')) { parlen--; } +#if 0 if (isPatternString && (parlen > 0) && (par[parlen-1] == '~')) { // Happens to be there in case of description or labeling environment parlen--; } +#endif string result; if (withformat) { // Split the latex input into pieces which // can be digested by our search engine - LYXERR(Debug::FIND, "input: \"" << par << "\""); + LYXERR(Debug::FINDVERBOSE, "input: \"" << par << "\""); + if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language + // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX + string doclang = pbuf->params().language->polyglossia(); + static regex langre("\\\\(foreignlanguage)\\{([^\\}]+)\\}"); + smatch sub; + bool toIgnoreLang = true; + for (sregex_iterator it(par.begin(), par.end(), langre), end; it != end; ++it) { + sub = *it; + if (sub.str(2) != doclang) { + toIgnoreLang = false; + break; + } + } + setIgnoreFormat("language", toIgnoreLang, false); + + } result = splitOnKnownMacros(par.substr(0,parlen), isPatternString); - LYXERR(Debug::FIND, "After split: \"" << result << "\""); + LYXERR(Debug::FINDVERBOSE, "After splitOnKnownMacros:\n\"" << result << "\""); } else result = par.substr(0, parlen); @@ -2468,14 +3548,14 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with missed = 0; if (withformat) { regex_f = identifyFeatures(result); - string features = ""; + string features = ""; for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) { string a = it->first; regex_with_format = true; - features += " " + a; - // LYXERR0("Identified regex format:" << a); + features += " " + a; + // LYXERR(Debug::INFO, "Identified regex format:" << a); } - LYXERR(Debug::FIND, "Identified Features" << features); + LYXERR(Debug::FINDVERBOSE, "Identified Features" << features); } } else if (regex_with_format) { @@ -2485,102 +3565,214 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with bool b = it->second; if (b && ! info[a]) { missed++; - LYXERR(Debug::FIND, "Missed(" << missed << " " << a <<", srclen = " << parlen ); - return(""); + LYXERR(Debug::FINDVERBOSE, "Missed(" << missed << " " << a <<", srclen = " << parlen ); + return ""; } } + } else { - // LYXERR0("No regex formats"); + // LYXERR(Debug::INFO, "No regex formats"); } - return(result); + return result; } // Remove trailing closure of math, macros and environments, so to catch parts of them. -static int identifyClosing(string & t) +static void identifyClosing(string & t, bool ignoreformat) { - int open_braces = 0; do { - LYXERR(Debug::FIND, "identifyClosing(): t now is '" << t << "'"); - if (regex_replace(t, t, "(.*[^\\\\])\\$" REGEX_EOS, "$1")) + LYXERR(Debug::FINDVERBOSE, "identifyClosing(): t now is '" << t << "'"); + if (regex_replace(t, t, "(.*[^\\\\])\\$$", "$1")) continue; - if (regex_replace(t, t, "(.*[^\\\\]) \\\\\\]" REGEX_EOS, "$1")) + if (regex_replace(t, t, "(.*[^\\\\])\\\\\\]$", "$1")) continue; - if (regex_replace(t, t, "(.*[^\\\\]) \\\\end\\{[a-zA-Z_]*\\*?\\}" REGEX_EOS, "$1")) - continue; - if (regex_replace(t, t, "(.*[^\\\\])\\}" REGEX_EOS, "$1")) { - ++open_braces; + if (regex_replace(t, t, "(.*[^\\\\])\\\\end\\{[a-zA-Z_]+\\*?\\}$", "$1")) continue; + if (! ignoreformat) { + if (regex_replace(t, t, "(.*[^\\\\])\\}$", "$1")) + continue; } break; } while (true); - return open_braces; + return; } +static int num_replaced = 0; +static bool previous_single_replace = true; -MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt) +void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string) +{ +#if QTSEARCH + if (regexp_str.empty() || regexp2_str.empty()) { + regexIsValid = false; + regexError = "Invalid empty regex"; + return; + } + // Handle \w properly + QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption; + if (! opt.casesensitive) { + popts |= QRegularExpression::CaseInsensitiveOption; + } + regexp = QRegularExpression(QString::fromStdString(regexp_str), popts); + regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts); + regexError = ""; + if (regexp.isValid() && regexp2.isValid()) { + regexIsValid = true; + // Check '{', '}' pairs inside the regex + int balanced = 0; + int skip = 1; + for (unsigned i = 0; i < par_as_string.size(); i+= skip) { + char c = par_as_string[i]; + if (c == '\\') { + skip = 2; + continue; + } + if (c == '{') + balanced++; + else if (c == '}') { + balanced--; + if (balanced < 0) + break; + } + skip = 1; + } + if (balanced != 0) { + regexIsValid = false; + regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\""; + } + } + else { + regexIsValid = false; + if (!regexp.isValid()) + regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString(); + else + regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString(); + } +#else + (void)par_as_string; + if (opt.casesensitive) { + regexp = regex(regexp_str); + regexp2 = regex(regexp2_str); + } + else { + regexp = regex(regexp_str, std::regex_constants::icase); + regexp2 = regex(regexp2_str, std::regex_constants::icase); + } +#endif +} + +static void modifyRegexForMatchWord(string &t) +{ + string s(""); + regex wordre("(\\\\)*((\\.|\\\\b))"); + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else if (sub.str(2) == "\\\\b") + return; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += "\\S"; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) { + s = "\\b" + t + "\\b"; + t = s; + return; + } + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = "\\b" + s + "\\b"; +} + +MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) : p_buf(&buf), p_first_buf(&buf), opt(opt) { Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true); docstring const & ds = stringifySearchBuffer(find_buf, opt); - use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos; + if (ds.empty() ) { + CreateRegexp(opt, "", "", ""); + return; + } + use_regexp = ds.find(from_utf8("\\regexp{")) != std::string::npos; + if (opt.replace_all && previous_single_replace) { + previous_single_replace = false; + num_replaced = 0; + } + else if (!opt.replace_all) { + num_replaced = 0; // count number of replaced strings + previous_single_replace = true; + } // When using regexp, braces are hacked already by escape_for_regex() - par_as_string = normalize(ds, !use_regexp); - open_braces = 0; - close_wildcards = 0; + par_as_string = convertLF2Space(ds, opt.ignoreformat); size_t lead_size = 0; // correct the language settings - par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat); - if (opt.ignoreformat) { - if (!use_regexp) { - // if par_as_string_nolead were emty, - // the following call to findAux will always *find* the string - // in the checked data, and thus always using the slow - // examining of the current text part. - par_as_string_nolead = par_as_string; + par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat, &buf); + if (par_as_string.empty()) { + CreateRegexp(opt, "", "", ""); + return; + } + opt.matchAtStart = false; + if (!use_regexp) { + identifyClosing(par_as_string, opt.ignoreformat); // Removes math closings ($, ], ...) at end of string + if (opt.ignoreformat) { + lead_size = 0; } - } else { + else { + lead_size = identifyLeading(par_as_string); + } + lead_as_string = par_as_string.substr(0, lead_size); + string lead_as_regex_string = string2regex(lead_as_string); + par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); + string par_as_regex_string_nolead = string2regex(par_as_string_nolead); + /* Handle whole words too in this case + */ + if (opt.matchword) { + par_as_regex_string_nolead = "\\b" + par_as_regex_string_nolead + "\\b"; + opt.matchword = false; + } + string regexp_str = "(" + lead_as_regex_string + ")()" + par_as_regex_string_nolead; + string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead; + CreateRegexp(opt, regexp_str, regexp2_str); + use_regexp = true; + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + return; + } + + if (!opt.ignoreformat) { lead_size = identifyLeading(par_as_string); - LYXERR(Debug::FIND, "Lead_size: " << lead_size); + LYXERR(Debug::FINDVERBOSE, "Lead_size: " << lead_size); lead_as_string = par_as_string.substr(0, lead_size); par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); } - if (!use_regexp) { - open_braces = identifyClosing(par_as_string); - identifyClosing(par_as_string_nolead); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'"); - } else { + // Here we are using regexp + LASSERT(use_regexp, /**/); + { string lead_as_regexp; if (lead_size > 0) { - // @todo No need to search for \regexp{} insets in leading material - lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat); + lead_as_regexp = string2regex(par_as_string.substr(0, lead_size)); + (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "\\$$", ""); + (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", ""); par_as_string = par_as_string_nolead; - LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + LYXERR(Debug::FINDVERBOSE, "lead_as_regexp is '" << lead_as_regexp << "'"); + LYXERR(Debug::FINDVERBOSE, "par_as_string now is '" << par_as_string << "'"); } + // LYXERR(Debug::FINDVERBOSE, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat); // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them. - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - if ( - // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex) - regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2") - // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])( \\\\\\\\\\\\\\])\\'", "$1(.*?)$2") - // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, - "(.*[^\\\\])( \\\\\\\\end\\\\\\{[a-zA-Z_]*)(\\\\\\*)?(\\\\\\})\\'", "$1(.*?)$2$3$4") - // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2") - ) { - ++close_wildcards; - } + // LYXERR(Debug::FINDVERBOSE, "par_as_string now is '" << par_as_string << "'"); + ++close_wildcards; + size_t lng = par_as_string.size(); if (!opt.ignoreformat) { // Remove extra '\}' at end if not part of \{\.\} - size_t lng = par_as_string.size(); while(lng > 2) { if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) { if (lng >= 6) { @@ -2594,24 +3786,17 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & break; } if (lng < par_as_string.size()) - par_as_string = par_as_string.substr(0,lng); - /* - // save '\.' - regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_"); - // handle '.' -> '[^]', replace later as '[^\}\{\\]' - regex_replace(par_as_string, par_as_string, "\\.", "[^]"); - // replace '[^...]' with '[^...\}\{\\]' - regex_replace(par_as_string, par_as_string, "\\[\\^([^\\\\\\]]*)\\]", "_xxbrlxx_$1\\}\\{\\\\_xxbrrxx_"); - regex_replace(par_as_string, par_as_string, "_xxbrlxx_", "[^"); - regex_replace(par_as_string, par_as_string, "_xxbrrxx_", "]"); - // restore '\.' - regex_replace(par_as_string, par_as_string, "_xxbdotxx_", "\\."); - */ - } - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Close .*? : " << close_wildcards); - LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); + par_as_string.resize(lng); + } + LYXERR(Debug::FINDVERBOSE, "par_as_string after correctRegex is '" << par_as_string << "'"); + if ((lng > 0) && (par_as_string[0] == '^')) { + par_as_string = par_as_string.substr(1); + --lng; + opt.matchAtStart = true; + } + // LYXERR(Debug::FINDVERBOSE, "par_as_string now is '" << par_as_string << "'"); + // LYXERR(Debug::FINDVERBOSE, "Open braces: " << open_braces); + // LYXERR(Debug::FINDVERBOSE, "Replaced text (to be used as regex): " << par_as_string); // If entered regexp must match at begin of searched string buffer // Kornel: Added parentheses to use $1 for size of the leading string @@ -2621,100 +3806,72 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & // TODO: Adapt '\[12345678]' in par_as_string to acount for the first '() // Unfortunately is '\1', '\2', etc not working for strings with extra format // so the convert has no effect in that case - for (int i = 8; i > 0; --i) { + for (int i = 7; i > 0; --i) { string orig = "\\\\" + std::to_string(i); - string dest = "\\" + std::to_string(i+1); + string dest = "\\" + std::to_string(i+2); while (regex_replace(par_as_string, par_as_string, orig, dest)); } - regexp_str = "(" + lead_as_regexp + ")" + par_as_string; - regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string; - } - LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); - regexp = lyx::regex(regexp_str); - - LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); - regexp2 = lyx::regex(regexp2_str); - } -} - - -// Count number of characters in string -// {]} ==> 1 -// \& ==> 1 -// --- ==> 1 -// \\[a-zA-Z]+ ==> 1 -static int computeSize(string s, int len) -{ - if (len == 0) - return 0; - int skip = 1; - int count = 0; - for (int i = 0; i < len; i += skip, count++) { - if (s[i] == '\\') { - skip = 2; - if (isalpha(s[i+1])) { - for (int j = 2; i+j < len; j++) { - if (! isalpha(s[i+j])) { - if (s[i+j] == ' ') - skip++; - else if ((s[i+j] == '{') && s[i+j+1] == '}') - skip += 2; - else if ((s[i+j] == '{') && (i + j + 1 >= len)) - skip++; - break; - } - skip++; - } - } - } - else if (s[i] == '{') { - if (s[i+1] == '}') - skip = 2; - else - skip = 3; - } - else if (s[i] == '-') { - if (s[i+1] == '-') { - if (s[i+2] == '-') - skip = 3; - else - skip = 2; + if (opt.matchword) { + modifyRegexForMatchWord(par_as_string); + // opt.matchword = false; } - else - skip = 1; - } - else { - skip = 1; + regexp_str = "(" + lead_as_regexp + ")()" + par_as_string; + regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string; } + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + CreateRegexp(opt, regexp_str, regexp2_str, par_as_string); } - return count; } -MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const +MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const { MatchResult mres; - if (at_begin && - (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) ) - return mres; + mres.searched_size = len; docstring docstr = stringifyFromForSearch(opt, cur, len); - string str = normalize(docstr, true); + string str; + str = convertLF2Space(docstr, opt.ignoreformat); if (!opt.ignoreformat) { str = correctlanguagesetting(str, false, !opt.ignoreformat); + // remove closing '}' and '\n' to allow for use of '$' in regex + size_t lng = str.size(); + while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n'))) + lng--; + if (lng != str.size()) { + str = str.substr(0, lng); + } + // Replace occurences of '~' to ' ' + static std::regex specialChars { R"(~)" }; + str = std::regex_replace(str, specialChars, R"( )" ); } if (str.empty()) { mres.match_len = -1; return mres; } - LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); - LYXERR(Debug::FIND, "After normalization: '" << str << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "After normalization: Matching against:\n'" << str << "'"); - if (use_regexp) { - LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); + LASSERT(use_regexp, /**/); + { + // use_regexp always true + LYXERR(Debug::FINDVERBOSE, "Searching in regexp mode: at_begin=" << matchTypeAsString(at_begin)); +#if QTSEARCH + QString qstr = QString::fromStdString(str); + QRegularExpression const *p_regexp; + QRegularExpression::MatchType flags = QRegularExpression::NormalMatch; + if (at_begin == MatchStringAdv::MatchFromStart) { + p_regexp = ®exp; + } else { + p_regexp = ®exp2; + } + QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags); + if (!match.hasMatch()) + return mres; +#else regex const *p_regexp; regex_constants::match_flag_type flags; - if (at_begin) { + if (at_begin == MatchStringAdv::MatchFromStart) { flags = regex_constants::match_continuous; p_regexp = ®exp; } else { @@ -2725,248 +3882,294 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be if (re_it == sregex_iterator()) return mres; match_results const & m = *re_it; - - if (0) { // Kornel Benko: DO NOT CHECKK - // Check braces on the segment that matched the entire regexp expression, - // plus the last subexpression, if a (.*?) was inserted in the constructor. - if (!braces_match(m[0].first, m[0].second, open_braces)) - return mres; - } - - // Check braces on segments that matched all (.*?) subexpressions, - // except the last "padding" one inserted by lyx. - for (size_t i = 1; i < m.size() - 1; ++i) - if (!braces_match(m[i].first, m[i].second, open_braces)) - return mres; - - // Exclude from the returned match length any length - // due to close wildcards added at end of regexp - // and also the length of the leading (e.g. '\emph{') +#endif + // Whole found string, including the leading + // std: m[0].second - m[0].first + // Qt: match.capturedEnd(0) - match.capturedStart(0) // - // Whole found string, including the leading: m[0].second - m[0].first - // Size of the leading string: m[1].second - m[1].first + // Size of the leading string + // std: m[1].second - m[1].first + // Qt: match.capturedEnd(1) - match.capturedStart(1) int leadingsize = 0; - if (m.size() > 1) - leadingsize = m[1].second - m[1].first; - int result; - for (size_t i = 0; i < m.size(); i++) { - LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long"); - } - if (close_wildcards == 0) - result = m[0].second - m[0].first; - - else - result = m[m.size() - close_wildcards].first - m[0].first; - - size_t pos = m.position(size_t(0)); - // Ignore last closing characters - while (result > 0) { - if (str[pos+result-1] == '}') - --result; - else - break; +#if QTSEARCH + if (match.lastCapturedIndex() > 0) { + leadingsize = match.capturedEnd(1) - match.capturedStart(1); } - if (result > leadingsize) - result -= leadingsize; - else - result = 0; - mres.match_len = computeSize(str.substr(pos+leadingsize,result), result); - mres.match2end = str.size() - pos - leadingsize; - mres.pos = pos+leadingsize; - return mres; - } - // else !use_regexp: but all code paths above return - LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='" - << par_as_string << "', str='" << str << "'"); - LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='" - << lead_as_string << "', par_as_string_nolead='" - << par_as_string_nolead << "'"); - - if (at_begin) { - LYXERR(Debug::FIND, "size=" << par_as_string.size() - << ", substr='" << str.substr(0, par_as_string.size()) << "'"); - if (str.substr(0, par_as_string.size()) == par_as_string) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size(); - mres.pos = 0; - return mres; +#else + if (m.size() > 2) { + leadingsize = m[1].second - m[1].first; } - } else { - size_t pos = str.find(par_as_string_nolead); - if (pos != string::npos) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size() - pos; - mres.pos = pos; - return mres; +#endif +#if QTSEARCH + mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2); + mres.match_len = match.capturedEnd(0) - match.capturedEnd(2); + // because of different number of closing at end of string + // we have to 'unify' the length of the post-match. + // Done by ignoring closing parenthesis and linefeeds at string end + int matchend = match.capturedEnd(0); + size_t strsize = qstr.size(); + if (!opt.ignoreformat) { + while (mres.match_len > 1) { + QChar c = qstr.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > (size_t) match.capturedEnd(0)) { + QChar c = qstr.at(strsize-1); + if ((c == '\n') || (c == '}')) { + --strsize; + } + else + break; + } + } + // LYXERR0(qstr.toStdString()); + mres.match2end = strsize - matchend; + mres.pos = match.capturedStart(2); +#else + mres.match_prefix = m[2].second - m[2].first; + mres.match_len = m[0].second - m[2].second; + // ignore closing parenthesis and linefeeds at string end + size_t strend = m[0].second - m[0].first; + int matchend = strend; + size_t strsize = str.size(); + if (!opt.ignoreformat) { + while (mres.match_len > 1) { + char c = str.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > strend) { + if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) { + --strsize; + } + else + break; + } + } + // LYXERR0(str); + mres.match2end = strsize - matchend; + mres.pos = m[2].first - m[0].first;; +#endif + if (mres.match2end < 0) + mres.match_len = 0; + mres.leadsize = leadingsize; +#if QTSEARCH + if (mres.match_len > 0) { + string a0 = match.captured(0).mid(mres.pos + mres.match_prefix, mres.match_len).toStdString(); + mres.result.push_back(a0); + for (int i = 3; i <= match.lastCapturedIndex(); i++) { + mres.result.push_back(match.captured(i).toStdString()); + } + } +#else + if (mres.match_len > 0) { + string a0 = m[0].str().substr(mres.pos + mres.match_prefix, mres.match_len); + mres.result.push_back(a0); + for (size_t i = 3; i < m.size(); i++) { + mres.result.push_back(m[i]); + } } +#endif + return mres; } - return mres; } -MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const +MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const { MatchResult mres = findAux(cur, len, at_begin); - int res = mres.match_len; - LYXERR(Debug::FIND, - "res=" << res << ", at_begin=" << at_begin - << ", matchword=" << opt.matchword + LYXERR(Debug::FINDVERBOSE, + "res=" << mres.match_len << ", at_begin=" << matchTypeAsString(at_begin) + << ", matchAtStart=" << opt.matchAtStart << ", inTexted=" << cur.inTexted()); - if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted()) - return mres; - if ((len > 0) && (res < len)) { - mres.match_len = 0; - return mres; - } - Paragraph const & par = cur.paragraph(); - bool ws_left = (cur.pos() > 0) - ? par.isWordSeparator(cur.pos() - 1) - : true; - bool ws_right = (cur.pos() + len < par.size()) - ? par.isWordSeparator(cur.pos() + len) - : true; - LYXERR(Debug::FIND, - "cur.pos()=" << cur.pos() << ", res=" << res - << ", separ: " << ws_left << ", " << ws_right - << ", len: " << len - << endl); - if (ws_left && ws_right) { - // Check for word separators inside the found 'word' - for (int i = 0; i < len; i++) { - if (par.isWordSeparator(cur.pos() + i)) { - mres.match_len = 0; - return mres; - } - } - return mres; - } - mres.match_len = 0; + if (mres.match_len > 0) { + if (opt.matchAtStart) { + if (cur.pos() > 0 || mres.match_prefix > 0) + mres.match_len = 0; + } + } return mres; } +#if 0 +static bool simple_replace(string &t, string from, string to) +{ + regex repl("(\\\\)*(" + from + ")"); + string s(""); + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), repl), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) + continue; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += to; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + return false; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = s; + return true; +} +#endif -string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const +string MatchStringAdv::convertLF2Space(docstring const &s, bool ignore_format) const { - string t; - if (! opt.casesensitive) - t = lyx::to_utf8(lowercase(s)); - else - t = lyx::to_utf8(s); - // Remove \n at begin - while (!t.empty() && t[0] == '\n') - t = t.substr(1); - // Remove \n at end - while (!t.empty() && t[t.size() - 1] == '\n') - t = t.substr(0, t.size() - 1); + // Using original docstring to handle '\n' + + if (s.size() == 0) return ""; + stringstream t; size_t pos; - // Handle all other '\n' - while ((pos = t.find("\n")) != string::npos) { - if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) { - // Handle '\\\n' - if (isAlnumASCII(t[pos+1])) { - t.replace(pos-2, 3, " "); + size_t start = 0; + size_t end = s.size() - 1; + if (!ignore_format) { + while (s[start] == '\n' && start <= end) + start++; + while (end >= start && s[end] == '\n') + end--; + if (start >= end + 1) + return ""; + } + do { + bool dospace = true; + int skip = -1; + pos = s.find('\n', start); + if (pos >= end) { + t << lyx::to_utf8(s.substr(start, end + 1 - start)); + break; + } + if (!ignore_format) { + if ((pos > start + 1) && + s[pos-1] == '\\' && + s[pos-2] == '\\') { + skip = 2; + if ((pos > start + 2) && + (s[pos+1] == '~' || isSpace(s[pos+1]) || + s[pos-3] == '~' || isSpace(s[pos-3]))) { + // discard "\\\\\n", do not replace with space + dospace = false; + } } - else { - t.replace(pos-2, 3, ""); + else if (pos > start) { + if (s[pos-1] == '%') { + skip = 1; + while ((pos > start+skip) && (s[pos-1-skip] == '%')) + skip++; + if ((pos > start+skip) && + (s[pos+1] == '~' || isSpace(s[pos+1]) || + s[pos-1-skip] == '~' || isSpace(s[pos-1-skip]))) { + // discard '%%%%%\n' + dospace = false; + } + } + else if (!isAlnumASCII(s[pos+1]) || !isAlnumASCII(s[pos-1])) { + dospace = false; + skip = 0; // remove the '\n' only + } } } - else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) { - // '\n' adjacent to non-alpha-numerics, discard - t.replace(pos, 1, ""); - } else { - // Replace all other \n with spaces - t.replace(pos, 1, " "); + dospace = true; + skip = 0; } - } - // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify - // Kornel: Added textsl, textsf, textit, texttt and noun - // + allow to seach for colored text too - LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); - while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) - LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); - while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) - LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); - - while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); - // FIXME - check what preceeds the brace - if (hack_braces) { - if (opt.ignoreformat) - while (regex_replace(t, t, "\\{", "_x_<") - || regex_replace(t, t, "\\}", "_x_>")) - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); - else - while (regex_replace(t, t, "\\\\\\{", "_x_<") - || regex_replace(t, t, "\\\\\\}", "_x_>")) - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); - } - - return t; + t << lyx::to_utf8(s.substr(start, pos-skip-start)); + if (dospace) + t << ' '; + start = pos+1; + } while (start <= end); + return(t.str()); } - docstring stringifyFromCursor(DocIterator const & cur, int len) { - LYXERR(Debug::FIND, "Stringifying with len=" << len << " from cursor at pos: " << cur); + LYXERR(Debug::FINDVERBOSE, "Stringifying with len=" << len << " from cursor at pos: " << cur); if (cur.inTexted()) { Paragraph const & par = cur.paragraph(); // TODO what about searching beyond/across paragraph breaks ? // TODO Try adding a AS_STR_INSERTS as last arg pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ? - int(par.size()) : cur.pos() + len; - OutputParams runparams(&cur.buffer()->params().encoding()); + int(par.size()) : cur.pos() + len; + // OutputParams runparams(&cur.buffer()->params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = true; - runparams.flavor = OutputParams::LATEX; - runparams.linelen = 10000; //lyxrc.plaintext_linelen; - // No side effect of file copying and image conversion - runparams.dryrun = true; - runparams.for_search = true; - LYXERR(Debug::FIND, "Stringifying with cur: " + setFindParams(runparams); + int option = AS_STR_INSETS | AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); + } + else { + runparams.find_set_feature(OutputParams::SearchWithDeleted); + } + if (ignoreFormats.getNonContent()) { + runparams.find_add_feature(OutputParams::SearchNonOutput); + } + LYXERR(Debug::FINDVERBOSE, "Stringifying with cur: " << cur << ", from pos: " << cur.pos() << ", end: " << end); - return par.asString(cur.pos(), end, - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, - &runparams); + docstring res = from_utf8(latexNamesToUtf8(par.asString(cur.pos(), end, + option, + &runparams), false)); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Stringified text from pos(" << cur.pos() << ") len(" << len << "): " << res); + return res; } else if (cur.inMathed()) { CursorSlice cs = cur.top(); MathData md = cs.cell(); MathData::const_iterator it_end = - (( len == -1 || cs.pos() + len > int(md.size())) - ? md.end() - : md.begin() + cs.pos() + len ); + (( len == -1 || cs.pos() + len > int(md.size())) + ? md.end() + : md.begin() + cs.pos() + len ); MathData md2; - for (MathData::const_iterator it = md.begin() + cs.pos(); - it != it_end; ++it) + for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it) md2.push_back(*it); - docstring s = asString(md2); - LYXERR(Debug::FIND, "Stringified math: '" << s << "'"); - return s; + docstring res = from_utf8(latexNamesToUtf8(asString(md2), false)); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Stringified math from pos(" << cur.pos() << ") len(" << len << "): " << res); + return res; } - LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Don't know how to stringify from here: " << cur); return docstring(); } - /** Computes the LaTeX export of buf starting from cur and ending len positions * after cur, if len is positive, or at the paragraph or innermost inset end * if len is -1. */ docstring latexifyFromCursor(DocIterator const & cur, int len) { - LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); - LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" + /* + LYXERR(Debug::FINDVERBOSE, "Latexifying with len=" << len << " from cursor at pos: " << cur); + LYXERR(Debug::FINDVERBOSE, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); + */ Buffer const & buf = *cur.buffer(); odocstringstream ods; otexstream os(ods); - OutputParams runparams(&buf.params().encoding()); + //OutputParams runparams(&buf.params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = false; - runparams.flavor = OutputParams::LATEX; - runparams.linelen = 8000; //lyxrc.plaintext_linelen; - // No side effect of file copying and image conversion - runparams.dryrun = true; - runparams.for_search = true; + setFindParams(runparams); + if (ignoreFormats.getDeleted()) { + runparams.find_set_feature(OutputParams::SearchWithoutDeleted); + } + else { + runparams.find_set_feature(OutputParams::SearchWithDeleted); + } + if (ignoreFormats.getNonContent()) { + runparams.find_add_feature(OutputParams::SearchNonOutput); + } if (cur.inTexted()) { // @TODO what about searching beyond/across paragraph breaks ? @@ -2974,16 +4177,15 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) if (len != -1 && endpos > cur.pos() + len) endpos = cur.pos() + len; TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams, - string(), cur.pos(), endpos); - string s = lyx::to_utf8(ods.str()); - LYXERR(Debug::FIND, "Latexified +modified text: '" << s << "'"); - return(lyx::from_utf8(s)); + string(), cur.pos(), endpos, true); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Latexified text from pos(" << cur.pos() << ") len(" << len << "): " << ods.str()); + return(ods.str()); } else if (cur.inMathed()) { // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly for (int s = cur.depth() - 1; s >= 0; --s) { CursorSlice const & cs = cur[s]; if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) { - WriteStream ws(os); + TeXMathStream ws(os); cs.asInsetMath()->asHullInset()->header_write(ws); break; } @@ -2992,9 +4194,9 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) CursorSlice const & cs = cur.top(); MathData md = cs.cell(); MathData::const_iterator it_end = - ((len == -1 || cs.pos() + len > int(md.size())) - ? md.end() - : md.begin() + cs.pos() + len); + ((len == -1 || cs.pos() + len > int(md.size())) + ? md.end() + : md.begin() + cs.pos() + len); MathData md2; for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it) @@ -3007,215 +4209,319 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) CursorSlice const & cs2 = cur[s]; InsetMath * inset = cs2.asInsetMath(); if (inset && inset->asHullInset()) { - WriteStream ws(os); + TeXMathStream ws(os); inset->asHullInset()->footer_write(ws); break; } } - LYXERR(Debug::FIND, "Latexified math: '" << lyx::to_utf8(ods.str()) << "'"); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Latexified math from pos(" << cur.pos() << ") len(" << len << "): " << ods.str()); } else { - LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur); + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Don't know how to stringify from here: " << cur); } return ods.str(); } +#if defined(ResultsDebug) +// Debugging output +static void displayMResult(MatchResult &mres, string from, DocIterator & cur) +{ + LYXERR0( "from:\t\t\t" << from); + string status; + if (mres.pos_len > 0) { + // Set in finalize + status = "FINALSEARCH"; + } + else { + if (mres.match_len > 0) { + if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize)) + status = "Good Match"; + else + status = "Matched in"; + } + else + status = "MissedSearch"; + } + + LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")"); + if ((mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0)) + LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")"); + if ((mres.pos > 0) || (mres.match_prefix > 0)) + LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")"); + for (size_t i = 0; i < mres.result.size(); i++) + LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\""); +} +#define displayMres(s, txt, cur) displayMResult(s, txt, cur); +#else +#define displayMres(s, txt, cur) +#endif /** Finalize an advanced find operation, advancing the cursor to the innermost ** position that matches, plus computing the length of the matching text to ** be selected + ** Return the cur.pos() difference between start and end of found match **/ -int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match) +MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1)) { // Search the foremost position that matches (avoids find of entire math // inset when match at start of it) - size_t d; DocIterator old_cur(cur.buffer()); - do { - LYXERR(Debug::FIND, "Forwarding one step (searching for innermost match)"); - d = cur.depth(); + MatchResult mres; + static MatchResult fail = MatchResult(); + MatchResult max_match; + // If (prefix_len > 0) means that forwarding 1 position will remove the complete entry + // Happens with e.g. hyperlinks + // either one sees "http://www.bla.bla" or nothing + // so the search for "www" gives prefix_len = 7 (== sizeof("http://") + // and although we search for only 3 chars, we find the whole hyperlink inset + MatchStringAdv::matchType at_begin = (expected.match_prefix == 0) ? MatchStringAdv::MatchFromStart : MatchStringAdv::MatchAnyPlace; + if (!match.opt.forward && match.opt.ignoreformat) { + if (expected.pos > 0) + return fail; + } + LASSERT(at_begin == MatchStringAdv::MatchFromStart, /**/); + if (expected.match_len > 0 && at_begin == MatchStringAdv::MatchFromStart) { + // Search for deepest match old_cur = cur; - cur.forwardPos(); - } while (cur && cur.depth() > d && match(cur).match_len > 0); - cur = old_cur; - int max_match = match(cur).match_len; /* match valid only if not searching whole words */ - if (max_match <= 0) return 0; - LYXERR(Debug::FIND, "Ok"); + max_match = expected; + do { + size_t d = cur.depth(); + cur.forwardPos(); + if (!cur) + break; + if (cur.depth() < d) + break; + if (cur.depth() == d) + break; + size_t lastd = d; + while (cur && cur.depth() > lastd) { + lastd = cur.depth(); + mres = match(cur, -1, at_begin); + displayMres(mres, "Checking innermost", cur); + if (mres.match_len > 0) + break; + // maybe deeper? + cur.forwardPos(); + } + if (mres.match_len < expected.match_len) + break; + max_match = mres; + old_cur = cur;; + } while(1); + cur = old_cur; + } + else { + // (expected.match_len <= 0) + mres = match(cur, -1, MatchStringAdv::MatchFromStart); /* match valid only if not searching whole words */ + displayMres(mres, "Start with negative match", cur); + max_match = mres; + } + // Only now we are really at_begin + if ((max_match.match_len <= 0) || + (match.opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed())) + return fail; + LYXERR(Debug::FINDVERBOSE, "Ok"); // Compute the match length - int len = 1; + int len = 1; if (cur.pos() + len > cur.lastpos()) - return 0; - if (match.opt.matchword) { - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) { - ++len; - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - } - // Length of matched text (different from len param) - int old_match = match(cur, len).match_len; - if (old_match < 0) - old_match = 0; - int new_match; - // Greedy behaviour while matching regexps - while ((new_match = match(cur, len + 1).match_len) > old_match) { - ++len; - old_match = new_match; - LYXERR(Debug::FIND, "verifying match with len = " << len); - } - if (old_match == 0) - len = 0; - } - else { - int minl = 1; - int maxl = cur.lastpos() - cur.pos(); - // Greedy behaviour while matching regexps - while (maxl > minl) { - int actual_match = match(cur, len).match_len; - if (actual_match >= max_match) { - // actual_match > max_match _can_ happen, - // if the search area splits - // some following word so that the regex - // (e.g. 'r.*r\b' matches 'r' from the middle of the - // splitted word) - // This means, the len value is too big - maxl = len; - len = (int)((maxl + minl)/2); - } - else { - // (actual_match < max_match) - minl = len + 1; - len = (int)((maxl + minl)/2); - } - } - old_cur = cur; - // Search for real start of matched characters - while (len > 1) { - int actual_match; - do { - cur.forwardPos(); - } while (cur.depth() > old_cur.depth()); /* Skip inner insets */ - if (cur.depth() < old_cur.depth()) { - // Outer inset? - LYXERR0("cur.depth() < old_cur.depth(), this should never happen"); - break; - } - if (cur.pos() != old_cur.pos()) { - // OK, forwarded 1 pos in actual inset - actual_match = match(cur, len-1).match_len; - if (actual_match == max_match) { - // Ha, got it! The shorter selection has the same match length - len--; - old_cur = cur; - } - else { - // OK, the shorter selection matches less chars, revert to previous value - cur = old_cur; - break; - } - } - else { - LYXERR0("cur.pos() == old_cur.pos(), this should never happen"); - actual_match = match(cur, len).match_len; - if (actual_match == max_match) - old_cur = cur; - } - } - } - return len; -} + return fail; + LASSERT(match.use_regexp, /**/); + { + int minl = 1; + int maxl = cur.lastpos() - cur.pos(); + // Greedy behaviour while matching regexps + while (maxl > minl) { + MatchResult mres2; + mres2 = match(cur, len, at_begin); + displayMres(mres2, "Finalize loop", cur); + int actual_match_len = mres2.match_len; + if (actual_match_len >= max_match.match_len) { + // actual_match_len > max_match _can_ happen, + // if the search area splits + // some following word so that the regex + // (e.g. 'r.*r\b' matches 'r' from the middle of the + // splitted word) + // This means, the len value is too big + actual_match_len = max_match.match_len; + max_match = mres2; + max_match.match_len = actual_match_len; + maxl = len; + if (maxl - minl < 4) + len = (maxl + minl)/2; + else + len = minl + (maxl - minl + 3)/4; + } + else { + // (actual_match_len < max_match.match_len) + minl = len + 1; + len = (maxl + minl)/2; + } + } + len = minl; + old_cur = cur; + // Search for real start of matched characters + while (len > 1) { + MatchResult actual_match; + do { + cur.forwardPos(); + } while (cur.depth() > old_cur.depth()); /* Skip inner insets */ + if (cur.depth() < old_cur.depth()) { + // Outer inset? + LYXERR(Debug::INFO, "cur.depth() < old_cur.depth(), this should never happen"); + break; + } + if (cur.pos() != old_cur.pos()) { + // OK, forwarded 1 pos in actual inset + actual_match = match(cur, len-1, at_begin); + if (actual_match.match_len == max_match.match_len) { + // Ha, got it! The shorter selection has the same match length + len--; + old_cur = cur; + max_match = actual_match; + } + else { + // OK, the shorter selection matches less chars, revert to previous value + cur = old_cur; + break; + } + } + else { + LYXERR(Debug::INFO, "cur.pos() == old_cur.pos(), this should never happen"); + actual_match = match(cur, len, at_begin); + if (actual_match.match_len == max_match.match_len) { + old_cur = cur; + max_match = actual_match; + } + } + } + if (len == 0) + return fail; + else { + max_match.pos_len = len; + displayMres(max_match, "SEARCH RESULT", cur) + return max_match; + } + } +} /// Finds forward int findForwardAdv(DocIterator & cur, MatchStringAdv & match) { if (!cur) return 0; + int repeat = 0; + DocIterator orig_cur; // to be used if repeat not successful + MatchResult orig_mres; + do { + orig_cur = cur; + cur.forwardPos(); + } while (cur.depth() > orig_cur.depth()); + cur = orig_cur; while (!theApp()->longOperationCancelled() && cur) { - LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); - MatchResult mres = match(cur, -1, false); - int match_len = mres.match_len; - LYXERR(Debug::FIND, "match_len: " << match_len); + //(void) findAdvForwardInnermost(cur); + LYXERR(Debug::FINDVERBOSE, "findForwardAdv() cur: " << cur); + MatchResult mres = match(cur, -1, MatchStringAdv::MatchAnyPlace); + string msg = "Starting"; + if (repeat > 0) + msg = "Repeated"; + displayMres(mres, msg + " findForwardAdv", cur) + int match_len = mres.match_len; if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) { - LYXERR0("BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); + LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); match_len = 0; } - if (match_len > 0) { + if (match_len <= 0) { + if (repeat > 0) { + repeat--; + } + else { + // This should exit nested insets, if any, or otherwise undefine the currsor. + cur.pos() = cur.lastpos(); + } + LYXERR(Debug::FINDVERBOSE, "Advancing pos: cur=" << cur); + cur.forwardPos(); + } + else { // match_len > 0 // Try to find the begin of searched string - int increment = mres.pos/2; - while (mres.pos > 5 && (increment > 5)) { + int increment; + int firstInvalid = cur.lastpos() - cur.pos(); + { + int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4; + int incrcur = (firstInvalid + 1 )*3/4; + if (incrcur < incrmatch) + increment = incrcur; + else + increment = incrmatch; + if (increment < 1) + increment = 1; + } + LYXERR(Debug::FINDVERBOSE, "Set increment to " << increment); + while (increment > 0) { DocIterator old_cur = cur; - for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { - } - if (! cur) { - cur = old_cur; + if (cur.pos() + increment >= cur.lastpos()) { increment /= 2; + continue; } - else { - MatchResult mres2 = match(cur, -1, false); - if ((mres2.match2end < mres.match2end) || - (mres2.match_len < mres.match_len)) { + cur.pos() = cur.pos() + increment; + MatchResult mres2 = match(cur, -1, MatchStringAdv::MatchAnyPlace); + displayMres(mres2, "findForwardAdv loop", cur) + switch (interpretMatch(mres, mres2)) { + case MatchResult::newIsTooFar: + // behind the expected match + firstInvalid = increment; cur = old_cur; increment /= 2; - } - else { + break; + case MatchResult::newIsBetter: + // not reached yet, but cur.pos()+increment is better mres = mres2; - increment -= 2; - if (increment > mres.pos/2) - increment = mres.pos/2; - } - } - } - int match_len_zero_count = 0; - for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { - if (i++ > 10) { - int remaining_len = match(cur, -1, false).match_len; - if (remaining_len <= 0) { - // Apparently the searched string is not in the remaining part + firstInvalid -= increment; + if (increment > firstInvalid*3/4) + increment = firstInvalid*3/4; + if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) { + if (increment >= mres2.match_prefix) + increment = (mres2.match_prefix+1)*3/4; + } + break; + default: + // Todo@ + // Handle not like MatchResult::newIsTooFar + LYXERR(Debug::FINDVERBOSE, "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix); + firstInvalid--; + increment = increment*3/4; + cur = old_cur; break; - } - else { - i = 0; - } } - LYXERR(Debug::FIND, "Advancing cur: " << cur); - int match_len3 = match(cur, 1).match_len; - if (match_len3 < 0) + } + if (mres.match_len > 0) { + if (mres.match_prefix + mres.pos - mres.leadsize > 0) { + // The match seems to indicate some deeper level + repeat = 2; + orig_cur = cur; + orig_mres = mres; + cur.forwardPos(); continue; - int match_len2 = match(cur).match_len; - LYXERR(Debug::FIND, "match_len2: " << match_len2); - if (match_len2 > 0) { - // Sometimes in finalize we understand it wasn't a match - // and we need to continue the outest loop - int len = findAdvFinalize(cur, match); - if (len > 0) { - return len; - } - } - if (match_len2 >= 0) { - if (match_len2 == 0) - match_len_zero_count++; - else - match_len_zero_count = 0; - } - else { - if (++match_len_zero_count > 3) { - LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len); - match_len_zero_count = 0; - } - break; } } - if (!cur) - return 0; - } - if (match_len >= 0 && cur.pit() < cur.lastpit()) { - LYXERR(Debug::FIND, "Advancing par: cur=" << cur); - cur.forwardPar(); - } else { - // This should exit nested insets, if any, or otherwise undefine the currsor. - cur.pos() = cur.lastpos(); - LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); - cur.forwardPos(); + else if (repeat > 0) { + // should never be reached. + cur = orig_cur; + mres = orig_mres; + } + // LYXERR0("Leaving first loop"); + LYXERR(Debug::FINDVERBOSE, "Finalizing 1"); + MatchResult found_match = findAdvFinalize(cur, match, mres); + if (found_match.match_len > 0) { + match.FillResults(found_match); + return found_match.pos_len; + } + else { + // try next possible match + cur.forwardPos(); + repeat = false; + continue; + } } } return 0; @@ -3223,25 +4529,27 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) /// Find the most backward consecutive match within same paragraph while searching backwards. -int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) +MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, MatchResult &expected) { - DocIterator cur_begin = doc_iterator_begin(cur.buffer()); + DocIterator cur_begin = cur; + cur_begin.pos() = 0; DocIterator tmp_cur = cur; - int len = findAdvFinalize(tmp_cur, match); + MatchResult mr = findAdvFinalize(tmp_cur, match, expected); Inset & inset = cur.inset(); for (; cur != cur_begin; cur.backwardPos()) { - LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur); + LYXERR(Debug::FINDVERBOSE, "findMostBackwards(): cur=" << cur); DocIterator new_cur = cur; new_cur.backwardPos(); - if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len) + if (new_cur == cur || &new_cur.inset() != &inset + || match(new_cur, -1, MatchStringAdv::MatchFromStart).match_len <= 0) break; - int new_len = findAdvFinalize(new_cur, match); - if (new_len == len) + MatchResult new_mr = findAdvFinalize(new_cur, match, expected); + if (new_mr.match_len == mr.match_len) break; - len = new_len; + mr = new_mr; } - LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur); - return len; + LYXERR(Debug::FINDVERBOSE, "findMostBackwards(): exiting with cur=" << cur); + return mr; } @@ -3259,21 +4567,26 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) bool pit_changed = false; do { cur.pos() = 0; - bool found_match = (match(cur, -1, false).match_len > 0); + MatchResult found_match = match(cur, -1, MatchStringAdv::MatchAnyPlace); - if (found_match) { + if (found_match.match_len > 0) { if (pit_changed) cur.pos() = cur.lastpos(); else cur.pos() = cur_orig.pos(); - LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur); + LYXERR(Debug::FINDVERBOSE, "findBackAdv2: cur: " << cur); DocIterator cur_prev_iter; do { - found_match = (match(cur).match_len > 0); - LYXERR(Debug::FIND, "findBackAdv3: found_match=" - << found_match << ", cur: " << cur); - if (found_match) - return findMostBackwards(cur, match); + found_match = match(cur, -1, MatchStringAdv::MatchFromStart); + LYXERR(Debug::FINDVERBOSE, "findBackAdv3: found_match=" + << (found_match.match_len > 0) << ", cur: " << cur); + if (found_match.match_len > 0) { + MatchResult found_mr = findMostBackwards(cur, match, found_match); + if (found_mr.pos_len > 0) { + match.FillResults(found_mr); + return found_mr.pos_len; + } + } // Stop if begin of document reached if (cur == cur_begin) @@ -3301,7 +4614,7 @@ docstring stringifyFromForSearch(FindAndReplaceOptions const & opt, DocIterator const & cur, int len) { if (cur.pos() < 0 || cur.pos() > cur.lastpos()) - return docstring(); + return docstring(); if (!opt.ignoreformat) return latexifyFromCursor(cur, len); else @@ -3310,13 +4623,13 @@ docstring stringifyFromForSearch(FindAndReplaceOptions const & opt, FindAndReplaceOptions::FindAndReplaceOptions( - docstring const & find_buf_name, bool casesensitive, - bool matchword, bool forward, bool expandmacros, bool ignoreformat, - docstring const & repl_buf_name, bool keep_case, - SearchScope scope, SearchRestriction restr) - : find_buf_name(find_buf_name), casesensitive(casesensitive), matchword(matchword), - forward(forward), expandmacros(expandmacros), ignoreformat(ignoreformat), - repl_buf_name(repl_buf_name), keep_case(keep_case), scope(scope), restr(restr) + docstring const & _find_buf_name, bool _casesensitive, + bool _matchword, bool _forward, bool _expandmacros, bool _ignoreformat, + docstring const & _repl_buf_name, bool _keep_case, + SearchScope _scope, SearchRestriction _restr, bool _replace_all) + : find_buf_name(_find_buf_name), casesensitive(_casesensitive), matchword(_matchword), + forward(_forward), expandmacros(_expandmacros), ignoreformat(_ignoreformat), + repl_buf_name(_repl_buf_name), keep_case(_keep_case), scope(_scope), restr(_restr), replace_all(_replace_all) { } @@ -3330,7 +4643,7 @@ static bool allNonLowercase(Cursor const & cur, int len) pos_type beg_pos = cur.selectionBegin().pos(); pos_type end_pos = cur.selectionBegin().pos() + len; if (len > cur.lastpos() + 1 - beg_pos) { - LYXERR(Debug::FIND, "This should not happen, more debug needed"); + LYXERR(Debug::FINDVERBOSE, "This should not happen, more debug needed"); len = cur.lastpos() + 1 - beg_pos; end_pos = beg_pos + len; } @@ -3347,13 +4660,13 @@ static bool firstUppercase(Cursor const & cur) char_type ch1, ch2; pos_type pos = cur.selectionBegin().pos(); if (pos >= cur.lastpos() - 1) { - LYXERR(Debug::FIND, "No upper-case at cur: " << cur); + LYXERR(Debug::FINDVERBOSE, "No upper-case at cur: " << cur); return false; } ch1 = cur.paragraph().getChar(pos); ch2 = cur.paragraph().getChar(pos + 1); bool result = isUpperCase(ch1) && isLowerCase(ch2); - LYXERR(Debug::FIND, "firstUppercase(): " + LYXERR(Debug::FINDVERBOSE, "firstUppercase(): " << "ch1=" << ch1 << "(" << char(ch1) << "), ch2=" << ch2 << "(" << char(ch2) << ")" << ", result=" << result << ", cur=" << cur); @@ -3368,50 +4681,80 @@ static bool firstUppercase(Cursor const & cur) static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase others_case) { ParagraphList::iterator pit = buffer.paragraphs().begin(); - LASSERT(pit->size() >= 1, /**/); + LASSERT(!pit->empty(), /**/); pos_type right = pos_type(1); pit->changeCase(buffer.params(), pos_type(0), right, first_case); right = pit->size(); pit->changeCase(buffer.params(), pos_type(1), right, others_case); } - } // namespace +static bool replaceMatches(string &t, int maxmatchnum, vector const & replacements) +{ + // Should replace the string "$" + std::to_string(matchnum) with replacement + // if the char '$' is not prefixed with odd number of char '\\' + static regex const rematch("(\\\\)*(\\$\\$([0-9]))"); + string s; + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), rematch), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) + continue; + int num = stoi(sub.str(3), nullptr, 10); + if (num >= maxmatchnum) + continue; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += replacements[num]; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + return false; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = s; + return true; +} + /// -static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv) +static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv) { Cursor & cur = bv->cursor(); - if (opt.repl_buf_name == docstring() - || theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0 - || theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0) - return; + if (opt.repl_buf_name.empty() + || theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0 + || theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0) + return 0; DocIterator sel_beg = cur.selectionBegin(); DocIterator sel_end = cur.selectionEnd(); if (&sel_beg.inset() != &sel_end.inset() - || sel_beg.pit() != sel_end.pit() - || sel_beg.idx() != sel_end.idx()) - return; + || sel_beg.pit() != sel_end.pit() + || sel_beg.idx() != sel_end.idx()) + return 0; int sel_len = sel_end.pos() - sel_beg.pos(); - LYXERR(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end + LYXERR(Debug::FINDVERBOSE, "sel_beg: " << sel_beg << ", sel_end: " << sel_end << ", sel_len: " << sel_len << endl); if (sel_len == 0) - return; - LASSERT(sel_len > 0, return); + return 0; + LASSERT(sel_len > 0, return 0); - if (!matchAdv(sel_beg, sel_len).match_len) - return; + if (matchAdv(sel_beg, sel_len, MatchStringAdv::MatchFromStart).match_len <= 0) + return 0; // Build a copy of the replace buffer, adapted to the KeepCase option - Buffer & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true); + Buffer const & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true); ostringstream oss; repl_buffer_orig.write(oss); string lyx = oss.str(); - Buffer repl_buffer("", false); + if (matchAdv.valid_matches > 0) + replaceMatches(lyx, matchAdv.valid_matches, matchAdv.matches); + Buffer repl_buffer(string(), false); + repl_buffer.setInternal(true); repl_buffer.setUnnamed(true); - LASSERT(repl_buffer.readString(lyx), return); + LASSERT(repl_buffer.readString(lyx), return 0); if (opt.keep_case && sel_len >= 2) { - LYXERR(Debug::FIND, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len); + LYXERR(Debug::FINDVERBOSE, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len); if (cur.inTexted()) { if (firstUppercase(cur)) changeFirstCase(repl_buffer, text_uppercase, text_lowercase); @@ -3422,53 +4765,58 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M cap::cutSelection(cur, false); if (cur.inTexted()) { repl_buffer.changeLanguage( - repl_buffer.language(), - cur.getFont().language()); - LYXERR(Debug::FIND, "Replacing by pasteParagraphList()ing repl_buffer"); - LYXERR(Debug::FIND, "Before pasteParagraphList() cur=" << cur << endl); + repl_buffer.language(), + cur.getFont().language()); + LYXERR(Debug::FINDVERBOSE, "Replacing by pasteParagraphList()ing repl_buffer"); + LYXERR(Debug::FINDVERBOSE, "Before pasteParagraphList() cur=" << cur << endl); cap::pasteParagraphList(cur, repl_buffer.paragraphs(), repl_buffer.params().documentClassPtr(), + repl_buffer.params().authors(), bv->buffer().errorList("Paste")); - LYXERR(Debug::FIND, "After pasteParagraphList() cur=" << cur << endl); + LYXERR(Debug::FINDVERBOSE, "After pasteParagraphList() cur=" << cur << endl); sel_len = repl_buffer.paragraphs().begin()->size(); } else if (cur.inMathed()) { odocstringstream ods; otexstream os(ods); - OutputParams runparams(&repl_buffer.params().encoding()); + // OutputParams runparams(&repl_buffer.params().encoding()); + OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = false; - runparams.flavor = OutputParams::LATEX; - runparams.linelen = 8000; //lyxrc.plaintext_linelen; - runparams.dryrun = true; - TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams); + setFindParams(runparams); + TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams, string(), -1, -1, true); //repl_buffer.getSourceCode(ods, 0, repl_buffer.paragraphs().size(), false); docstring repl_latex = ods.str(); - LYXERR(Debug::FIND, "Latexified replace_buffer: '" << repl_latex << "'"); + LYXERR(Debug::FINDVERBOSE, "Latexified replace_buffer: '" << repl_latex << "'"); string s; (void)regex_replace(to_utf8(repl_latex), s, "\\$(.*)\\$", "$1"); (void)regex_replace(s, s, "\\\\\\[(.*)\\\\\\]", "$1"); repl_latex = from_utf8(s); - LYXERR(Debug::FIND, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth()); + LYXERR(Debug::FINDVERBOSE, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth()); MathData ar(cur.buffer()); asArray(repl_latex, ar, Parse::NORMAL); cur.insert(ar); sel_len = ar.size(); - LYXERR(Debug::FIND, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); + LYXERR(Debug::FINDVERBOSE, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); } if (cur.pos() >= sel_len) cur.pos() -= sel_len; else cur.pos() = 0; - LYXERR(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); + LYXERR(Debug::FINDVERBOSE, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len); bv->putSelectionAt(DocIterator(cur), sel_len, !opt.forward); bv->processUpdateFlags(Update::Force); + return 1; } +static bool isWordChar(char_type c) +{ + return isLetterChar(c) || isNumberChar(c); +} /// Perform a FindAdv operation. -bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) +bool findAdv(BufferView * bv, FindAndReplaceOptions & opt) { DocIterator cur; - int match_len = 0; + int pos_len = 0; // e.g., when invoking word-findadv from mini-buffer wither with // wrong options syntax or before ever opening advanced F&R pane @@ -3477,30 +4825,104 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) try { MatchStringAdv matchAdv(bv->buffer(), opt); +#if QTSEARCH + if (!matchAdv.regexIsValid) { + bv->message(lyx::from_utf8(matchAdv.regexError)); + return(false); + } +#endif int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos(); if (length > 0) bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward); - findAdvReplace(bv, opt, matchAdv); + num_replaced += findAdvReplace(bv, opt, matchAdv); cur = bv->cursor(); - if (opt.forward) - match_len = findForwardAdv(cur, matchAdv); + if (opt.forward) { + if (opt.matchword && cur.pos() > 0) { // Skip word-characters if we are in the mid of a word + if (cur.inTexted()) { + Paragraph const & par = cur.paragraph(); + int len_limit, new_pos; + if (cur.lastpos() < par.size()) + len_limit = cur.lastpos(); + else + len_limit = par.size(); + for (new_pos = cur.pos() - 1; new_pos < len_limit; new_pos++) { + if (!isWordChar(par.getChar(new_pos))) + break; + } + if (new_pos > cur.pos()) + cur.pos() = new_pos; + } + else if (cur.inMathed()) { + // Check if 'cur.pos()-1' and 'cur.pos()' both point to a letter, + // I am not sure, we should consider the selection + bool sel = bv->cursor().selection(); + if (!sel && cur.pos() < cur.lastpos()) { + CursorSlice const & cs = cur.top(); + MathData md = cs.cell(); + int len = -1; + MathData::const_iterator it_end = md.end(); + MathData md2; + // Start the check with one character before actual cursor position + for (MathData::const_iterator it = md.begin() + cs.pos() - 1; + it != it_end; ++it) + md2.push_back(*it); + docstring inp = asString(md2); + for (len = 0; (unsigned) len < inp.size() && len + cur.pos() <= cur.lastpos(); len++) { + if (!isWordChar(inp[len])) + break; + } + // len == 0 means previous char was a word separator + // len == 1 search starts with a word separator + // len == 2 ... we have to skip len -1 chars + if (len > 1) + cur.pos() = cur.pos() + len - 1; + } + } + opt.matchword = false; + } + pos_len = findForwardAdv(cur, matchAdv); + } else - match_len = findBackwardsAdv(cur, matchAdv); - } catch (...) { - // This may only be raised by lyx::regex() - bv->message(_("Invalid regular expression!")); + pos_len = findBackwardsAdv(cur, matchAdv); + } catch (exception & ex) { + bv->message(from_utf8(ex.what())); return false; } - if (match_len == 0) { - bv->message(_("Match not found!")); + if (pos_len == 0) { + if (num_replaced > 0) { + switch (num_replaced) + { + case 1: + bv->message(_("One match has been replaced.")); + break; + case 2: + bv->message(_("Two matches have been replaced.")); + break; + default: + bv->message(bformat(_("%1$d matches have been replaced."), num_replaced)); + break; + } + num_replaced = 0; + } + else { + bv->message(_("Match not found.")); + } return false; } - bv->message(_("Match found!")); + if (num_replaced > 0) + bv->message(_("Match has been replaced.")); + else + bv->message(_("Match found.")); - LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len); - bv->putSelectionAt(cur, match_len, !opt.forward); + if (cur.pos() + pos_len > cur.lastpos()) { + // Prevent crash in bv->putSelectionAt() + // Should never happen, maybe LASSERT() here? + pos_len = cur.lastpos() - cur.pos(); + } + LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len); + bv->putSelectionAt(cur, pos_len, !opt.forward); return true; } @@ -3514,12 +4936,13 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt << opt.forward << ' ' << opt.expandmacros << ' ' << opt.ignoreformat << ' ' + << opt.replace_all << ' ' << to_utf8(opt.repl_buf_name) << "\nEOSS\n" << opt.keep_case << ' ' << int(opt.scope) << ' ' << int(opt.restr); - LYXERR(Debug::FIND, "built: " << os.str()); + LYXERR(Debug::FINDVERBOSE, "built: " << os.str()); return os; } @@ -3527,7 +4950,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) { - LYXERR(Debug::FIND, "parsing"); + // LYXERR(Debug::FINDVERBOSE, "parsing"); string s; string line; getline(is, line); @@ -3539,9 +4962,9 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); + // LYXERR(Debug::FINDVERBOSE, "file_buf_name: '" << s << "'"); opt.find_buf_name = from_utf8(s); - is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat; + is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all; is.get(); // Waste space before replace string s = ""; getline(is, line); @@ -3553,7 +4976,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); + // LYXERR(Debug::FINDVERBOSE, "repl_buf_name: '" << s << "'"); opt.repl_buf_name = from_utf8(s); is >> opt.keep_case; int i; @@ -3562,9 +4985,11 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) is >> i; opt.restr = FindAndReplaceOptions::SearchRestriction(i); - LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' + /* + LYXERR(Debug::FINDVERBOSE, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' ' << opt.scope << ' ' << opt.restr); + */ return is; }