X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxfind.cpp;h=93e454fcf10d058a9c86a15a97485cb2a8f1b4e7;hb=3a1b19c5c363428f424180270e32bc8b468ea54f;hp=d4085d04cddbb66c811e768236ffd3a062768337;hpb=e55244ccd8432c1e2caf2ab381a442c71cb0313c;p=lyx.git diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index d4085d04cd..93e454fcf1 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -8,6 +8,7 @@ * \author Jürgen Vigna * \author Alfredo Braunstein * \author Tommaso Cucinotta + * \author Kornel Benko * * Full author contact details are available in file CREDITS. */ @@ -17,7 +18,6 @@ #include "lyxfind.h" #include "Buffer.h" -#include "buffer_funcs.h" #include "BufferList.h" #include "BufferParams.h" #include "BufferView.h" @@ -29,97 +29,111 @@ #include "output_latex.h" #include "OutputParams.h" #include "Paragraph.h" -#include "ParIterator.h" -#include "TexRow.h" #include "Text.h" #include "Encoding.h" +#include "Language.h" #include "frontends/Application.h" #include "frontends/alert.h" #include "mathed/InsetMath.h" -#include "mathed/InsetMathGrid.h" #include "mathed/InsetMathHull.h" #include "mathed/MathData.h" #include "mathed/MathStream.h" #include "mathed/MathSupport.h" -#include "support/convert.h" #include "support/debug.h" #include "support/docstream.h" #include "support/FileName.h" #include "support/gettext.h" #include "support/lassert.h" #include "support/lstrings.h" - -#include "support/regex.h" #include "support/textutils.h" -#include + +#include +#include + +//#define ResultsDebug +#define USE_QT_FOR_SEARCH +#if defined(USE_QT_FOR_SEARCH) + #include // sets QT_VERSION + #if (QT_VERSION >= 0x050000) + #include + #define QTSEARCH 1 + #else + #define QTSEARCH 0 + #endif +#else + #define QTSEARCH 0 +#endif using namespace std; using namespace lyx::support; namespace lyx { +typedef unordered_map AccentsMap; +typedef unordered_map::const_iterator AccentsIterator; +static AccentsMap accents = unordered_map(); // Helper class for deciding what should be ignored class IgnoreFormats { public: /// - IgnoreFormats() - : ignoreFamily_(false), ignoreSeries_(false), - ignoreShape_(false), ignoreUnderline_(false), - ignoreMarkUp_(false), ignoreStrikeOut_(false), - ignoreSectioning_(false), ignoreFrontMatter_(false), - ignoreColor_(false), ignoreLanguage_(false) {} + IgnoreFormats() = default; /// - bool getFamily() { return ignoreFamily_; }; + bool getFamily() const { return ignoreFamily_; } /// - bool getSeries() { return ignoreSeries_; }; + bool getSeries() const { return ignoreSeries_; } /// - bool getShape() { return ignoreShape_; }; + bool getShape() const { return ignoreShape_; } /// - bool getUnderline() { return ignoreUnderline_; }; + bool getUnderline() const { return ignoreUnderline_; } /// - bool getMarkUp() { return ignoreMarkUp_; }; + bool getMarkUp() const { return ignoreMarkUp_; } /// - bool getStrikeOut() { return ignoreStrikeOut_; }; + bool getStrikeOut() const { return ignoreStrikeOut_; } /// - bool getSectioning() { return ignoreSectioning_; }; + bool getSectioning() const { return ignoreSectioning_; } /// - bool getFrontMatter() { return ignoreFrontMatter_; }; + bool getFrontMatter() const { return ignoreFrontMatter_; } /// - bool getColor() { return ignoreColor_; }; + bool getColor() const { return ignoreColor_; } /// - bool getLanguage() { return ignoreLanguage_; }; + bool getLanguage() const { return ignoreLanguage_; } /// - void setIgnoreFormat(string type, bool value); + bool getDeleted() const { return ignoreDeleted_; } + /// + void setIgnoreDeleted(bool value); + /// + void setIgnoreFormat(string const & type, bool value); private: /// - bool ignoreFamily_; + bool ignoreFamily_ = false; + /// + bool ignoreSeries_ = false; /// - bool ignoreSeries_; + bool ignoreShape_ = false; /// - bool ignoreShape_; + bool ignoreUnderline_ = false; /// - bool ignoreUnderline_; + bool ignoreMarkUp_ = false; /// - bool ignoreMarkUp_; + bool ignoreStrikeOut_ = false; /// - bool ignoreStrikeOut_; + bool ignoreSectioning_ = false; /// - bool ignoreSectioning_; + bool ignoreFrontMatter_ = false; /// - bool ignoreFrontMatter_; + bool ignoreColor_ = false; /// - bool ignoreColor_; + bool ignoreLanguage_ = false; /// - bool ignoreLanguage_; + bool ignoreDeleted_ = true; }; - -void IgnoreFormats::setIgnoreFormat(string type, bool value) +void IgnoreFormats::setIgnoreFormat(string const & type, bool value) { if (type == "color") { ignoreColor_ = value; @@ -154,13 +168,16 @@ void IgnoreFormats::setIgnoreFormat(string type, bool value) else if (type == "strike") { ignoreStrikeOut_ = value; } + else if (type == "deleted") { + ignoreDeleted_ = value; + } } // The global variable that can be changed from outside IgnoreFormats ignoreFormats; -void setIgnoreFormat(string type, bool value) +void setIgnoreFormat(string const & type, bool value) { ignoreFormats.setIgnoreFormat(type, value); } @@ -178,11 +195,11 @@ bool parse_bool(docstring & howto) } -class MatchString : public binary_function +class MatchString { public: - MatchString(docstring const & str, bool cs, bool mw) - : str(str), case_sens(cs), whole_words(mw) + MatchString(docstring const & s, bool cs, bool mw) + : str(s), case_sens(cs), whole_words(mw) {} // returns true if the specified string is at the specified position @@ -357,11 +374,10 @@ pair replaceOne(BufferView * bv, docstring searchstr, bool whole, bool forward, bool findnext) { Cursor & cur = bv->cursor(); - bool found = false; if (!cur.selection()) { // no selection, non-empty search string: find it if (!searchstr.empty()) { - found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); + bool const found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); return make_pair(found, 0); } // empty search string @@ -372,7 +388,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, // This causes a minor bug as undo will restore this selection, // which the user did not create (#8986). cur.innerText()->selectWord(cur, WHOLE_WORD); - searchstr = cur.selectionAsString(false); + searchstr = cur.selectionAsString(false, true); } // if we still don't have a search string, report the error @@ -381,7 +397,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, return make_pair(false, 0); bool have_selection = cur.selection(); - docstring const selected = cur.selectionAsString(false); + docstring const selected = cur.selectionAsString(false, true); bool match = case_sens ? searchstr == selected @@ -390,7 +406,7 @@ pair replaceOne(BufferView * bv, docstring searchstr, // no selection or current selection is not search word: // just find the search word if (!have_selection || !match) { - found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); + bool const found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext); return make_pair(found, 0); } @@ -460,12 +476,11 @@ bool lyxfind(BufferView * bv, FuncRequest const & ev) bool matchword = parse_bool(howto); bool forward = parse_bool(howto); - return findOne(bv, search, casesensitive, matchword, forward, true, true); + return findOne(bv, search, casesensitive, matchword, forward, false, true); } -bool lyxreplace(BufferView * bv, - FuncRequest const & ev, bool has_deleted) +bool lyxreplace(BufferView * bv, FuncRequest const & ev) { if (!bv || ev.action() != LFUN_WORD_REPLACE) return false; @@ -487,40 +502,31 @@ bool lyxreplace(BufferView * bv, bool update = false; - if (!has_deleted) { - int replace_count = 0; - if (all) { - replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); - update = replace_count > 0; - } else { - pair rv = - replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); - update = rv.first; - replace_count = rv.second; - } + int replace_count = 0; + if (all) { + replace_count = replaceAll(bv, search, rplc, casesensitive, matchword); + update = replace_count > 0; + } else { + pair rv = + replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext); + update = rv.first; + replace_count = rv.second; + } - Buffer const & buf = bv->buffer(); - if (!update) { - // emit message signal. - buf.message(_("String not found.")); + Buffer const & buf = bv->buffer(); + if (!update) { + // emit message signal. + buf.message(_("String not found.")); + } else { + if (replace_count == 0) { + buf.message(_("String found.")); + } else if (replace_count == 1) { + buf.message(_("String has been replaced.")); } else { - if (replace_count == 0) { - buf.message(_("String found.")); - } else if (replace_count == 1) { - buf.message(_("String has been replaced.")); - } else { - docstring const str = - bformat(_("%1$d strings have been replaced."), replace_count); - buf.message(str); - } + docstring const str = + bformat(_("%1$d strings have been replaced."), replace_count); + buf.message(str); } - } else if (findnext) { - // if we have deleted characters, we do not replace at all, but - // rather search for the next occurence - if (findOne(bv, search, casesensitive, matchword, forward, true, findnext)) - update = true; - else - bv->message(_("String not found.")); } return update; } @@ -644,139 +650,111 @@ namespace { typedef vector > Escapes; -/// A map of symbols and their escaped equivalent needed within a regex. -/// @note Beware of order -Escapes const & get_regexp_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("$", "_x_$")); - escape_map.push_back(P("{", "_x_{")); - escape_map.push_back(P("}", "_x_}")); - escape_map.push_back(P("[", "_x_[")); - escape_map.push_back(P("]", "_x_]")); - escape_map.push_back(P("(", "_x_(")); - escape_map.push_back(P(")", "_x_)")); - escape_map.push_back(P("+", "_x_+")); - escape_map.push_back(P("*", "_x_*")); - escape_map.push_back(P(".", "_x_.")); - escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)")); - escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)")); - escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)")); - escape_map.push_back(P("_x_", "\\")); - } - return escape_map; -} - -/// A map of lyx escaped strings and their unescaped equivalent. -Escapes const & get_lyx_unescapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\%", "%")); - escape_map.push_back(P("\\mathcircumflex ", "^")); - escape_map.push_back(P("\\mathcircumflex", "^")); - escape_map.push_back(P("\\backslash ", "\\")); - escape_map.push_back(P("\\backslash", "\\")); - escape_map.push_back(P("\\\\{", "_x_<")); - escape_map.push_back(P("\\\\}", "_x_>")); - escape_map.push_back(P("\\sim ", "~")); - escape_map.push_back(P("\\sim", "~")); - } - return escape_map; -} - -/// A map of escapes turning a regexp matching text to one matching latex. -Escapes const & get_regexp_latex_escapes() -{ - typedef std::pair P; - - static Escapes escape_map; - if (escape_map.empty()) { - escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)")); - escape_map.push_back(P("(first << " as " << it->second); - unsigned int pos = 0; - while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) { - s.replace(pos, it->first.length(), it->second); - LYXERR(Debug::FIND, "After escape: " << s); - pos += it->second.length(); -// LYXERR(Debug::FIND, "pos: " << pos); +string string2regex(string in) +{ + static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" }; + string temp = std::regex_replace(in, specialChars, R"(\$&)" ); + string temp2(""); + size_t lastpos = 0; + size_t fl_pos = 0; + int offset = 1; + while (fl_pos < temp.size()) { + fl_pos = temp.find("\\\\foreignlanguage", lastpos + offset); + if (fl_pos == string::npos) + break; + offset = 16; + temp2 += temp.substr(lastpos, fl_pos - lastpos); + temp2 += "\\n"; + lastpos = fl_pos; + } + if (lastpos == 0) + return(temp); + if (lastpos < temp.size()) { + temp2 += temp.substr(lastpos, temp.size() - lastpos); + } + return temp2; +} + +string correctRegex(string t, bool withformat) +{ + /* Convert \backslash => \ + * and \{, \}, \[, \] => {, }, [, ] + */ + string s(""); + regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))"); + size_t lastpos = 0; + smatch sub; + bool backslashed = false; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + string replace; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else { + if (sub.str(4) == "backslash") { + replace = "\\"; + if (withformat) { + // transforms '\backslash \{' into '\{' + // and '\{' into '{' + string next = t.substr(sub.position(2) + sub.str(2).length(), 2); + if ((next == "\\{") || (next == "\\}")) { + replace = ""; + backslashed = true; + } + } + } + else if (sub.str(4) == "mathcircumflex") + replace = "^"; + else if (backslashed) { + backslashed = false; + if (withformat && (sub.str(3) == "{")) + replace = accents["braceleft"]; + else if (withformat && (sub.str(3) == "}")) + replace = accents["braceright"]; + else { + // else part should not exist + LASSERT(1, /**/); + } + } + else + replace = sub.str(3); } + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += replace; + lastpos = sub.position(2) + sub.length(2); } - LYXERR(Debug::FIND, "Escaped : '" << s << "'"); + if (lastpos == 0) + return t; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); return s; } - /// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string), /// while outside apply get_lyx_unescapes()+get_regexp_escapes(). /// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well. -string escape_for_regex(string s, bool match_latex) -{ - size_t pos = 0; - while (pos < s.size()) { - size_t new_pos = s.find("\\regexp{", pos); - if (new_pos == string::npos) - new_pos = s.size(); - string t; - if (new_pos > pos) { - LYXERR(Debug::FIND, "new_pos: " << new_pos); - t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes()); - LYXERR(Debug::FIND, "t [lyx]: " << t); - t = apply_escapes(t, get_regexp_escapes()); - LYXERR(Debug::FIND, "t [rxp]: " << t); - s.replace(pos, new_pos - pos, t); - new_pos = pos + t.size(); - LYXERR(Debug::FIND, "Regexp after escaping: " << s); - LYXERR(Debug::FIND, "new_pos: " << new_pos); - if (new_pos == s.size()) - break; - } - // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes) - size_t end_pos = s.find("\\endregexp{}}", new_pos + 8); - LYXERR(Debug::FIND, "end_pos: " << end_pos); - t = s.substr(new_pos + 8, end_pos - (new_pos + 8)); - LYXERR(Debug::FIND, "t in regexp : " << t); - t = apply_escapes(t, get_lyx_unescapes()); - LYXERR(Debug::FIND, "t in regexp [lyx]: " << t); - if (match_latex) { - t = apply_escapes(t, get_regexp_latex_escapes()); - LYXERR(Debug::FIND, "t in regexp [ltx]: " << t); +string escape_for_regex(string s, bool withformat) +{ + size_t lastpos = 0; + string result = ""; + while (lastpos < s.size()) { + size_t regex_pos = s.find("\\regexp{", lastpos); + if (regex_pos == string::npos) { + regex_pos = s.size(); } - if (end_pos == s.size()) { - s.replace(new_pos, end_pos - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s); - break; + if (regex_pos > lastpos) { + result += string2regex(s.substr(lastpos, regex_pos-lastpos)); + lastpos = regex_pos; + if (lastpos == s.size()) + break; } - s.replace(new_pos, end_pos + 13 - new_pos, t); - LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s); - pos = new_pos + t.size(); - LYXERR(Debug::FIND, "pos: " << pos); + size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8); + result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat); + lastpos = end_pos + 13; } - return s; + return result; } @@ -784,72 +762,54 @@ string escape_for_regex(string s, bool match_latex) bool regex_replace(string const & s, string & t, string const & searchstr, string const & replacestr) { - lyx::regex e(searchstr, regex_constants::ECMAScript); + regex e(searchstr, regex_constants::ECMAScript); ostringstream oss; ostream_iterator it(oss); - lyx::regex_replace(it, s.begin(), s.end(), e, replacestr); + regex_replace(it, s.begin(), s.end(), e, replacestr); // tolerate t and s be references to the same variable bool rv = (s != oss.str()); t = oss.str(); return rv; } - -/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces. - ** - ** Verify that closed braces exactly match open braces. This avoids that, for example, - ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'. - ** - ** @param unmatched - ** Number of open braces that must remain open at the end for the verification to succeed. - **/ -bool braces_match(string::const_iterator const & beg, - string::const_iterator const & end, - int unmatched = 0) -{ - int open_pars = 0; - string::const_iterator it = beg; - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'"); - for (; it != end; ++it) { - // Skip escaped braces in the count - if (*it == '\\') { - ++it; - if (it == end) - break; - } else if (*it == '{') { - ++open_pars; - } else if (*it == '}') { - if (open_pars == 0) { - LYXERR(Debug::FIND, "Found unmatched closed brace"); - return false; - } else - --open_pars; - } - } - if (open_pars != unmatched) { - LYXERR(Debug::FIND, "Found " << open_pars - << " instead of " << unmatched - << " unmatched open braces at the end of count"); - return false; - } - LYXERR(Debug::FIND, "Braces match as expected"); - return true; -} - - class MatchResult { public: + enum range { + newIsTooFar, + newIsBetter, + newIsInvalid + }; int match_len; + int match_prefix; int match2end; int pos; - MatchResult(): match_len(0),match2end(0), pos(0) {}; + int leadsize; + int pos_len; + int searched_size; + vector result = vector (); + MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {}; }; +static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres) +{ + if (newres.match2end < oldres.match2end) + return MatchResult::newIsTooFar; + if (newres.match_len < oldres.match_len) + return MatchResult::newIsTooFar; + + if (newres.match_len == oldres.match_len) { + if (newres.match2end == oldres.match2end) + return MatchResult::newIsBetter; + } + return MatchResult::newIsInvalid; +} + /** The class performing a match between a position in the document and the FindAdvOptions. **/ + class MatchStringAdv { public: - MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt); + MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt); /** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv ** constructor as opt.search, under the opt.* options settings. @@ -862,6 +822,10 @@ public: ** The length of the matching text, or zero if no match was found. **/ MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; +#if QTSEARCH + bool regexIsValid; + string regexError; +#endif public: /// buffer @@ -874,6 +838,7 @@ public: private: /// Auxiliary find method (does not account for opt.matchword) MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const; + void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = ""); /** Normalize a stringified or latexified LyX paragraph. ** @@ -888,13 +853,18 @@ private: ** @todo Normalization should also expand macros, if the corresponding ** search option was checked. **/ - string normalize(docstring const & s, bool hack_braces) const; + string normalize(docstring const & s) const; // normalized string to search string par_as_string; // regular expression to use for searching - lyx::regex regexp; - // same as regexp, but prefixed with a ".*?" - lyx::regex regexp2; + // regexp2 is same as regexp, but prefixed with a ".*?" +#if QTSEARCH + QRegularExpression regexp; + QRegularExpression regexp2; +#else + regex regexp; + regex regexp2; +#endif // leading format material as string string lead_as_string; // par_as_string after removal of lead_as_string @@ -904,10 +874,27 @@ private: // number of (.*?) subexpressions added at end of search regexp for closing // environments, math mode, styles, etc... int close_wildcards; +public: // Are we searching with regular expressions ? bool use_regexp; + static int valid_matches; + static vector matches; + void FillResults(MatchResult &found_mr); }; +int MatchStringAdv::valid_matches = 0; +vector MatchStringAdv::matches = vector (10); + +void MatchStringAdv::FillResults(MatchResult &found_mr) +{ + if (found_mr.match_len > 0) { + valid_matches = found_mr.result.size(); + for (size_t i = 0; i < found_mr.result.size(); i++) + matches[i] = found_mr.result[i]; + } + else + valid_matches = 0; +} static docstring buffer_to_latex(Buffer & buffer) { @@ -916,11 +903,14 @@ static docstring buffer_to_latex(Buffer & buffer) odocstringstream ods; otexstream os(ods); runparams.nice = true; - runparams.flavor = OutputParams::XETEX; + runparams.flavor = Flavor::XeTeX; runparams.linelen = 10000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + if (ignoreFormats.getDeleted()) + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + else + runparams.for_searchAdv = OutputParams::SearchWithDeleted; pit_type const endpit = buffer.paragraphs().size(); for (pit_type pit = 0; pit != endpit; ++pit) { TeXOnePar(buffer, buffer.text(), pit, os, runparams); @@ -939,21 +929,32 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co // OutputParams runparams(&buffer.params().encoding()); OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = true; - runparams.flavor = OutputParams::XETEX; + runparams.flavor = Flavor::XeTeX; runparams.linelen = 10000; //lyxrc.plaintext_linelen; runparams.dryrun = true; - runparams.for_search = true; + int option = AS_STR_INSETS |AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) { Paragraph const & par = buffer.paragraphs().at(pit); LYXERR(Debug::FIND, "Adding to search string: '" << par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams) << "'"); str += par.asString(pos_type(0), par.size(), - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams); } + // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts + string t = to_utf8(str); + while (regex_replace(t, t, "\\\\(text|lyxmathsym)\\{([^\\}]*)\\}", "$2")); + str = from_utf8(t); } return str; } @@ -966,11 +967,17 @@ static size_t identifyLeading(string const & s) // @TODO Support \item[text] // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to search for colored text too - while (regex_replace(t, t, REGEX_BOS "\\\\(((footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|emph|noun|minisec|text(bf|md|sl|sf|it|tt))|((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "") - || regex_replace(t, t, REGEX_BOS "\\$", "") - || regex_replace(t, t, REGEX_BOS "\\\\\\[ ", "") - || regex_replace(t, t, REGEX_BOS " ?\\\\item\\{[a-z]+\\}", "") - || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\} ", "")) + while (regex_replace(t, t, "^\\\\((" + "(author|title|subtitle|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|" + "lyxaddress|lyxrightaddress|" + "footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|" + "emph|noun|minisec|text(bf|md|sl|sf|it|tt))|" + "((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|" + "(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "") + || regex_replace(t, t, "^\\$", "") + || regex_replace(t, t, "^\\\\\\[", "") + || regex_replace(t, t, "^ ?\\\\item\\{[a-z]+\\}", "") + || regex_replace(t, t, "^\\\\begin\\{[a-zA-Z_]*\\*?\\}", "")) ; LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'"); return s.find(t); @@ -1023,7 +1030,7 @@ static Features identifyFeatures(string const & s) } } } - return(info); + return info; } /* @@ -1037,6 +1044,8 @@ class KeyInfo { noContent, /* Char, like \backslash */ isChar, + /* replace starting backslash with '#' */ + isText, /* \part, \section*, ... */ isSectioning, /* title, author etc */ @@ -1067,39 +1076,26 @@ class KeyInfo { isIgnored, /* like \lettrine[lines=5]{}{} */ cleanToStart, + // like isStandard, but always remove head + headRemove, /* End of arguments marker for lettrine, * so that they can be ignored */ endArguments }; - KeyInfo() - : keytype(invalid), - head(""), - _tokensize(-1), - _tokenstart(-1), - _dataStart(-1), - _dataEnd(-1), - parenthesiscount(1), - disabled(false), - used(false) - {}; + KeyInfo() = default; KeyInfo(KeyType type, int parcount, bool disable) : keytype(type), - _tokensize(-1), - _tokenstart(-1), - _dataStart(-1), - _dataEnd(-1), parenthesiscount(parcount), - disabled(disable), - used(false) {}; - KeyType keytype; + disabled(disable) {} + KeyType keytype = invalid; string head; - int _tokensize; - int _tokenstart; - int _dataStart; - int _dataEnd; - int parenthesiscount; - bool disabled; - bool used; /* by pattern */ + int _tokensize = -1; + int _tokenstart = -1; + int _dataStart = -1; + int _dataEnd = -1; + int parenthesiscount = 1; + bool disabled = false; + bool used = false; /* by pattern */ }; class Border { @@ -1111,11 +1107,11 @@ class Border { #define MAXOPENED 30 class Intervall { - bool isPatternString; + bool isPatternString_; public: explicit Intervall(bool isPattern, string const & p) : - isPatternString(isPattern), par(p), ignoreidx(-1), actualdeptindex(0), - hasTitle(false) + isPatternString_(isPattern), par(p), ignoreidx(-1), actualdeptindex(0), + hasTitle(false), langcount(0) { depts[0] = 0; closes[0] = 0; @@ -1127,19 +1123,20 @@ public: int depts[MAXOPENED]; int closes[MAXOPENED]; int actualdeptindex; - int previousNotIgnored(int); - int nextNotIgnored(int); + int previousNotIgnored(int) const; + int nextNotIgnored(int) const; void handleOpenP(int i); void handleCloseP(int i, bool closingAllowed); void resetOpenedP(int openPos); void addIntervall(int upper); void addIntervall(int low, int upper); /* if explicit */ void removeAccents(); - void setForDefaultLang(KeyInfo &defLang); + void setForDefaultLang(KeyInfo const & defLang) const; int findclosing(int start, int end, char up, char down, int repeat); void handleParentheses(int lastpos, bool closingAllowed); bool hasTitle; - int isOpeningPar(int pos); + int langcount; // Number of disabled language specs up to current position in actual interval + int isOpeningPar(int pos) const; string titleValue; void output(ostringstream &os, int lastpos); // string show(int lastpos); @@ -1147,7 +1144,7 @@ public: vector Intervall::borders = vector(30); -int Intervall::isOpeningPar(int pos) +int Intervall::isOpeningPar(int pos) const { if ((pos < 0) || (size_t(pos) >= par.size())) return 0; @@ -1162,16 +1159,20 @@ int Intervall::isOpeningPar(int pos) return 1; } -void Intervall::setForDefaultLang(KeyInfo &defLang) +void Intervall::setForDefaultLang(KeyInfo const & defLang) const { // Enable the use of first token again if (ignoreidx >= 0) { int value = defLang._tokenstart + defLang._tokensize; + int borderidx = 0; + if (hasTitle) { + borderidx = 1; + } if (value > 0) { - if (borders[0].low < value) - borders[0].low = value; - if (borders[0].upper < value) - borders[0].upper = value; + if (borders[borderidx].low < value) + borders[borderidx].low = value; + if (borders[borderidx].upper < value) + borders[borderidx].upper = value; } } } @@ -1181,12 +1182,12 @@ static void checkDepthIndex(int val) static int maxdepthidx = MAXOPENED-2; static int lastmaxdepth = 0; if (val > lastmaxdepth) { - LYXERR0("Depth reached " << val); + LYXERR(Debug::INFO, "Depth reached " << val); lastmaxdepth = val; } if (val > maxdepthidx) { maxdepthidx = val; - LYXERR0("maxdepthidx now " << val); + LYXERR(Debug::INFO, "maxdepthidx now " << val); } } @@ -1196,7 +1197,7 @@ static void checkIgnoreIdx(int val) { static int lastmaxignore = -1; if ((lastmaxignore < val) && (size_t(val+1) >= borders.size())) { - LYXERR0("IgnoreIdx reached " << val); + LYXERR(Debug::INFO, "IgnoreIdx reached " << val); lastmaxignore = val; } } @@ -1273,9 +1274,6 @@ void Intervall::addIntervall(int low, int upper) } } -typedef map AccentsMap; -static AccentsMap accents = map(); - static void buildaccent(string n, string param, string values) { stringstream s(n); @@ -1283,8 +1281,8 @@ static void buildaccent(string n, string param, string values) const char delim = '|'; while (getline(s, name, delim)) { size_t start = 0; - for (size_t i = 0; i < param.size(); i++) { - string key = name + "{" + param[i] + "}"; + for (char c : param) { + string key = name + "{" + c + "}"; // get the corresponding utf8-value if ((values[start] & 0xc0) != 0xc0) { // should not happen, utf8 encoding starts at least with 11xxxxxx @@ -1292,7 +1290,7 @@ static void buildaccent(string n, string param, string values) if ((values[start] & 0x80) == 0) { // is ascii accents[key] = values.substr(start, 1); - // LYXERR0("" << key << "=" << accents[key]); + // LYXERR(Debug::INFO, "" << key << "=" << accents[key]); } start++; continue; @@ -1307,7 +1305,7 @@ static void buildaccent(string n, string param, string values) // This is the first byte of following utf8 char accents[key] = values.substr(start, j); start += j; - // LYXERR0("" << key << "=" << accents[key]); + // LYXERR(Debug::INFO, "" << key << "=" << accents[key]); break; } } @@ -1315,21 +1313,84 @@ static void buildaccent(string n, string param, string values) } } +// Helper function +static string getutf8(unsigned uchar) +{ + #define maxc 5 + string ret = string(); + char c[maxc] = {0}; + if (uchar <= 0x7f) { + c[maxc-1] = uchar & 0x7f; + } + else { + unsigned char rest = 0x40; + unsigned char first = 0x80; + int start = maxc-1; + for (int i = start; i >=0; --i) { + if (uchar < rest) { + c[i] = first + uchar; + break; + } + c[i] = 0x80 | (uchar & 0x3f); + uchar >>= 6; + rest >>= 1; + first >>= 1; + first |= 0x80; + } + } + for (int i = 0; i < maxc; i++) { + if (c[i] == 0) continue; + ret += c[i]; + } + return(ret); +} + static void buildAccentsMap() { accents["imath"] = "ı"; accents["i"] = "ı"; accents["jmath"] = "È·"; - accents["lyxmathsym{ß}"] = "ß"; - accents["text{ß}"] = "ß"; + accents["cdot"] = "·"; + accents["textasciicircum"] = "^"; + accents["mathcircumflex"] = "^"; + accents["sim"] = "~"; + accents["guillemotright"] = "»"; + accents["guillemotleft"] = "«"; + accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15 + accents["thinspace"] = getutf8(0xf0002); // and used _only_ by findadv + accents["negthinspace"] = getutf8(0xf0003); // to omit backslashed latex macros + accents["medspace"] = getutf8(0xf0004); // See https://en.wikipedia.org/wiki/Private_Use_Areas + accents["negmedspace"] = getutf8(0xf0005); + accents["thickspace"] = getutf8(0xf0006); + accents["negthickspace"] = getutf8(0xf0007); + accents["lyx"] = getutf8(0xf0010); // Used logos + accents["LyX"] = getutf8(0xf0010); + accents["tex"] = getutf8(0xf0011); + accents["TeX"] = getutf8(0xf0011); + accents["latex"] = getutf8(0xf0012); + accents["LaTeX"] = getutf8(0xf0012); + accents["latexe"] = getutf8(0xf0013); + accents["LaTeXe"] = getutf8(0xf0013); + accents["lyxarrow"] = getutf8(0xf0020); + accents["braceleft"] = getutf8(0xf0030); + accents["braceright"] = getutf8(0xf0031); + accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash + accents["backslash LyX"] = getutf8(0xf0010); + accents["backslash tex"] = getutf8(0xf0011); + accents["backslash TeX"] = getutf8(0xf0011); + accents["backslash latex"] = getutf8(0xf0012); + accents["backslash LaTeX"] = getutf8(0xf0012); + accents["backslash latexe"] = getutf8(0xf0013); + accents["backslash LaTeXe"] = getutf8(0xf0013); + accents["backslash lyxarrow"] = getutf8(0xf0020); accents["ddot{\\imath}"] = "ï"; - buildaccent("ddot", "aAeEiIioOuUyY", - "äÄëËïÏïöÖüÜÿŸ"); // umlaut - buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY", - "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'Ä°' + buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY", + "äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ"); // umlaut + buildaccent("dot|.", "aAbBcCdDeEfFGghHIimMnNoOpPrRsStTwWxXyYzZ", + "ȧȦḃḂċĊḋḊėĖḟḞĠġḣḢİİṁṀṅṄȯȮṗṖṙṘṡṠṫṪẇẆẋẊẏẎżŻ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'Ä°' accents["acute{\\imath}"] = "í"; - buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI", - "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ"); + buildaccent("acute", "aAcCeEgGkKlLmMoOnNpPrRsSuUwWyYzZiI", + "áÁćĆéÉǵǴḱḰĺĹḿḾóÓńŃṕṔŕŔśŚúÚẃẂýÝźŹíÍ"); buildaccent("dacute|H|h", "oOuU", "őŐűŰ"); // double acute buildaccent("mathring|r", "aAuUwy", "åÅůŮẘẙ"); // ring @@ -1339,14 +1400,14 @@ static void buildAccentsMap() "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ"); // caron accents["hat{\\imath}"] = "î"; accents["hat{\\jmath}"] = "ĵ"; - buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU", - "âÂêÊîÎĉĈĝĜĥĤĵĴŝŜŵŴŷŶẑẐôÔûÛ"); // circ + buildaccent("hat|^", "aAcCeEgGhHiIjJoOsSuUwWyYzZ", + "âÂĉĈêÊĝĜĥĤîÎĵĴôÔŝŜûÛŵŴŷŶẑẐ"); // circ accents["bar{\\imath}"] = "Ä«"; buildaccent("bar|=", "aAeEiIoOuUyY", "āĀēĒīĪōŌūŪȳȲ"); // macron accents["tilde{\\imath}"] = "Ä©"; - buildaccent("tilde", "aAnNoOiIuU", - "ãÃñÑõÕĩĨũŨ"); // tilde + buildaccent("tilde", "aAeEiInNoOuUvVyY", + "ãÃẽẼĩĨñÑõÕũŨṽṼỹỸ"); // tilde accents["breve{\\imath}"] = "Ä­"; buildaccent("breve|u", "aAeEgGiIoOuU", "ăĂĕĔğĞĭĬŏŎŭŬ"); // breve @@ -1358,7 +1419,7 @@ static void buildAccentsMap() buildaccent("ogonek|k", "AaEeIiUuOo", "ĄąĘęĮįŲųǪǫ"); // ogonek buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh", - "ÇçĢĢĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla + "ÇçĢģĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla buildaccent("subring|textsubring", "Aa", "Ḁḁ"); // subring buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu", @@ -1373,6 +1434,8 @@ static void buildAccentsMap() accents["textroundcap{\\i}"] = "ȉ"; buildaccent("rcap|textroundcap", "AaEeIiOoRrUu", "ȂȃȆȇȊȋȎȏȒȓȖȗ"); // inverted breve + buildaccent("slashed", "oO", + "øØ"); // slashed } /* @@ -1383,25 +1446,32 @@ void Intervall::removeAccents() { if (accents.empty()) buildAccentsMap(); - static regex const accre("\\\\(([\\S]|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))"); + static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|" + "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}" + "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))"); smatch sub; for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { sub = *itacc; string key = sub.str(1); - if (accents.find(key) != accents.end()) { - string val = accents[key]; + AccentsIterator it_ac = accents.find(key); + if (it_ac != accents.end()) { + string val = it_ac->second; size_t pos = sub.position(size_t(0)); for (size_t i = 0; i < val.size(); i++) { par[pos+i] = val[i]; } - addIntervall(pos+val.size(), pos + sub.str(0).size()); + // Remove possibly following space too + if (par[pos+sub.str(0).size()] == ' ') + addIntervall(pos+val.size(), pos + sub.str(0).size()+1); + else + addIntervall(pos+val.size(), pos + sub.str(0).size()); for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) { // remove traces of any remaining chars par[i] = ' '; } } else { - LYXERR0("Not added accent for \"" << key << "\""); + LYXERR(Debug::INFO, "Not added accent for \"" << key << "\""); } } } @@ -1436,7 +1506,7 @@ void Intervall::resetOpenedP(int openPos) closes[1] = -1; } -int Intervall::previousNotIgnored(int start) +int Intervall::previousNotIgnored(int start) const { int idx = 0; /* int intervalls */ for (idx = ignoreidx; idx >= 0; --idx) { @@ -1448,7 +1518,7 @@ int Intervall::previousNotIgnored(int start) return start; } -int Intervall::nextNotIgnored(int start) +int Intervall::nextNotIgnored(int start) const { int idx = 0; /* int intervalls */ for (idx = 0; idx <= ignoreidx; idx++) { @@ -1460,91 +1530,92 @@ int Intervall::nextNotIgnored(int start) return start; } -typedef map KeysMap; +typedef unordered_map KeysMap; +typedef unordered_map::const_iterator KeysIterator; typedef vector< KeyInfo> Entries; -static KeysMap keys = map(); +static KeysMap keys = unordered_map(); class LatexInfo { private: - int entidx; - Entries entries; - Intervall interval; + int entidx_; + Entries entries_; + Intervall interval_; void buildKeys(bool); void buildEntries(bool); void makeKey(const string &, KeyInfo, bool isPatternString); void processRegion(int start, int region_end); /* remove {} parts */ - void removeHead(KeyInfo&, int count=0); + void removeHead(KeyInfo const &, int count=0); public: - LatexInfo(string par, bool isPatternString) : entidx(-1), interval(isPatternString, par) + LatexInfo(string const & par, bool isPatternString) + : entidx_(-1), interval_(isPatternString, par) { buildKeys(isPatternString); - entries = vector(); + entries_ = vector(); buildEntries(isPatternString); }; int getFirstKey() { - entidx = 0; - if (entries.empty()) { - return (-1); - } - if (entries[0].keytype == KeyInfo::isTitle) { - if (! entries[0].disabled) { - interval.hasTitle = true; - interval.titleValue = entries[0].head; + entidx_ = 0; + if (entries_.empty()) { + return -1; + } + if (entries_[0].keytype == KeyInfo::isTitle) { + interval_.hasTitle = true; + if (! entries_[0].disabled) { + interval_.titleValue = entries_[0].head; } else { - interval.hasTitle = false; - interval.titleValue = ""; + interval_.titleValue = ""; } - removeHead(entries[0]); - if (entries.size() > 1) - return (1); + removeHead(entries_[0]); + if (entries_.size() > 1) + return 1; else - return (-1); + return -1; } return 0; }; int getNextKey() { - entidx++; - if (int(entries.size()) > entidx) { - return entidx; + entidx_++; + if (int(entries_.size()) > entidx_) { + return entidx_; } else { - return (-1); + return -1; } }; bool setNextKey(int idx) { - if ((idx == entidx) && (entidx >= 0)) { - entidx--; + if ((idx == entidx_) && (entidx_ >= 0)) { + entidx_--; return true; } else return false; }; - int find(int start, KeyInfo::KeyType keytype) { + int find(int start, KeyInfo::KeyType keytype) const { if (start < 0) - return (-1); + return -1; int tmpIdx = start; - while (tmpIdx < int(entries.size())) { - if (entries[tmpIdx].keytype == keytype) + while (tmpIdx < int(entries_.size())) { + if (entries_[tmpIdx].keytype == keytype) return tmpIdx; tmpIdx++; } - return(-1); + return -1; }; - int process(ostringstream &os, KeyInfo &actual); - int dispatch(ostringstream &os, int previousStart, KeyInfo &actual); + int process(ostringstream & os, KeyInfo const & actual); + int dispatch(ostringstream & os, int previousStart, KeyInfo & actual); // string show(int lastpos) { return interval.show(lastpos);}; - int nextNotIgnored(int start) { return interval.nextNotIgnored(start);}; + int nextNotIgnored(int start) { return interval_.nextNotIgnored(start);}; KeyInfo &getKeyInfo(int keyinfo) { static KeyInfo invalidInfo = KeyInfo(); - if ((keyinfo < 0) || ( keyinfo >= int(entries.size()))) + if ((keyinfo < 0) || ( keyinfo >= int(entries_.size()))) return invalidInfo; else - return entries[keyinfo]; + return entries_[keyinfo]; }; - void setForDefaultLang(KeyInfo &defLang) {interval.setForDefaultLang(defLang);}; - void addIntervall(int low, int up) { interval.addIntervall(low, up); }; + void setForDefaultLang(KeyInfo const & defLang) {interval_.setForDefaultLang(defLang);}; + void addIntervall(int low, int up) { interval_.addIntervall(low, up); }; }; @@ -1552,7 +1623,6 @@ int Intervall::findclosing(int start, int end, char up = '{', char down = '}', i { int skip = 0; int depth = 0; - repeat--; for (int i = start; i < end; i += 1 + skip) { char c; c = par[i]; @@ -1563,6 +1633,7 @@ int Intervall::findclosing(int start, int end, char up = '{', char down = '}', i } else if (c == down) { if (depth == 0) { + repeat--; if ((repeat <= 0) || (par[i+1] != up)) return i; } @@ -1580,50 +1651,50 @@ class MathInfo { size_t mathStart; size_t mathSize; }; - size_t actualIdx; - vector entries; + size_t actualIdx_; + vector entries_; public: MathInfo() { - actualIdx = 0; + actualIdx_ = 0; } - void insert(string wait, size_t start, size_t end) { + void insert(string const & wait, size_t start, size_t end) { MathEntry m = MathEntry(); m.wait = wait; m.mathStart = start; m.mathEnd = end; m.mathSize = end - start; - entries.push_back(m); + entries_.push_back(m); } - bool empty() { return entries.empty(); }; - size_t getEndPos() { - if (entries.empty() || (actualIdx >= entries.size())) { + bool empty() const { return entries_.empty(); }; + size_t getEndPos() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { return 0; } - return entries[actualIdx].mathEnd; + return entries_[actualIdx_].mathEnd; } - size_t getStartPos() { - if (entries.empty() || (actualIdx >= entries.size())) { + size_t getStartPos() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { return 100000; /* definitely enough? */ } - return entries[actualIdx].mathStart; + return entries_[actualIdx_].mathStart; } size_t getFirstPos() { - actualIdx = 0; + actualIdx_ = 0; return getStartPos(); } - size_t getSize() { - if (entries.empty() || (actualIdx >= entries.size())) { + size_t getSize() const { + if (entries_.empty() || (actualIdx_ >= entries_.size())) { return size_t(0); } - return entries[actualIdx].mathSize; + return entries_[actualIdx_].mathSize; } - void incrEntry() { actualIdx++; }; + void incrEntry() { actualIdx_++; }; }; void LatexInfo::buildEntries(bool isPatternString) { - static regex const rmath("\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\}"); - static regex const rkeys("\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?))"); + static regex const rmath("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\})"); + static regex const rkeys("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?)))"); static bool disableLanguageOverride = false; smatch sub, submath; bool evaluatingRegexp = false; @@ -1638,55 +1709,62 @@ void LatexInfo::buildEntries(bool isPatternString) bool math_end_waiting = false; size_t math_pos = 10000; string math_end; + static vector usedText = vector(); - interval.removeAccents(); + interval_.removeAccents(); - for (sregex_iterator itmath(interval.par.begin(), interval.par.end(), rmath), end; itmath != end; ++itmath) { + for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) { submath = *itmath; + if ((submath.position(2) - submath.position(0)) %2 == 1) { + // prefixed by odd count of '\\' + continue; + } if (math_end_waiting) { - size_t pos = submath.position(size_t(0)); + size_t pos = submath.position(size_t(2)); if ((math_end == "$") && - (submath.str(0) == "$") && - (interval.par[pos-1] != '\\')) { + (submath.str(2) == "$")) { mi.insert("$", math_pos, pos + 1); math_end_waiting = false; } else if ((math_end == "\\]") && - (submath.str(0) == "\\]")) { + (submath.str(2) == "\\]")) { mi.insert("\\]", math_pos, pos + 2); math_end_waiting = false; } - else if ((submath.str(1).compare("end") == 0) && - (submath.str(2).compare(math_end) == 0)) { - mi.insert(math_end, math_pos, pos + submath.str(0).length()); + else if ((submath.str(3).compare("end") == 0) && + (submath.str(4).compare(math_end) == 0)) { + mi.insert(math_end, math_pos, pos + submath.str(2).length()); math_end_waiting = false; } else continue; } else { - if (submath.str(1).compare("begin") == 0) { + if (submath.str(3).compare("begin") == 0) { math_end_waiting = true; - math_end = submath.str(2); - math_pos = submath.position(size_t(0)); + math_end = submath.str(4); + math_pos = submath.position(size_t(2)); } - else if (submath.str(0).compare("\\[") == 0) { + else if (submath.str(2).compare("\\[") == 0) { math_end_waiting = true; math_end = "\\]"; - math_pos = submath.position(size_t(0)); + math_pos = submath.position(size_t(2)); } - else if (submath.str(0) == "$") { - size_t pos = submath.position(size_t(0)); - if ((pos == 0) || (interval.par[pos-1] != '\\')) { - math_end_waiting = true; - math_end = "$"; - math_pos = pos; - } + else if (submath.str(2) == "$") { + size_t pos = submath.position(size_t(2)); + math_end_waiting = true; + math_end = "$"; + math_pos = pos; } } } // Ignore language if there is math somewhere in pattern-string if (isPatternString) { + for (auto s: usedText) { + // Remove entries created in previous search runs + keys.erase(s); + } + usedText = vector(); if (! mi.empty()) { // Disable language keys["foreignlanguage"].disabled = true; @@ -1701,32 +1779,44 @@ void LatexInfo::buildEntries(bool isPatternString) } } math_pos = mi.getFirstPos(); - for (sregex_iterator it(interval.par.begin(), interval.par.end(), rkeys), end; it != end; ++it) { + for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) { sub = *it; - string key = sub.str(3); + if ((sub.position(2) - sub.position(0)) %2 == 1) { + // prefixed by odd count of '\\' + continue; + } + string key = sub.str(5); if (key == "") { - if (sub.str(0)[0] == '\\') - key = sub.str(0)[1]; + if (sub.str(2)[0] == '\\') + key = sub.str(2)[1]; else { - key = sub.str(0); - if (key == "$") { - size_t k_pos = sub.position(size_t(0)); - if ((k_pos > 0) && (interval.par[k_pos - 1] == '\\')) { - // Escaped '$', ignoring - continue; - } - } + key = sub.str(2); + } + } + KeysIterator it_key = keys.find(key); + if (it_key != keys.end()) { + if (it_key->second.keytype == KeyInfo::headRemove) { + KeyInfo found1 = it_key->second; + found1.disabled = true; + found1.head = "\\" + key + "{"; + found1._tokenstart = sub.position(size_t(2)); + found1._tokensize = found1.head.length(); + found1._dataStart = found1._tokenstart + found1.head.length(); + int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1); + found1._dataEnd = endpos; + removeHead(found1); + continue; } - }; + } if (evaluatingRegexp) { - if (sub.str(1).compare("endregexp") == 0) { + if (sub.str(3).compare("endregexp") == 0) { evaluatingRegexp = false; // found._tokenstart already set - found._dataEnd = sub.position(size_t(0)) + 13; + found._dataEnd = sub.position(size_t(2)) + 13; found._dataStart = found._dataEnd; found._tokensize = found._dataEnd - found._tokenstart; found.parenthesiscount = 0; - found.head = interval.par.substr(found._tokenstart, found._tokensize); + found.head = interval_.par.substr(found._tokenstart, found._tokensize); } else { continue; @@ -1734,26 +1824,29 @@ void LatexInfo::buildEntries(bool isPatternString) } else { if (evaluatingMath) { - if (size_t(sub.position(size_t(0))) < mi.getEndPos()) + if (size_t(sub.position(size_t(2))) < mi.getEndPos()) continue; evaluatingMath = false; mi.incrEntry(); math_pos = mi.getStartPos(); } - if (keys.find(key) == keys.end()) { + if (it_key == keys.end()) { found = KeyInfo(KeyInfo::isStandard, 0, true); + LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text"); + found = KeyInfo(KeyInfo::isText, 0, false); if (isPatternString) { found.keytype = KeyInfo::isChar; found.disabled = false; found.used = true; } keys[key] = found; + usedText.push_back(key); } else found = keys[key]; if (key.compare("regexp") == 0) { evaluatingRegexp = true; - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); found._tokensize = 0; continue; } @@ -1762,14 +1855,14 @@ void LatexInfo::buildEntries(bool isPatternString) if (found.keytype == KeyInfo::isIgnored) continue; else if (found.keytype == KeyInfo::isMath) { - if (size_t(sub.position(size_t(0))) == math_pos) { + if (size_t(sub.position(size_t(2))) == math_pos) { found = keys[key]; - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); found._tokensize = mi.getSize(); found._dataEnd = found._tokenstart + found._tokensize; found._dataStart = found._dataEnd; found.parenthesiscount = 0; - found.head = interval.par.substr(found._tokenstart, found._tokensize); + found.head = interval_.par.substr(found._tokenstart, found._tokensize); evaluatingMath = true; } else { @@ -1779,24 +1872,24 @@ void LatexInfo::buildEntries(bool isPatternString) bool discardComment; found = keys[key]; found.keytype = KeyInfo::doRemove; - if ((sub.str(5).compare("longtable") == 0) || - (sub.str(5).compare("tabular") == 0)) { + if ((sub.str(7).compare("longtable") == 0) || + (sub.str(7).compare("tabular") == 0)) { discardComment = true; /* '%' */ } else { discardComment = false; static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$"); smatch sub2; - string token = sub.str(5); + string token = sub.str(7); if (regex_match(token, sub2, removeArgs)) { found.keytype = KeyInfo::removeWithArg; } } - // discard spaces before pos(0) - int pos = sub.position(size_t(0)); + // discard spaces before pos(2) + int pos = sub.position(size_t(2)); int count; for (count = 0; pos - count > 0; count++) { - char c = interval.par[pos-count-1]; + char c = interval_.par[pos-count-1]; if (discardComment) { if ((c != ' ') && (c != '%')) break; @@ -1805,28 +1898,28 @@ void LatexInfo::buildEntries(bool isPatternString) break; } found._tokenstart = pos - count; - if (sub.str(1).compare(0, 5, "begin") == 0) { - size_t pos1 = pos + sub.str(0).length(); - if (sub.str(5).compare("cjk") == 0) { - pos1 = interval.findclosing(pos1+1, interval.par.length()) + 1; - if ((interval.par[pos1] == '{') && (interval.par[pos1+1] == '}')) + if (sub.str(3).compare(0, 5, "begin") == 0) { + size_t pos1 = pos + sub.str(2).length(); + if (sub.str(7).compare("cjk") == 0) { + pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1; + if ((interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}')) pos1 += 2; found.keytype = KeyInfo::isMain; found._dataStart = pos1; - found._dataEnd = interval.par.length(); + found._dataEnd = interval_.par.length(); found.disabled = keys["foreignlanguage"].disabled; found.used = keys["foreignlanguage"].used; found._tokensize = pos1 - found._tokenstart; - found.head = interval.par.substr(found._tokenstart, found._tokensize); + found.head = interval_.par.substr(found._tokenstart, found._tokensize); } else { // Swallow possible optional params - while (interval.par[pos1] == '[') { - pos1 = interval.findclosing(pos1+1, interval.par.length(), '[', ']')+1; + while (interval_.par[pos1] == '[') { + pos1 = interval_.findclosing(pos1+1, interval_.par.length(), '[', ']')+1; } // Swallow also the eventual parameter - if (interval.par[pos1] == '{') { - found._dataEnd = interval.findclosing(pos1+1, interval.par.length()) + 1; + if (interval_.par[pos1] == '{') { + found._dataEnd = interval_.findclosing(pos1+1, interval_.par.length()) + 1; } else { found._dataEnd = pos1; @@ -1834,32 +1927,32 @@ void LatexInfo::buildEntries(bool isPatternString) found._dataStart = found._dataEnd; found._tokensize = count + found._dataEnd - pos; found.parenthesiscount = 0; - found.head = interval.par.substr(found._tokenstart, found._tokensize); + found.head = interval_.par.substr(found._tokenstart, found._tokensize); found.disabled = true; } } else { // Handle "\end{...}" - found._dataStart = pos + sub.str(0).length(); + found._dataStart = pos + sub.str(2).length(); found._dataEnd = found._dataStart; found._tokensize = count + found._dataEnd - pos; found.parenthesiscount = 0; - found.head = interval.par.substr(found._tokenstart, found._tokensize); + found.head = interval_.par.substr(found._tokenstart, found._tokensize); found.disabled = true; } } } else if (found.keytype != KeyInfo::isRegex) { - found._tokenstart = sub.position(size_t(0)); + found._tokenstart = sub.position(size_t(2)); if (found.parenthesiscount == 0) { // Probably to be discarded - size_t following_pos = sub.position(size_t(0)) + sub.str(3).length() + 1; - char following = interval.par[following_pos]; + size_t following_pos = sub.position(size_t(2)) + sub.str(5).length() + 1; + char following = interval_.par[following_pos]; if (following == ' ') - found.head = "\\" + sub.str(3) + " "; + found.head = "\\" + sub.str(5) + " "; else if (following == '=') { // like \uldepth=1000pt - found.head = sub.str(0); + found.head = sub.str(2); } else found.head = "\\" + key; @@ -1878,23 +1971,34 @@ void LatexInfo::buildEntries(bool isPatternString) } } int optend = params; - while (interval.par[optend] == '[') { + while (interval_.par[optend] == '[') { // discard optional parameters - optend = interval.findclosing(optend+1, interval.par.length(), '[', ']') + 1; + optend = interval_.findclosing(optend+1, interval_.par.length(), '[', ']') + 1; } if (optend > params) { - key += interval.par.substr(params, optend-params); + key += interval_.par.substr(params, optend-params); evaluatingOptional = true; optionalEnd = optend; + if (found.keytype == KeyInfo::isSectioning) { + // Remove optional values (but still keep in header) + interval_.addIntervall(params, optend); + } + } + string token = sub.str(7); + int closings; + if (interval_.par[optend] != '{') { + closings = 0; + found.parenthesiscount = 0; + found.head = "\\" + key; } - string token = sub.str(5); - int closings = found.parenthesiscount; + else + closings = found.parenthesiscount; if (found.parenthesiscount == 1) { found.head = "\\" + key + "{"; } else if (found.parenthesiscount > 1) { if (token != "") { - found.head = sub.str(0) + "{"; + found.head = sub.str(2) + "{"; closings = found.parenthesiscount - 1; } else { @@ -1903,10 +2007,28 @@ void LatexInfo::buildEntries(bool isPatternString) } found._tokensize = found.head.length(); found._dataStart = found._tokenstart + found.head.length(); - if (interval.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) { + if (found.keytype == KeyInfo::doRemove) { + if (closings > 0) { + size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); + if (endpar >= interval_.par.length()) + found._dataStart = interval_.par.length(); + else + found._dataStart = endpar; + found._tokensize = found._dataStart - found._tokenstart; + } + else { + found._dataStart = found._tokenstart + found._tokensize; + } + closings = 0; + } + if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) { found._dataStart += 15; } - size_t endpos = interval.findclosing(found._dataStart, interval.par.length(), '{', '}', closings); + size_t endpos; + if (closings < 1) + endpos = found._dataStart - 1; + else + endpos = interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings); if (found.keytype == KeyInfo::isList) { // Check if it really is list env static regex const listre("^([a-z]+)$"); @@ -1928,18 +2050,18 @@ void LatexInfo::buildEntries(bool isPatternString) // Disable this key, treate it as standard found.keytype = KeyInfo::isStandard; found.disabled = true; - if ((codeEnd == interval.par.length()) && + if ((codeEnd +1 >= interval_.par.length()) && (found._tokenstart == codeStart)) { // trickery, because the code inset starts // with \selectlanguage ... codeEnd = endpos; - if (entries.size() > 1) { - entries[entries.size()-1]._dataEnd = codeEnd; + if (entries_.size() > 1) { + entries_[entries_.size()-1]._dataEnd = codeEnd; } } } } - if ((endpos == interval.par.length()) && + if ((endpos == interval_.par.length()) && (found.keytype == KeyInfo::doRemove)) { // Missing closing => error in latex-input? // therefore do not delete remaining data @@ -1953,7 +2075,7 @@ void LatexInfo::buildEntries(bool isPatternString) keys[key].used = true; } } - entries.push_back(found); + entries_.push_back(found); } } @@ -1979,6 +2101,8 @@ void LatexInfo::buildKeys(bool isPatternString) static bool keysBuilt = false; if (keysBuilt && !isPatternString) return; + // Keys to ignore in any case + makeKey("text|textcyrillic|lyxmathsym", KeyInfo(KeyInfo::headRemove, 1, true), true); // Known standard keys with 1 parameter. // Split is done, if not at start of region makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString); @@ -2014,6 +2138,7 @@ void LatexInfo::buildKeys(bool isPatternString) makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("thickspace|medspace|thinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); // Skip // makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); // Custom space/skip, remove the content (== length value) @@ -2030,6 +2155,11 @@ void LatexInfo::buildKeys(bool isPatternString) // handle like standard keys with 1 parameter. makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString); + // Ignore deleted text + makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString); + // but preserve added text + makeKey("lyxadded", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString); + // Macros to remove, but let the parameter survive // No split makeKey("menuitem|textmd|textrm", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString); @@ -2055,7 +2185,8 @@ void LatexInfo::buildKeys(bool isPatternString) makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString); // Survives, like known character - makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + makeKey("tableofcontents", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString); makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); @@ -2130,7 +2261,7 @@ string Intervall::show(int lastpos) if (lastpos > i) { s += par.substr(i, lastpos-i); } - return (s); + return s; } #endif @@ -2171,12 +2302,17 @@ void Intervall::output(ostringstream &os, int lastpos) printed += lastpos-i; } handleParentheses(lastpos, false); - for (int i = actualdeptindex; i > 0; --i) { + int startindex; + if (keys["foreignlanguage"].disabled) + startindex = actualdeptindex-langcount; + else + startindex = actualdeptindex; + for (int i = startindex; i > 0; --i) { os << "}"; } if (hasTitle && (printed > 0)) os << "}"; - if (! isPatternString) + if (! isPatternString_) os << "\n"; handleParentheses(lastpos, true); /* extra closings '}' allowed here */ } @@ -2184,29 +2320,29 @@ void Intervall::output(ostringstream &os, int lastpos) void LatexInfo::processRegion(int start, int region_end) { while (start < region_end) { /* Let {[} and {]} survive */ - int cnt = interval.isOpeningPar(start); + int cnt = interval_.isOpeningPar(start); if (cnt == 1) { // Closing is allowed past the region - int closing = interval.findclosing(start+1, interval.par.length()); - interval.addIntervall(start, start+1); - interval.addIntervall(closing, closing+1); + int closing = interval_.findclosing(start+1, interval_.par.length()); + interval_.addIntervall(start, start+1); + interval_.addIntervall(closing, closing+1); } else if (cnt == 3) start += 2; - start = interval.nextNotIgnored(start+1); + start = interval_.nextNotIgnored(start+1); } } -void LatexInfo::removeHead(KeyInfo &actual, int count) +void LatexInfo::removeHead(KeyInfo const & actual, int count) { if (actual.parenthesiscount == 0) { // "{\tiny{} ...}" ==> "{{} ...}" - interval.addIntervall(actual._tokenstart-count, actual._tokenstart + actual._tokensize); + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart + actual._tokensize); } else { // Remove header hull, that is "\url{abcd}" ==> "abcd" - interval.addIntervall(actual._tokenstart - count, actual._dataStart); - interval.addIntervall(actual._dataEnd, actual._dataEnd+1); + interval_.addIntervall(actual._tokenstart - count, actual._dataStart); + interval_.addIntervall(actual._dataEnd, actual._dataEnd+1); } } @@ -2227,23 +2363,28 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); if (tmpIdx > 0) { for (int i = nextKeyIdx; i <= tmpIdx; i++) { - entries[i].disabled = true; + entries_[i].disabled = true; } - actual._dataEnd = entries[tmpIdx]._dataEnd; + actual._dataEnd = entries_[tmpIdx]._dataEnd; } - while (interval.par[actual._dataEnd] == ' ') + while (interval_.par[actual._dataEnd] == ' ') actual._dataEnd++; - interval.addIntervall(0, actual._dataEnd+1); - interval.actualdeptindex = 0; - interval.depts[0] = actual._dataEnd+1; - interval.closes[0] = -1; + interval_.addIntervall(0, actual._dataEnd+1); + interval_.actualdeptindex = 0; + interval_.depts[0] = actual._dataEnd+1; + interval_.closes[0] = -1; break; } + case KeyInfo::isText: + interval_.par[actual._tokenstart] = '#'; + //interval_.addIntervall(actual._tokenstart, actual._tokenstart+1); + nextKeyIdx = getNextKey(); + break; case KeyInfo::noContent: { /* char like "\hspace{2cm}" */ if (actual.disabled) - interval.addIntervall(actual._tokenstart, actual._dataEnd); + interval_.addIntervall(actual._tokenstart, actual._dataEnd); else - interval.addIntervall(actual._dataStart, actual._dataEnd); + interval_.addIntervall(actual._dataStart, actual._dataEnd); } // fall through case KeyInfo::isChar: { @@ -2251,42 +2392,46 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) break; } case KeyInfo::isSize: { - if (actual.disabled || (interval.par[actual._dataStart] != '{') || (interval.par[actual._dataStart-1] == ' ')) { - processRegion(actual._dataEnd, actual._dataEnd+1); /* remove possibly following {} */ - interval.addIntervall(actual._tokenstart, actual._dataEnd+1); + if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) { + if (actual.parenthesiscount == 0) + interval_.addIntervall(actual._tokenstart, actual._dataEnd); + else { + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); + } nextKeyIdx = getNextKey(); } else { // Here _dataStart points to '{', so correct it actual._dataStart += 1; actual._tokensize += 1; actual.parenthesiscount = 1; - if (interval.par[actual._dataStart] == '}') { + if (interval_.par[actual._dataStart] == '}') { // Determine the end if used like '{\tiny{}...}' - actual._dataEnd = interval.findclosing(actual._dataStart+1, interval.par.length()) + 1; - interval.addIntervall(actual._dataStart, actual._dataStart+1); + actual._dataEnd = interval_.findclosing(actual._dataStart+1, interval_.par.length()) + 1; + interval_.addIntervall(actual._dataStart, actual._dataStart+1); } else { // Determine the end if used like '\tiny{...}' - actual._dataEnd = interval.findclosing(actual._dataStart, interval.par.length()) + 1; + actual._dataEnd = interval_.findclosing(actual._dataStart, interval_.par.length()) + 1; } // Split on this key if not at start - int start = interval.nextNotIgnored(previousStart); + int start = interval_.nextNotIgnored(previousStart); if (start < actual._tokenstart) { - interval.output(os, actual._tokenstart); - interval.addIntervall(start, actual._tokenstart); + interval_.output(os, actual._tokenstart); + interval_.addIntervall(start, actual._tokenstart); } // discard entry if at end of actual nextKeyIdx = process(os, actual); } break; } - case KeyInfo::endArguments: + case KeyInfo::endArguments: { // Remove trailing '{}' too actual._dataStart += 1; actual._dataEnd += 1; - interval.addIntervall(actual._tokenstart, actual._dataEnd+1); + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); nextKeyIdx = getNextKey(); break; + } case KeyInfo::noMain: // fall through case KeyInfo::isStandard: { @@ -2296,10 +2441,10 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) nextKeyIdx = getNextKey(); } else { // Split on this key if not at datastart of calling entry - int start = interval.nextNotIgnored(previousStart); + int start = interval_.nextNotIgnored(previousStart); if (start < actual._tokenstart) { - interval.output(os, actual._tokenstart); - interval.addIntervall(start, actual._tokenstart); + interval_.output(os, actual._tokenstart); + interval_.addIntervall(start, actual._tokenstart); } // discard entry if at end of actual nextKeyIdx = process(os, actual); @@ -2312,21 +2457,44 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments); if (tmpIdx > 0) { for (int i = nextKeyIdx; i <= tmpIdx; i++) { - entries[i].disabled = true; + entries_[i].disabled = true; } - actual._dataEnd = entries[tmpIdx]._dataEnd; + actual._dataEnd = entries_[tmpIdx]._dataEnd; } - interval.addIntervall(actual._tokenstart, actual._dataEnd+1); + interval_.addIntervall(actual._tokenstart, actual._dataEnd+1); break; } case KeyInfo::doRemove: { // Remove the key with all parameters and following spaces size_t pos; - for (pos = actual._dataEnd+1; pos < interval.par.length(); pos++) { - if ((interval.par[pos] != ' ') && (interval.par[pos] != '%')) + size_t start; + if (interval_.par[actual._dataEnd-1] == ' ') + start = actual._dataEnd; + else + start = actual._dataEnd+1; + for (pos = start; pos < interval_.par.length(); pos++) { + if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%')) + break; + } + // Remove also enclosing parentheses [] and {} + int numpars = 0; + int spaces = 0; + while (actual._tokenstart > numpars) { + if (pos+numpars >= interval_.par.size()) break; + else if (interval_.par[pos+numpars] == ']' && interval_.par[actual._tokenstart-numpars-1] == '[') + numpars++; + else if (interval_.par[pos+numpars] == '}' && interval_.par[actual._tokenstart-numpars-1] == '{') + numpars++; + else + break; + } + if (numpars > 0) { + if (interval_.par[pos+numpars] == ' ') + spaces++; } - interval.addIntervall(actual._tokenstart, pos); + + interval_.addIntervall(actual._tokenstart-numpars, pos+numpars+spaces); nextKeyIdx = getNextKey(); break; } @@ -2334,7 +2502,7 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) // Discard space before _tokenstart int count; for (count = 0; count < actual._tokenstart; count++) { - if (interval.par[actual._tokenstart-count-1] != ' ') + if (interval_.par[actual._tokenstart-count-1] != ' ') break; } nextKeyIdx = getNextKey(); @@ -2344,44 +2512,44 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) // with arguments // How else can we catch this one? for (int i = nextKeyIdx; i <= tmpIdx; i++) { - entries[i].disabled = true; + entries_[i].disabled = true; } - actual._dataEnd = entries[tmpIdx]._dataEnd; + actual._dataEnd = entries_[tmpIdx]._dataEnd; } else if (nextKeyIdx > 0) { // Ignore any lang entries inside data region - for (int i = nextKeyIdx; i < int(entries.size()) && entries[i]._tokenstart < actual._dataEnd; i++) { - if (entries[i].keytype == KeyInfo::isMain) - entries[i].disabled = true; + for (int i = nextKeyIdx; i < int(entries_.size()) && entries_[i]._tokenstart < actual._dataEnd; i++) { + if (entries_[i].keytype == KeyInfo::isMain) + entries_[i].disabled = true; } } if (actual.disabled) { - interval.addIntervall(actual._tokenstart-count, actual._dataEnd+1); + interval_.addIntervall(actual._tokenstart-count, actual._dataEnd+1); } else { - interval.addIntervall(actual._tokenstart-count, actual._tokenstart); + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart); } - if (interval.par[actual._dataEnd+1] == '[') { - int posdown = interval.findclosing(actual._dataEnd+2, interval.par.length(), '[', ']'); - if ((interval.par[actual._dataEnd+2] == '{') && - (interval.par[posdown-1] == '}')) { - interval.addIntervall(actual._dataEnd+1,actual._dataEnd+3); - interval.addIntervall(posdown-1, posdown+1); + if (interval_.par[actual._dataEnd+1] == '[') { + int posdown = interval_.findclosing(actual._dataEnd+2, interval_.par.length(), '[', ']'); + if ((interval_.par[actual._dataEnd+2] == '{') && + (interval_.par[posdown-1] == '}')) { + interval_.addIntervall(actual._dataEnd+1,actual._dataEnd+3); + interval_.addIntervall(posdown-1, posdown+1); } else { - interval.addIntervall(actual._dataEnd+1, actual._dataEnd+2); - interval.addIntervall(posdown, posdown+1); + interval_.addIntervall(actual._dataEnd+1, actual._dataEnd+2); + interval_.addIntervall(posdown, posdown+1); } - int blk = interval.nextNotIgnored(actual._dataEnd+1); + int blk = interval_.nextNotIgnored(actual._dataEnd+1); if (blk > posdown) { // Discard at most 1 space after empty item int count; for (count = 0; count < 1; count++) { - if (interval.par[blk+count] != ' ') + if (interval_.par[blk+count] != ' ') break; } if (count > 0) - interval.addIntervall(blk, blk+count); + interval_.addIntervall(blk, blk+count); } } break; @@ -2391,8 +2559,8 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) int count; int val = actual._tokenstart; for (count = 0; count < actual._tokenstart;) { - val = interval.previousNotIgnored(val-1); - if (interval.par[val] != ' ') + val = interval_.previousNotIgnored(val-1); + if (val < 0 || interval_.par[val] != ' ') break; else { count = actual._tokenstart - val; @@ -2402,7 +2570,7 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) removeHead(actual, count); nextKeyIdx = getNextKey(); } else { - interval.addIntervall(actual._tokenstart-count, actual._tokenstart); + interval_.addIntervall(actual._tokenstart-count, actual._tokenstart); nextKeyIdx = process(os, actual); } break; @@ -2424,19 +2592,20 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) break; } case KeyInfo::isMain: { - if (interval.par.substr(actual._dataStart, 2) == "% ") - interval.addIntervall(actual._dataStart, actual._dataStart+2); + if (interval_.par.substr(actual._dataStart, 2) == "% ") + interval_.addIntervall(actual._dataStart, actual._dataStart+2); if (actual._tokenstart > 0) { - int prev = interval.previousNotIgnored(actual._tokenstart - 1); - if ((prev >= 0) && interval.par[prev] == '%') - interval.addIntervall(prev, prev+1); + int prev = interval_.previousNotIgnored(actual._tokenstart - 1); + if ((prev >= 0) && interval_.par[prev] == '%') + interval_.addIntervall(prev, prev+1); } if (actual.disabled) { removeHead(actual); - if ((interval.par.substr(actual._dataStart, 3) == " \\[") || - (interval.par.substr(actual._dataStart, 8) == " \\begin{")) { + interval_.langcount++; + if ((interval_.par.substr(actual._dataStart, 3) == " \\[") || + (interval_.par.substr(actual._dataStart, 8) == " \\begin{")) { // Discard also the space before math-equation - interval.addIntervall(actual._dataStart, actual._dataStart+1); + interval_.addIntervall(actual._dataStart, actual._dataStart+1); } nextKeyIdx = getNextKey(); // interval.resetOpenedP(actual._dataStart-1); @@ -2444,17 +2613,18 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) else { if (actual._tokenstart < 26) { // for the first (and maybe dummy) language - interval.setForDefaultLang(actual); + interval_.setForDefaultLang(actual); } - interval.resetOpenedP(actual._dataStart-1); + interval_.resetOpenedP(actual._dataStart-1); } break; } case KeyInfo::invalid: - // This cannot happen, already handled + case KeyInfo::headRemove: + // These two cases cannot happen, already handled // fall through default: { - // LYXERR0("Unhandled keytype"); + // LYXERR(Debug::INFO, "Unhandled keytype"); nextKeyIdx = getNextKey(); break; } @@ -2462,15 +2632,15 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) return nextKeyIdx; } -int LatexInfo::process(ostringstream &os, KeyInfo &actual ) +int LatexInfo::process(ostringstream & os, KeyInfo const & actual ) { - int end = interval.nextNotIgnored(actual._dataEnd); + int end = interval_.nextNotIgnored(actual._dataEnd); int oldStart = actual._dataStart; int nextKeyIdx = getNextKey(); while (true) { if ((nextKeyIdx < 0) || - (entries[nextKeyIdx]._tokenstart >= actual._dataEnd) || - (entries[nextKeyIdx].keytype == KeyInfo::invalid)) { + (entries_[nextKeyIdx]._tokenstart >= actual._dataEnd) || + (entries_[nextKeyIdx].keytype == KeyInfo::invalid)) { if (oldStart <= end) { processRegion(oldStart, end); oldStart = end+1; @@ -2494,7 +2664,7 @@ int LatexInfo::process(ostringstream &os, KeyInfo &actual ) if (oldStart <= end) { processRegion(oldStart, end); } - if (interval.par[end] == '}') { + if (interval_.par.size() > (size_t) end && interval_.par[end] == '}') { end += 1; // This is the normal case. // But if using the firstlanguage, the closing may be missing @@ -2502,23 +2672,25 @@ int LatexInfo::process(ostringstream &os, KeyInfo &actual ) // get minimum of 'end' and 'actual._dataEnd' in case that the nextKey.keytype was 'KeyInfo::isMain' int output_end; if (actual._dataEnd < end) - output_end = interval.nextNotIgnored(actual._dataEnd); + output_end = interval_.nextNotIgnored(actual._dataEnd); + else if (interval_.par.size() > (size_t) end) + output_end = interval_.nextNotIgnored(end); else - output_end = interval.nextNotIgnored(end); + output_end = interval_.par.size(); if ((actual.keytype == KeyInfo::isMain) && actual.disabled) { - interval.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize); + interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize); } // Remove possible empty data - int dstart = interval.nextNotIgnored(actual._dataStart); - while (interval.isOpeningPar(dstart) == 1) { - interval.addIntervall(dstart, dstart+1); - int dend = interval.findclosing(dstart+1, output_end); - interval.addIntervall(dend, dend+1); - dstart = interval.nextNotIgnored(dstart+1); + int dstart = interval_.nextNotIgnored(actual._dataStart); + while (interval_.isOpeningPar(dstart) == 1) { + interval_.addIntervall(dstart, dstart+1); + int dend = interval_.findclosing(dstart+1, output_end); + interval_.addIntervall(dend, dend+1); + dstart = interval_.nextNotIgnored(dstart+1); } if (dstart < output_end) - interval.output(os, output_end); - interval.addIntervall(actual._tokenstart, end); + interval_.output(os, output_end); + interval_.addIntervall(actual._tokenstart, end); return nextKeyIdx; } @@ -2526,7 +2698,7 @@ string splitOnKnownMacros(string par, bool isPatternString) { ostringstream os; LatexInfo li(par, isPatternString); - // LYXERR0("Berfore split: " << par); + // LYXERR(Debug::INFO, "Berfore split: " << par); KeyInfo DummyKey = KeyInfo(KeyInfo::KeyType::isMain, 2, true); DummyKey.head = ""; DummyKey._tokensize = 0; @@ -2593,7 +2765,7 @@ string splitOnKnownMacros(string par, bool isPatternString) } else s = par; /* no known macros found */ - // LYXERR0("After split: " << s); + // LYXERR(Debug::INFO, "After split: " << s); return s; } @@ -2602,7 +2774,7 @@ string splitOnKnownMacros(string par, bool isPatternString) * Resulting modified string is set to "", if * the searched tex does not contain all the features in the search pattern */ -static string correctlanguagesetting(string par, bool isPatternString, bool withformat) +static string correctlanguagesetting(string par, bool isPatternString, bool withformat, lyx::Buffer *pbuf = nullptr) { static Features regex_f; static int missed = 0; @@ -2622,8 +2794,24 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with // Split the latex input into pieces which // can be digested by our search engine LYXERR(Debug::FIND, "input: \"" << par << "\""); + if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language + // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX + string doclang = pbuf->params().language->polyglossia(); + static regex langre("\\\\(foreignlanguage)\\{([^\\}]+)\\}"); + smatch sub; + bool toIgnoreLang = true; + for (sregex_iterator it(par.begin(), par.end(), langre), end; it != end; ++it) { + sub = *it; + if (sub.str(2) != doclang) { + toIgnoreLang = false; + break; + } + } + setIgnoreFormat("language", toIgnoreLang); + + } result = splitOnKnownMacros(par.substr(0,parlen), isPatternString); - LYXERR(Debug::FIND, "After split: \"" << result << "\""); + LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\""); } else result = par.substr(0, parlen); @@ -2636,7 +2824,7 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with string a = it->first; regex_with_format = true; features += " " + a; - // LYXERR0("Identified regex format:" << a); + // LYXERR(Debug::INFO, "Identified regex format:" << a); } LYXERR(Debug::FIND, "Identified Features" << features); @@ -2649,14 +2837,15 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with if (b && ! info[a]) { missed++; LYXERR(Debug::FIND, "Missed(" << missed << " " << a <<", srclen = " << parlen ); - return(""); + return ""; } } + } else { - // LYXERR0("No regex formats"); + // LYXERR(Debug::INFO, "No regex formats"); } - return(result); + return result; } @@ -2666,13 +2855,13 @@ static int identifyClosing(string & t) int open_braces = 0; do { LYXERR(Debug::FIND, "identifyClosing(): t now is '" << t << "'"); - if (regex_replace(t, t, "(.*[^\\\\])\\$" REGEX_EOS, "$1")) + if (regex_replace(t, t, "(.*[^\\\\])\\$$", "$1")) continue; - if (regex_replace(t, t, "(.*[^\\\\]) \\\\\\]" REGEX_EOS, "$1")) + if (regex_replace(t, t, "(.*[^\\\\])\\\\\\]$", "$1")) continue; - if (regex_replace(t, t, "(.*[^\\\\]) \\\\end\\{[a-zA-Z_]*\\*?\\}" REGEX_EOS, "$1")) + if (regex_replace(t, t, "(.*[^\\\\])\\\\end\\{[a-zA-Z_]*\\*?\\}$", "$1")) continue; - if (regex_replace(t, t, "(.*[^\\\\])\\}" REGEX_EOS, "$1")) { + if (regex_replace(t, t, "(.*[^\\\\])\\}$", "$1")) { ++open_braces; continue; } @@ -2684,7 +2873,90 @@ static int identifyClosing(string & t) static int num_replaced = 0; static bool previous_single_replace = true; -MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt) +void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string) +{ +#if QTSEARCH + // Handle \w properly + QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption; + if (! opt.casesensitive) { + popts |= QRegularExpression::CaseInsensitiveOption; + } + regexp = QRegularExpression(QString::fromStdString(regexp_str), popts); + regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts); + regexError = ""; + if (regexp.isValid() && regexp2.isValid()) { + regexIsValid = true; + // Check '{', '}' pairs inside the regex + int balanced = 0; + int skip = 1; + for (unsigned i = 0; i < par_as_string.size(); i+= skip) { + char c = par_as_string[i]; + if (c == '\\') { + skip = 2; + continue; + } + if (c == '{') + balanced++; + else if (c == '}') { + balanced--; + if (balanced < 0) + break; + } + skip = 1; + } + if (balanced != 0) { + regexIsValid = false; + regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\""; + } + } + else { + regexIsValid = false; + if (!regexp.isValid()) + regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString(); + else + regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString(); + } +#else + if (opt.casesensitive) { + regexp = regex(regexp_str); + regexp2 = regex(regexp2_str); + } + else { + regexp = regex(regexp_str, std::regex_constants::icase); + regexp2 = regex(regexp2_str, std::regex_constants::icase); + } +#endif +} + +static void modifyRegexForMatchWord(string &t) +{ + string s(""); + regex wordre("(\\\\)*((\\.|\\\\b))"); + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) { + continue; + } + else if (sub.str(2) == "\\\\b") + return; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += "\\S"; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) { + s = "\\b" + t + "\\b"; + t = s; + return; + } + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = "\\b" + s + "\\b"; +} + +MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt) : p_buf(&buf), p_first_buf(&buf), opt(opt) { Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true); @@ -2699,61 +2971,67 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & previous_single_replace = true; } // When using regexp, braces are hacked already by escape_for_regex() - par_as_string = normalize(ds, !use_regexp); + par_as_string = normalize(ds); open_braces = 0; close_wildcards = 0; size_t lead_size = 0; // correct the language settings - par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat); - if (opt.ignoreformat) { - if (!use_regexp) { - // if par_as_string_nolead were emty, - // the following call to findAux will always *find* the string - // in the checked data, and thus always using the slow - // examining of the current text part. - par_as_string_nolead = par_as_string; + par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat, &buf); + opt.matchAtStart = false; + if (!use_regexp) { + identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string + if (opt.ignoreformat) { + lead_size = 0; } - } else { + else { + lead_size = identifyLeading(par_as_string); + } + lead_as_string = par_as_string.substr(0, lead_size); + string lead_as_regex_string = string2regex(lead_as_string); + par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); + string par_as_regex_string_nolead = string2regex(par_as_string_nolead); + /* Handle whole words too in this case + */ + if (opt.matchword) { + par_as_regex_string_nolead = "\\b" + par_as_regex_string_nolead + "\\b"; + opt.matchword = false; + } + string regexp_str = "(" + lead_as_regex_string + ")()" + par_as_regex_string_nolead; + string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead; + CreateRegexp(opt, regexp_str, regexp2_str); + use_regexp = true; + LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); + LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); + return; + } + + if (!opt.ignoreformat) { lead_size = identifyLeading(par_as_string); LYXERR(Debug::FIND, "Lead_size: " << lead_size); lead_as_string = par_as_string.substr(0, lead_size); par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size); } - if (!use_regexp) { - open_braces = identifyClosing(par_as_string); - identifyClosing(par_as_string_nolead); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'"); - } else { + // Here we are using regexp + LASSERT(use_regexp, /**/); + { string lead_as_regexp; if (lead_size > 0) { - // @todo No need to search for \regexp{} insets in leading material - lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat); + lead_as_regexp = string2regex(par_as_string.substr(0, lead_size)); + regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", ""); par_as_string = par_as_string_nolead; LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'"); LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); } + // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'"); par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat); // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them. - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - if ( - // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex) - regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2") - // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])( \\\\\\\\\\\\\\])\\'", "$1(.*?)$2") - // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, - "(.*[^\\\\])( \\\\\\\\end\\\\\\{[a-zA-Z_]*)(\\\\\\*)?(\\\\\\})\\'", "$1(.*?)$2$3$4") - // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex) - || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2") - ) { - ++close_wildcards; - } + // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + ++close_wildcards; + size_t lng = par_as_string.size(); if (!opt.ignoreformat) { // Remove extra '\}' at end if not part of \{\.\} - size_t lng = par_as_string.size(); while(lng > 2) { if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) { if (lng >= 6) { @@ -2768,23 +3046,16 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & } if (lng < par_as_string.size()) par_as_string = par_as_string.substr(0,lng); - /* - // save '\.' - regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_"); - // handle '.' -> '[^]', replace later as '[^\}\{\\]' - regex_replace(par_as_string, par_as_string, "\\.", "[^]"); - // replace '[^...]' with '[^...\}\{\\]' - regex_replace(par_as_string, par_as_string, "\\[\\^([^\\\\\\]]*)\\]", "_xxbrlxx_$1\\}\\{\\\\_xxbrrxx_"); - regex_replace(par_as_string, par_as_string, "_xxbrlxx_", "[^"); - regex_replace(par_as_string, par_as_string, "_xxbrrxx_", "]"); - // restore '\.' - regex_replace(par_as_string, par_as_string, "_xxbdotxx_", "\\."); - */ } - LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); - LYXERR(Debug::FIND, "Open braces: " << open_braces); - LYXERR(Debug::FIND, "Close .*? : " << close_wildcards); - LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); + LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'"); + if ((lng > 0) && (par_as_string[0] == '^')) { + par_as_string = par_as_string.substr(1); + --lng; + opt.matchAtStart = true; + } + // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'"); + // LYXERR(Debug::FIND, "Open braces: " << open_braces); + // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string); // If entered regexp must match at begin of searched string buffer // Kornel: Added parentheses to use $1 for size of the leading string @@ -2794,97 +3065,69 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & // TODO: Adapt '\[12345678]' in par_as_string to acount for the first '() // Unfortunately is '\1', '\2', etc not working for strings with extra format // so the convert has no effect in that case - for (int i = 8; i > 0; --i) { + for (int i = 7; i > 0; --i) { string orig = "\\\\" + std::to_string(i); - string dest = "\\" + std::to_string(i+1); + string dest = "\\" + std::to_string(i+2); while (regex_replace(par_as_string, par_as_string, orig, dest)); } - regexp_str = "(" + lead_as_regexp + ")" + par_as_string; - regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string; + if (opt.matchword) { + modifyRegexForMatchWord(par_as_string); + opt.matchword = false; + } + regexp_str = "(" + lead_as_regexp + ")()" + par_as_string; + regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string; } LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); - regexp = lyx::regex(regexp_str); - LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); - regexp2 = lyx::regex(regexp2_str); - } -} - - -// Count number of characters in string -// {]} ==> 1 -// \& ==> 1 -// --- ==> 1 -// \\[a-zA-Z]+ ==> 1 -static int computeSize(string s, int len) -{ - if (len == 0) - return 0; - int skip = 1; - int count = 0; - for (int i = 0; i < len; i += skip, count++) { - if (s[i] == '\\') { - skip = 2; - if (isalpha(s[i+1])) { - for (int j = 2; i+j < len; j++) { - if (! isalpha(s[i+j])) { - if (s[i+j] == ' ') - skip++; - else if ((s[i+j] == '{') && s[i+j+1] == '}') - skip += 2; - else if ((s[i+j] == '{') && (i + j + 1 >= len)) - skip++; - break; - } - skip++; - } - } - } - else if (s[i] == '{') { - if (s[i+1] == '}') - skip = 2; - else - skip = 3; - } - else if (s[i] == '-') { - if (s[i+1] == '-') { - if (s[i+2] == '-') - skip = 3; - else - skip = 2; - } - else - skip = 1; - } - else { - skip = 1; - } + CreateRegexp(opt, regexp_str, regexp2_str, par_as_string); } - return count; } MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const { MatchResult mres; + mres.searched_size = len; if (at_begin && (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) ) return mres; docstring docstr = stringifyFromForSearch(opt, cur, len); - string str = normalize(docstr, true); + string str; + str = normalize(docstr); if (!opt.ignoreformat) { str = correctlanguagesetting(str, false, !opt.ignoreformat); + // remove closing '}' and '\n' to allow for use of '$' in regex + size_t lng = str.size(); + while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n'))) + lng--; + if (lng != str.size()) { + str = str.substr(0, lng); + } } if (str.empty()) { mres.match_len = -1; return mres; } - LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); - LYXERR(Debug::FIND, "After normalization: '" << str << "'"); + LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'"); - if (use_regexp) { + LASSERT(use_regexp, /**/); + { + // use_regexp always true LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); +#if QTSEARCH + QString qstr = QString::fromStdString(str); + QRegularExpression const *p_regexp; + QRegularExpression::MatchType flags = QRegularExpression::NormalMatch; + if (at_begin) { + p_regexp = ®exp; + } else { + p_regexp = ®exp2; + } + QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags); + if (!match.hasMatch()) + return mres; +#else regex const *p_regexp; regex_constants::match_flag_type flags; if (at_begin) { @@ -2898,84 +3141,106 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be if (re_it == sregex_iterator()) return mres; match_results const & m = *re_it; - - if (0) { // Kornel Benko: DO NOT CHECKK - // Check braces on the segment that matched the entire regexp expression, - // plus the last subexpression, if a (.*?) was inserted in the constructor. - if (!braces_match(m[0].first, m[0].second, open_braces)) - return mres; - } - - // Check braces on segments that matched all (.*?) subexpressions, - // except the last "padding" one inserted by lyx. - for (size_t i = 1; i < m.size() - 1; ++i) - if (!braces_match(m[i].first, m[i].second, open_braces)) - return mres; - - // Exclude from the returned match length any length - // due to close wildcards added at end of regexp - // and also the length of the leading (e.g. '\emph{}') +#endif + // Whole found string, including the leading + // std: m[0].second - m[0].first + // Qt: match.capturedEnd(0) - match.capturedStart(0) // - // Whole found string, including the leading: m[0].second - m[0].first - // Size of the leading string: m[1].second - m[1].first + // Size of the leading string + // std: m[1].second - m[1].first + // Qt: match.capturedEnd(1) - match.capturedStart(1) int leadingsize = 0; - if (m.size() > 1) - leadingsize = m[1].second - m[1].first; - int result; - for (size_t i = 0; i < m.size(); i++) { - LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long"); - } - if (close_wildcards == 0) - result = m[0].second - m[0].first; - - else - result = m[m.size() - close_wildcards].first - m[0].first; - - size_t pos = m.position(size_t(0)); - // Ignore last closing characters - while (result > 0) { - if (str[pos+result-1] == '}') - --result; - else - break; +#if QTSEARCH + if (match.lastCapturedIndex() > 0) { + leadingsize = match.capturedEnd(1) - match.capturedStart(1); } - if (result > leadingsize) - result -= leadingsize; - else - result = 0; - mres.match_len = computeSize(str.substr(pos+leadingsize,result), result); - mres.match2end = str.size() - pos - leadingsize; - mres.pos = pos+leadingsize; - return mres; - } - // else !use_regexp: but all code paths above return - LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='" - << par_as_string << "', str='" << str << "'"); - LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='" - << lead_as_string << "', par_as_string_nolead='" - << par_as_string_nolead << "'"); - - if (at_begin) { - LYXERR(Debug::FIND, "size=" << par_as_string.size() - << ", substr='" << str.substr(0, par_as_string.size()) << "'"); - if (str.substr(0, par_as_string.size()) == par_as_string) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size(); - mres.pos = 0; - return mres; +#else + if (m.size() > 2) { + leadingsize = m[1].second - m[1].first; } - } else { - // Start the search _after_ the leading part - size_t pos = str.find(par_as_string_nolead, lead_as_string.size()); - if (pos != string::npos) { - mres.match_len = par_as_string.size(); - mres.match2end = str.size() - pos; - mres.pos = pos; - return mres; +#endif +#if QTSEARCH + mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2); + mres.match_len = match.capturedEnd(0) - match.capturedEnd(2); + // because of different number of closing at end of string + // we have to 'unify' the length of the post-match. + // Done by ignoring closing parenthesis and linefeeds at string end + int matchend = match.capturedEnd(0); + size_t strsize = qstr.size(); + if (!opt.ignoreformat) { + while (mres.match_len > 0) { + QChar c = qstr.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > (size_t) match.capturedEnd(0)) { + QChar c = qstr.at(strsize-1); + if ((c == '\n') || (c == '}')) { + --strsize; + } + else + break; + } } + // LYXERR0(qstr.toStdString()); + mres.match2end = strsize - matchend; + mres.pos = match.capturedStart(2); +#else + mres.match_prefix = m[2].second - m[2].first; + mres.match_len = m[0].second - m[2].second; + // ignore closing parenthesis and linefeeds at string end + size_t strend = m[0].second - m[0].first; + int matchend = strend; + size_t strsize = str.size(); + if (!opt.ignoreformat) { + while (mres.match_len > 0) { + char c = str.at(matchend - 1); + if ((c == '\n') || (c == '}') || (c == '{')) { + mres.match_len--; + matchend--; + } + else + break; + } + while (strsize > strend) { + if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) { + --strsize; + } + else + break; + } + } + // LYXERR0(str); + mres.match2end = strsize - matchend; + mres.pos = m[2].first - m[0].first;; +#endif + if (mres.match2end < 0) + mres.match_len = 0; + mres.leadsize = leadingsize; +#if QTSEARCH + if (mres.match_len > 0) { + string a0 = match.captured(0).mid(mres.pos + mres.match_prefix, mres.match_len).toStdString(); + mres.result.push_back(a0); + for (int i = 3; i <= match.lastCapturedIndex(); i++) { + mres.result.push_back(match.captured(i).toStdString()); + } + } +#else + if (mres.match_len > 0) { + string a0 = m[0].str().substr(mres.pos + mres.match_prefix, mres.match_len); + mres.result.push_back(a0); + for (size_t i = 3; i < m.size(); i++) { + mres.result.push_back(m[i]); + } + } +#endif + return mres; } - return mres; } @@ -2985,48 +3250,48 @@ MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at int res = mres.match_len; LYXERR(Debug::FIND, "res=" << res << ", at_begin=" << at_begin - << ", matchword=" << opt.matchword + << ", matchAtStart=" << opt.matchAtStart << ", inTexted=" << cur.inTexted()); - if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted()) + if (opt.matchAtStart) { + if (cur.pos() != 0) + mres.match_len = 0; + else if (mres.match_prefix > 0) + mres.match_len = 0; + return mres; + } + else return mres; - if ((len > 0) && (res < len)) { - mres.match_len = 0; - return mres; - } - Paragraph const & par = cur.paragraph(); - bool ws_left = (cur.pos() > 0) - ? par.isWordSeparator(cur.pos() - 1) - : true; - bool ws_right = (cur.pos() + len < par.size()) - ? par.isWordSeparator(cur.pos() + len) - : true; - LYXERR(Debug::FIND, - "cur.pos()=" << cur.pos() << ", res=" << res - << ", separ: " << ws_left << ", " << ws_right - << ", len: " << len - << endl); - if (ws_left && ws_right) { - // Check for word separators inside the found 'word' - for (int i = 0; i < len; i++) { - if (par.isWordSeparator(cur.pos() + i)) { - mres.match_len = 0; - return mres; - } - } - return mres; - } - mres.match_len = 0; - return mres; } +#if 0 +static bool simple_replace(string &t, string from, string to) +{ + regex repl("(\\\\)*(" + from + ")"); + string s(""); + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), repl), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) + continue; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += to; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + return false; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = s; + return true; +} +#endif -string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const +string MatchStringAdv::normalize(docstring const & s) const { string t; - if (! opt.casesensitive) - t = lyx::to_utf8(lowercase(s)); - else - t = lyx::to_utf8(s); + t = lyx::to_utf8(s); // Remove \n at begin while (!t.empty() && t[0] == '\n') t = t.substr(1); @@ -3057,24 +3322,12 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify // Kornel: Added textsl, textsf, textit, texttt and noun // + allow to seach for colored text too - LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t); + LYXERR(Debug::FIND, "Removing stale empty macros from: " << t); while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t); - while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); - // FIXME - check what preceeds the brace - if (hack_braces) { - if (opt.ignoreformat) - while (regex_replace(t, t, "\\{", "_x_<") - || regex_replace(t, t, "\\}", "_x_>")) - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); - else - while (regex_replace(t, t, "\\\\\\{", "_x_<") - || regex_replace(t, t, "\\\\\\}", "_x_>")) - LYXERR(Debug::FIND, "After {} replacement: '" << t << "'"); - } return t; } @@ -3092,15 +3345,22 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) // OutputParams runparams(&cur.buffer()->params().encoding()); OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = true; - runparams.flavor = OutputParams::XETEX; + runparams.flavor = Flavor::XeTeX; runparams.linelen = 10000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + int option = AS_STR_INSETS | AS_STR_PLAINTEXT; + if (ignoreFormats.getDeleted()) { + option |= AS_STR_SKIPDELETE; + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } LYXERR(Debug::FIND, "Stringifying with cur: " << cur << ", from pos: " << cur.pos() << ", end: " << end); return par.asString(cur.pos(), end, - AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT, + option, &runparams); } else if (cur.inMathed()) { CursorSlice cs = cur.top(); @@ -3128,9 +3388,11 @@ docstring stringifyFromCursor(DocIterator const & cur, int len) */ docstring latexifyFromCursor(DocIterator const & cur, int len) { + /* LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur); LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow=" << cur.lastrow() << ", cur.lastcol=" << cur.lastcol()); + */ Buffer const & buf = *cur.buffer(); odocstringstream ods; @@ -3138,11 +3400,16 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) //OutputParams runparams(&buf.params().encoding()); OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = false; - runparams.flavor = OutputParams::XETEX; + runparams.flavor = Flavor::XeTeX; runparams.linelen = 8000; //lyxrc.plaintext_linelen; // No side effect of file copying and image conversion runparams.dryrun = true; - runparams.for_search = true; + if (ignoreFormats.getDeleted()) { + runparams.for_searchAdv = OutputParams::SearchWithoutDeleted; + } + else { + runparams.for_searchAdv = OutputParams::SearchWithDeleted; + } if (cur.inTexted()) { // @TODO what about searching beyond/across paragraph breaks ? @@ -3159,7 +3426,7 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) for (int s = cur.depth() - 1; s >= 0; --s) { CursorSlice const & cs = cur[s]; if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) { - WriteStream ws(os); + TeXMathStream ws(os); cs.asInsetMath()->asHullInset()->header_write(ws); break; } @@ -3183,7 +3450,7 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) CursorSlice const & cs2 = cur[s]; InsetMath * inset = cs2.asInsetMath(); if (inset && inset->asHullInset()) { - WriteStream ws(os); + TeXMathStream ws(os); inset->asHullInset()->footer_write(ws); break; } @@ -3195,93 +3462,161 @@ docstring latexifyFromCursor(DocIterator const & cur, int len) return ods.str(); } +#if defined(ResultsDebug) +// Debugging output +static void displayMResult(MatchResult &mres, string from, DocIterator & cur) +{ + LYXERR0( "from:\t\t\t" << from); + string status; + if (mres.pos_len > 0) { + // Set in finalize + status = "FINALSEARCH"; + } + else { + if (mres.match_len > 0) { + if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize)) + status = "Good Match"; + else + status = "Matched in"; + } + else + status = "MissedSearch"; + } + + LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")"); + if ((mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0)) + LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")"); + if ((mres.pos > 0) || (mres.match_prefix > 0)) + LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")"); + for (size_t i = 0; i < mres.result.size(); i++) + LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\""); +} + #define displayMres(s, txt, cur) displayMResult(s, txt, cur); +#else + #define displayMres(s, txt, cur) +#endif /** Finalize an advanced find operation, advancing the cursor to the innermost ** position that matches, plus computing the length of the matching text to ** be selected + ** Return the cur.pos() difference between start and end of found match **/ -int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match) +MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1)) { // Search the foremost position that matches (avoids find of entire math // inset when match at start of it) - size_t d; DocIterator old_cur(cur.buffer()); - do { - LYXERR(Debug::FIND, "Forwarding one step (searching for innermost match)"); - d = cur.depth(); + MatchResult mres; + static MatchResult fail = MatchResult(); + MatchResult max_match; + // If (prefix_len > 0) means that forwarding 1 position will remove the complete entry + // Happens with e.g. hyperlinks + // either one sees "http://www.bla.bla" or nothing + // so the search for "www" gives prefix_len = 7 (== sizeof("http://") + // and although we search for only 3 chars, we find the whole hyperlink inset + bool at_begin = (expected.match_prefix == 0); + if (!match.opt.forward && match.opt.ignoreformat) { + if (expected.pos > 0) + return fail; + } + LASSERT(at_begin, /**/); + if (expected.match_len > 0 && at_begin) { + // Search for deepest match old_cur = cur; - cur.forwardPos(); - } while (cur && cur.depth() > d && match(cur).match_len > 0); - cur = old_cur; - int max_match = match(cur).match_len; /* match valid only if not searching whole words */ - if (max_match <= 0) return 0; + max_match = expected; + do { + size_t d = cur.depth(); + cur.forwardPos(); + if (!cur) + break; + if (cur.depth() < d) + break; + if (cur.depth() == d) + break; + size_t lastd = d; + while (cur && cur.depth() > lastd) { + lastd = cur.depth(); + mres = match(cur, -1, at_begin); + displayMres(mres, "Checking innermost", cur); + if (mres.match_len > 0) + break; + // maybe deeper? + cur.forwardPos(); + } + if (mres.match_len < expected.match_len) + break; + max_match = mres; + old_cur = cur;; + } while(1); + cur = old_cur; + } + else { + // (expected.match_len <= 0) + mres = match(cur); /* match valid only if not searching whole words */ + displayMres(mres, "Start with negative match", cur); + max_match = mres; + } + if (max_match.match_len <= 0) return fail; LYXERR(Debug::FIND, "Ok"); // Compute the match length - int len = 1; + int len = 1; if (cur.pos() + len > cur.lastpos()) - return 0; - if (match.opt.matchword) { - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) { - ++len; - LYXERR(Debug::FIND, "verifying unmatch with len = " << len); - } - // Length of matched text (different from len param) - int old_match = match(cur, len).match_len; - if (old_match < 0) - old_match = 0; - int new_match; + return fail; + + LASSERT(match.use_regexp, /**/); + { + int minl = 1; + int maxl = cur.lastpos() - cur.pos(); // Greedy behaviour while matching regexps - while ((new_match = match(cur, len + 1).match_len) > old_match) { - ++len; - old_match = new_match; - LYXERR(Debug::FIND, "verifying match with len = " << len); + while (maxl > minl) { + MatchResult mres2; + mres2 = match(cur, len, at_begin); + displayMres(mres2, "Finalize loop", cur); + int actual_match_len = mres2.match_len; + if (actual_match_len >= max_match.match_len) { + // actual_match_len > max_match _can_ happen, + // if the search area splits + // some following word so that the regex + // (e.g. 'r.*r\b' matches 'r' from the middle of the + // splitted word) + // This means, the len value is too big + actual_match_len = max_match.match_len; + max_match = mres2; + max_match.match_len = actual_match_len; + maxl = len; + if (maxl - minl < 4) + len = (int)((maxl + minl)/2); + else + len = (int)(minl + (maxl - minl + 3)/4); + } + else { + // (actual_match_len < max_match.match_len) + minl = len + 1; + len = (int)((maxl + minl)/2); + } } - if (old_match == 0) - len = 0; - } - else { - int minl = 1; - int maxl = cur.lastpos() - cur.pos(); - // Greedy behaviour while matching regexps - while (maxl > minl) { - int actual_match = match(cur, len).match_len; - if (actual_match >= max_match) { - // actual_match > max_match _can_ happen, - // if the search area splits - // some following word so that the regex - // (e.g. 'r.*r\b' matches 'r' from the middle of the - // splitted word) - // This means, the len value is too big - maxl = len; - len = (int)((maxl + minl)/2); - } - else { - // (actual_match < max_match) - minl = len + 1; - len = (int)((maxl + minl)/2); - } - } + len = minl; old_cur = cur; // Search for real start of matched characters while (len > 1) { - int actual_match; + MatchResult actual_match; do { cur.forwardPos(); } while (cur.depth() > old_cur.depth()); /* Skip inner insets */ if (cur.depth() < old_cur.depth()) { // Outer inset? - LYXERR0("cur.depth() < old_cur.depth(), this should never happen"); + LYXERR(Debug::INFO, "cur.depth() < old_cur.depth(), this should never happen"); break; } if (cur.pos() != old_cur.pos()) { // OK, forwarded 1 pos in actual inset - actual_match = match(cur, len-1).match_len; - if (actual_match == max_match) { + actual_match = match(cur, len-1, at_begin); + if (actual_match.match_len == max_match.match_len) { // Ha, got it! The shorter selection has the same match length len--; old_cur = cur; + max_match = actual_match; } else { // OK, the shorter selection matches less chars, revert to previous value @@ -3290,110 +3625,132 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match) } } else { - LYXERR0("cur.pos() == old_cur.pos(), this should never happen"); - actual_match = match(cur, len).match_len; - if (actual_match == max_match) + LYXERR(Debug::INFO, "cur.pos() == old_cur.pos(), this should never happen"); + actual_match = match(cur, len, at_begin); + if (actual_match.match_len == max_match.match_len) { old_cur = cur; + max_match = actual_match; + } } } - } - return len; + if (len == 0) + return fail; + else { + max_match.pos_len = len; + displayMres(max_match, "SEARCH RESULT", cur) + return max_match; + } + } } - /// Finds forward int findForwardAdv(DocIterator & cur, MatchStringAdv & match) { if (!cur) return 0; + bool repeat = false; + DocIterator orig_cur; // to be used if repeat not successful + MatchResult orig_mres; while (!theApp()->longOperationCancelled() && cur) { + //(void) findAdvForwardInnermost(cur); LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); MatchResult mres = match(cur, -1, false); + string msg = "Starting"; + if (repeat) + msg = "Repeated"; + displayMres(mres, msg + " findForwardAdv", cur) int match_len = mres.match_len; - LYXERR(Debug::FIND, "match_len: " << match_len); if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) { - LYXERR0("BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); + LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); match_len = 0; } - if (match_len > 0) { + if (match_len <= 0) { + // This should exit nested insets, if any, or otherwise undefine the currsor. + cur.pos() = cur.lastpos(); + LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); + cur.forwardPos(); + } + else { // match_len > 0 // Try to find the begin of searched string - int increment = mres.pos/2; - while (mres.pos > 5 && (increment > 5)) { + int increment; + int firstInvalid = cur.lastpos() - cur.pos(); + { + int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4; + int incrcur = (firstInvalid + 1 )*3/4; + if (incrcur < incrmatch) + increment = incrcur; + else + increment = incrmatch; + if (increment < 1) + increment = 1; + } + LYXERR(Debug::FIND, "Set increment to " << increment); + while (increment > 0) { DocIterator old_cur = cur; - for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { - } - if (! cur || (cur.pit() > old_cur.pit())) { - // Are we outside of the paragraph? - // This can happen if moving past some UTF8-encoded chars - cur = old_cur; + if (cur.pos() + increment >= cur.lastpos()) { increment /= 2; + continue; } - else { - MatchResult mres2 = match(cur, -1, false); - if ((mres2.match2end < mres.match2end) || - (mres2.match_len < mres.match_len)) { + cur.pos() = cur.pos() + increment; + MatchResult mres2 = match(cur, -1, false); + displayMres(mres2, "findForwardAdv loop", cur) + switch (interpretMatch(mres, mres2)) { + case MatchResult::newIsTooFar: + // behind the expected match + firstInvalid = increment; cur = old_cur; increment /= 2; - } - else { + break; + case MatchResult::newIsBetter: + // not reached yet, but cur.pos()+increment is bettert mres = mres2; - increment -= 2; - if (increment > mres.pos/2) - increment = mres.pos/2; - } - } - } - int match_len_zero_count = 0; - for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { - if (i++ > 10) { - int remaining_len = match(cur, -1, false).match_len; - if (remaining_len <= 0) { - // Apparently the searched string is not in the remaining part + firstInvalid -= increment; + if (increment > firstInvalid*3/4) + increment = firstInvalid*3/4; + if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) { + if (increment >= mres2.match_prefix) + increment = (mres2.match_prefix+1)*3/4; + } break; - } - else { - i = 0; - } + default: + // Todo@ + // Handle not like MatchResult::newIsTooFar + LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix); + firstInvalid--; + increment = increment*3/4; + cur = old_cur; + break; } - LYXERR(Debug::FIND, "Advancing cur: " << cur); - int match_len3 = match(cur, 1).match_len; - if (match_len3 < 0) + } + if (mres.match_len > 0) { + if (mres.match_prefix + mres.pos - mres.leadsize > 0) { + // The match seems to indicate some deeper level + repeat = true; + orig_cur = cur; + orig_mres = mres; + cur.forwardPos(); continue; - int match_len2 = match(cur).match_len; - LYXERR(Debug::FIND, "match_len2: " << match_len2); - if (match_len2 > 0) { - // Sometimes in finalize we understand it wasn't a match - // and we need to continue the outest loop - int len = findAdvFinalize(cur, match); - if (len > 0) { - return len; - } - } - if (match_len2 >= 0) { - if (match_len2 == 0) - match_len_zero_count++; - else - match_len_zero_count = 0; - } - else { - if (++match_len_zero_count > 3) { - LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len); - match_len_zero_count = 0; - } - break; } } - if (!cur) - return 0; - } - if (match_len >= 0 && cur.pit() < cur.lastpit()) { - LYXERR(Debug::FIND, "Advancing par: cur=" << cur); - cur.forwardPar(); - } else { - // This should exit nested insets, if any, or otherwise undefine the currsor. - cur.pos() = cur.lastpos(); - LYXERR(Debug::FIND, "Advancing pos: cur=" << cur); - cur.forwardPos(); + else if (repeat) { + // should never be reached. + cur = orig_cur; + mres = orig_mres; + } + // LYXERR0("Leaving first loop"); + LYXERR(Debug::FIND, "Finalizing 1"); + MatchResult found_match = findAdvFinalize(cur, match, mres); + if (found_match.match_len > 0) { + LASSERT(found_match.pos_len > 0, /**/); + match.FillResults(found_match); + return found_match.pos_len; + } + else { + // try next possible match + cur.forwardPos(); + repeat = false; + continue; + } } } return 0; @@ -3401,11 +3758,12 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match) /// Find the most backward consecutive match within same paragraph while searching backwards. -int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) +MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, MatchResult &expected) { - DocIterator cur_begin = doc_iterator_begin(cur.buffer()); + DocIterator cur_begin = cur; + cur_begin.pos() = 0; DocIterator tmp_cur = cur; - int len = findAdvFinalize(tmp_cur, match); + MatchResult mr = findAdvFinalize(tmp_cur, match, expected); Inset & inset = cur.inset(); for (; cur != cur_begin; cur.backwardPos()) { LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur); @@ -3413,13 +3771,13 @@ int findMostBackwards(DocIterator & cur, MatchStringAdv const & match) new_cur.backwardPos(); if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len) break; - int new_len = findAdvFinalize(new_cur, match); - if (new_len == len) + MatchResult new_mr = findAdvFinalize(new_cur, match, expected); + if (new_mr.match_len == mr.match_len) break; - len = new_len; + mr = new_mr; } LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur); - return len; + return mr; } @@ -3437,9 +3795,9 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) bool pit_changed = false; do { cur.pos() = 0; - bool found_match = (match(cur, -1, false).match_len > 0); + MatchResult found_match = match(cur, -1, false); - if (found_match) { + if (found_match.match_len > 0) { if (pit_changed) cur.pos() = cur.lastpos(); else @@ -3447,11 +3805,16 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match) LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur); DocIterator cur_prev_iter; do { - found_match = (match(cur).match_len > 0); + found_match = match(cur); LYXERR(Debug::FIND, "findBackAdv3: found_match=" - << found_match << ", cur: " << cur); - if (found_match) - return findMostBackwards(cur, match); + << (found_match.match_len > 0) << ", cur: " << cur); + if (found_match.match_len > 0) { + MatchResult found_mr = findMostBackwards(cur, match, found_match); + if (found_mr.pos_len > 0) { + match.FillResults(found_mr); + return found_mr.pos_len; + } + } // Stop if begin of document reached if (cur == cur_begin) @@ -3488,13 +3851,13 @@ docstring stringifyFromForSearch(FindAndReplaceOptions const & opt, FindAndReplaceOptions::FindAndReplaceOptions( - docstring const & find_buf_name, bool casesensitive, - bool matchword, bool forward, bool expandmacros, bool ignoreformat, - docstring const & repl_buf_name, bool keep_case, - SearchScope scope, SearchRestriction restr, bool replace_all) - : find_buf_name(find_buf_name), casesensitive(casesensitive), matchword(matchword), - forward(forward), expandmacros(expandmacros), ignoreformat(ignoreformat), - repl_buf_name(repl_buf_name), keep_case(keep_case), scope(scope), restr(restr), replace_all(replace_all) + docstring const & _find_buf_name, bool _casesensitive, + bool _matchword, bool _forward, bool _expandmacros, bool _ignoreformat, + docstring const & _repl_buf_name, bool _keep_case, + SearchScope _scope, SearchRestriction _restr, bool _replace_all) + : find_buf_name(_find_buf_name), casesensitive(_casesensitive), matchword(_matchword), + forward(_forward), expandmacros(_expandmacros), ignoreformat(_ignoreformat), + repl_buf_name(_repl_buf_name), keep_case(_keep_case), scope(_scope), restr(_restr), replace_all(_replace_all) { } @@ -3546,20 +3909,47 @@ static bool firstUppercase(Cursor const & cur) static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase others_case) { ParagraphList::iterator pit = buffer.paragraphs().begin(); - LASSERT(pit->size() >= 1, /**/); + LASSERT(!pit->empty(), /**/); pos_type right = pos_type(1); pit->changeCase(buffer.params(), pos_type(0), right, first_case); right = pit->size(); pit->changeCase(buffer.params(), pos_type(1), right, others_case); } - } // namespace +static bool replaceMatches(string &t, int maxmatchnum, vector const & replacements) +{ + // Should replace the string "$" + std::to_string(matchnum) with replacement + // if the char '$' is not prefixed with odd number of char '\\' + static regex const rematch("(\\\\)*(\\$\\$([0-9]))"); + string s; + size_t lastpos = 0; + smatch sub; + for (sregex_iterator it(t.begin(), t.end(), rematch), end; it != end; ++it) { + sub = *it; + if ((sub.position(2) - sub.position(0)) % 2 == 1) + continue; + int num = stoi(sub.str(3), nullptr, 10); + if (num >= maxmatchnum) + continue; + if (lastpos < (size_t) sub.position(2)) + s += t.substr(lastpos, sub.position(2) - lastpos); + s += replacements[num]; + lastpos = sub.position(2) + sub.length(2); + } + if (lastpos == 0) + return false; + else if (lastpos < t.length()) + s += t.substr(lastpos, t.length() - lastpos); + t = s; + return true; +} + /// static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv) { Cursor & cur = bv->cursor(); - if (opt.repl_buf_name == docstring() + if (opt.repl_buf_name.empty() || theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0 || theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0) return 0; @@ -3581,10 +3971,13 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma return 0; // Build a copy of the replace buffer, adapted to the KeepCase option - Buffer & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true); + Buffer const & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true); ostringstream oss; repl_buffer_orig.write(oss); string lyx = oss.str(); + if (matchAdv.valid_matches > 0) { + replaceMatches(lyx, matchAdv.valid_matches, matchAdv.matches); + } Buffer repl_buffer("", false); repl_buffer.setUnnamed(true); LASSERT(repl_buffer.readString(lyx), return 0); @@ -3615,7 +4008,7 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma // OutputParams runparams(&repl_buffer.params().encoding()); OutputParams runparams(encodings.fromLyXName("utf8")); runparams.nice = false; - runparams.flavor = OutputParams::XETEX; + runparams.flavor = Flavor::XeTeX; runparams.linelen = 8000; //lyxrc.plaintext_linelen; runparams.dryrun = true; TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams); @@ -3645,10 +4038,10 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma /// Perform a FindAdv operation. -bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) +bool findAdv(BufferView * bv, FindAndReplaceOptions & opt) { DocIterator cur; - int match_len = 0; + int pos_len = 0; // e.g., when invoking word-findadv from mini-buffer wither with // wrong options syntax or before ever opening advanced F&R pane @@ -3657,22 +4050,27 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) try { MatchStringAdv matchAdv(bv->buffer(), opt); +#if QTSEARCH + if (!matchAdv.regexIsValid) { + bv->message(lyx::from_utf8(matchAdv.regexError)); + return(false); + } +#endif int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos(); if (length > 0) bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward); num_replaced += findAdvReplace(bv, opt, matchAdv); cur = bv->cursor(); if (opt.forward) - match_len = findForwardAdv(cur, matchAdv); + pos_len = findForwardAdv(cur, matchAdv); else - match_len = findBackwardsAdv(cur, matchAdv); - } catch (...) { - // This may only be raised by lyx::regex() - bv->message(_("Invalid regular expression!")); + pos_len = findBackwardsAdv(cur, matchAdv); + } catch (exception & ex) { + bv->message(from_utf8(ex.what())); return false; } - if (match_len == 0) { + if (pos_len == 0) { if (num_replaced > 0) { switch (num_replaced) { @@ -3699,8 +4097,13 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) else bv->message(_("Match found.")); - LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len); - bv->putSelectionAt(cur, match_len, !opt.forward); + if (cur.pos() + pos_len > cur.lastpos()) { + // Prevent crash in bv->putSelectionAt() + // Should never happen, maybe LASSERT() here? + pos_len = cur.lastpos() - cur.pos(); + } + LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len); + bv->putSelectionAt(cur, pos_len, !opt.forward); return true; } @@ -3728,7 +4131,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) { - LYXERR(Debug::FIND, "parsing"); + // LYXERR(Debug::FIND, "parsing"); string s; string line; getline(is, line); @@ -3740,7 +4143,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); + // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'"); opt.find_buf_name = from_utf8(s); is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all; is.get(); // Waste space before replace string @@ -3754,7 +4157,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) break; getline(is, line); } - LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); + // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'"); opt.repl_buf_name = from_utf8(s); is >> opt.keep_case; int i; @@ -3763,9 +4166,11 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt) is >> i; opt.restr = FindAndReplaceOptions::SearchRestriction(i); + /* LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' ' << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' ' << opt.scope << ' ' << opt.restr); + */ return is; }