From f500a287d4c2a5d18321762ffe65d207d5678d24 Mon Sep 17 00:00:00 2001 From: Kornel Benko Date: Fri, 1 Jan 2021 21:50:36 +0100 Subject: [PATCH] FindAdv: Try to make regex search with format enabled somehow faster --- src/lyxfind.cpp | 155 ++++++++++++++++++++++++++++-------------------- 1 file changed, 92 insertions(+), 63 deletions(-) diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 3e8f5e8594..5038662ec5 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -51,9 +51,17 @@ #include #include -#include // sets QT_VERSION -#if (QT_VERSION >= 0x050000) -#include +#define USE_QT_FOR_SEARCH +#if defined(USE_QT_FOR_SEARCH) + #include // sets QT_VERSION + #if (QT_VERSION >= 0x050000) + #include + #define QTSEARCH 1 + #else + #define QTSEARCH 0 + #endif +#else + #define QTSEARCH 0 #endif using namespace std; @@ -797,7 +805,7 @@ bool regex_replace(string const & s, string & t, string const & searchstr, ** @param unmatched ** Number of open braces that must remain open at the end for the verification to succeed. 
**/ -#if (QT_VERSION >= 0x050000) +#if QTSEARCH bool braces_match(QString const & beg, int unmatched = 0) #else @@ -806,7 +814,7 @@ bool braces_match(string const & beg, #endif { int open_pars = 0; -#if (QT_VERSION >= 0x050000) +#if QTSEARCH LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'"); #else LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'"); @@ -814,7 +822,7 @@ bool braces_match(string const & beg, int lastidx = beg.size(); for (int i=0; i < lastidx; ++i) { // Skip escaped braces in the count -#if (QT_VERSION >= 0x050000) +#if QTSEARCH QChar c = beg.at(i); #else char c = beg.at(i); @@ -847,9 +855,11 @@ bool braces_match(string const & beg, class MatchResult { public: int match_len; + int match_prefix; int match2end; int pos; - MatchResult(): match_len(0),match2end(0), pos(0) {}; + int leadsize; + MatchResult(): match_len(0),match_prefix(0),match2end(0), pos(0),leadsize(0) {}; }; /** The class performing a match between a position in the document and the FindAdvOptions. @@ -870,7 +880,7 @@ public: ** The length of the matching text, or zero if no match was found. **/ MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const; -#if (QT_VERSION >= 0x050000) +#if QTSEARCH bool regexIsValid; string regexError; #endif @@ -905,7 +915,7 @@ private: string par_as_string; // regular expression to use for searching // regexp2 is same as regexp, but prefixed with a ".*?" 
-#if (QT_VERSION >= 0x050000) +#if QTSEARCH QRegularExpression regexp; QRegularExpression regexp2; #else @@ -2372,6 +2382,7 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual) } case KeyInfo::isText: interval_.addIntervall(actual._tokenstart, actual._tokenstart+1); + nextKeyIdx = getNextKey(); break; case KeyInfo::noContent: { /* char like "\hspace{2cm}" */ if (actual.disabled) @@ -2952,17 +2963,17 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & // TODO: Adapt '\[12345678]' in par_as_string to acount for the first '() // Unfortunately is '\1', '\2', etc not working for strings with extra format // so the convert has no effect in that case - for (int i = 8; i > 0; --i) { + for (int i = 7; i > 0; --i) { string orig = "\\\\" + std::to_string(i); - string dest = "\\" + std::to_string(i+1); + string dest = "\\" + std::to_string(i+2); while (regex_replace(par_as_string, par_as_string, orig, dest)); } - regexp_str = "(" + lead_as_regexp + ")" + par_as_string; - regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string; + regexp_str = "(" + lead_as_regexp + ")()" + par_as_string; + regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string; } LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); -#if (QT_VERSION >= 0x050000) +#if QTSEARCH // Handle \w properly QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption; if (! 
opt.casesensitive) { @@ -3022,7 +3033,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & // \& ==> 1 // --- ==> 1 // \\[a-zA-Z]+ ==> 1 -#if (QT_VERSION >= 0x050000) +#if QTSEARCH static int computeSize(QStringRef s, int len) #define isLyxAlpha(arg) arg.isLetter() #else @@ -3103,7 +3114,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be if (use_regexp) { LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); -#if (QT_VERSION >= 0x050000) +#if QTSEARCH QString qstr = QString::fromStdString(str); QRegularExpression const *p_regexp; QRegularExpression::MatchType flags = QRegularExpression::NormalMatch; @@ -3117,7 +3128,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be return mres; // Check braces on segments that matched all (.*?) subexpressions, // except the last "padding" one inserted by lyx. - for (int i = 1; i < match.lastCapturedIndex(); ++i) + for (int i = 3; i < match.lastCapturedIndex(); ++i) if (!braces_match(match.captured(i), open_braces)) return mres; #else @@ -3136,7 +3147,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be match_results const & m = *re_it; // Check braces on segments that matched all (.*?) subexpressions, // except the last "padding" one inserted by lyx. 
- for (size_t i = 1; i < m.size() - 1; ++i) + for (size_t i = 3; i < m.size() - 1; ++i) if (!braces_match(m[i], open_braces)) return mres; #endif @@ -3149,10 +3160,10 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be int leadingsize = 0; int result; size_t pos; -#if (QT_VERSION >= 0x050000) - if (match.lastCapturedIndex() > 0) +#if QTSEARCH + if (match.lastCapturedIndex() > 0) { leadingsize = match.capturedEnd(1) - match.capturedStart(1); - + } int lastidx = match.lastCapturedIndex(); for (int i = 0; i <= lastidx; i++) { LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long"); @@ -3162,17 +3173,11 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be else result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0); - pos = match.capturedStart(0); - // Ignore last closing characters - while (result > 0) { - if (qstr.at(pos+result-1) == '}') - --result; - else - break; - } + pos = match.capturedStart(1); #else - if (m.size() > 1) + if (m.size() > 2) { leadingsize = m[1].second - m[1].first; + } for (size_t i = 0; i < m.size(); i++) { LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long"); } @@ -3180,27 +3185,23 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be result = m[0].second - m[0].first; else result = m[m.size() - close_wildcards].first - m[0].first; - pos = m.position(size_t(0)); - // Ignore last closing characters - while (result > 0) { - if (str[pos+result-1] == '}') - --result; - else - break; - } + pos = m.position(size_t(1)); #endif if (result > leadingsize) result -= leadingsize; else result = 0; -#if (QT_VERSION >= 0x050000) - mres.match_len = computeSize(QStringRef(&qstr, pos+leadingsize,result), result); - mres.match2end = qstr.size() - pos - leadingsize; +#if QTSEARCH + mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2); + mres.match_len 
= computeSize(QStringRef(&qstr, pos+leadingsize,result), result) - mres.match_prefix; + mres.match2end = qstr.size() - pos - leadingsize - mres.match_prefix; #else - mres.match_len = computeSize(str.substr(pos+leadingsize,result), result); - mres.match2end = str.size() - pos - leadingsize; + mres.match_prefix = m[2].second - m[2].first; + mres.match_len = computeSize(str.substr(pos+leadingsize,result), result) - mres.match_prefix; + mres.match2end = str.size() - pos - leadingsize - mres.match_prefix; #endif - mres.pos = pos+leadingsize; + mres.pos = pos+leadingsize + mres.match_prefix; + mres.leadsize = leadingsize; return mres; } @@ -3558,6 +3559,20 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match) } +#if 0 +static void displayMResult(MatchResult &mres) +{ + LYXERR0( "pos: " << mres.pos); + LYXERR0( "leadsize: " << mres.leadsize); + LYXERR0( "match_len: " << mres.match_len); + LYXERR0( "match_prefix: " << mres.match_prefix); + LYXERR0( "match2end: " << mres.match2end); +} + #define displayMres(s) displayMResult(s); +#else + #define displayMres(s) +#endif + /// Finds forward int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) { @@ -3566,16 +3581,18 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) while (!theApp()->longOperationCancelled() && cur) { LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur); MatchResult mres = match(cur, -1, false); + displayMres(mres) int match_len = mres.match_len; - LYXERR(Debug::FIND, "match_len: " << match_len); if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) { LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end); match_len = 0; } if (match_len > 0) { // Try to find the begin of searched string - int increment = mres.pos/2; - while (mres.pos > 5 && (increment > 5)) { + int increment; + increment = mres.match_prefix/2; + LYXERR(Debug::FIND, "Set increment to " << increment); + while (mres.match_prefix 
> 1 && (increment > 1)) { DocIterator old_cur = cur; for (int i = 0; i < increment && cur; cur.forwardPos(), i++) { } @@ -3587,23 +3604,32 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) } else { MatchResult mres2 = match(cur, -1, false); + displayMres(mres2) if ((mres2.match2end < mres.match2end) || (mres2.match_len < mres.match_len)) { cur = old_cur; increment /= 2; } - else { + else if ((mres2.match2end == mres.match2end) && (mres2.match_len == mres.match_len)) { + // next part with the same increment as before mres = mres2; - increment -= 2; - if (increment > mres.pos/2) - increment = mres.pos/2; + if (increment > mres.match_prefix/2) + increment = mres.match_prefix/2; + } + else { + // Something wrong here + LYXERR0( "Increment = " << increment << " match_prefix = " << mres.match_prefix); + break; } } } int match_len_zero_count = 0; + MatchResult mres3; for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) { - if (i++ > 10) { - int remaining_len = match(cur, -1, false).match_len; + if (i++ > 3) { + mres3 = match(cur, -1, false); + displayMres(mres3) + int remaining_len = mres3.match_len; if (remaining_len <= 0) { // Apparently the searched string is not in the remaining part break; @@ -3613,26 +3639,29 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv const & match) } } LYXERR(Debug::FIND, "Advancing cur: " << cur); - int match_len3 = match(cur, 1).match_len; + mres3 = match(cur, 1); + displayMres(mres3) + int match_len3 = mres3.match_len; if (match_len3 < 0) continue; - int match_len2 = match(cur).match_len; + mres3 = match(cur); + displayMres(mres3) + int match_len2 = mres3.match_len; LYXERR(Debug::FIND, "match_len2: " << match_len2); if (match_len2 > 0) { // Sometimes in finalize we understand it wasn't a match // and we need to continue the outest loop + LYXERR(Debug::FIND, "Finalizing"); int len = findAdvFinalize(cur, match); if (len > 0) { return len; } } - if (match_len2 >= 0) { - if (match_len2 == 0) 
- match_len_zero_count++; - else - match_len_zero_count = 0; - } - else { + if (match_len2 > 0) + match_len_zero_count = 0; + else if (match_len2 == 0) + match_len_zero_count++; + if (match_len2 < 0) { if (++match_len_zero_count > 3) { LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len); } @@ -3913,7 +3942,7 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt) try { MatchStringAdv matchAdv(bv->buffer(), opt); -#if (QT_VERSION >= 0x050000) +#if QTSEARCH if (!matchAdv.regexIsValid) { bv->message(lyx::from_utf8(matchAdv.regexError)); return(false); -- 2.39.5