static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
{
- if (newres.match2end < oldres.match2end)
+ int range = oldres.match_len;
+ if (range < 2) range = 2;
+ if (newres.match2end < oldres.match2end - oldres.match_len)
return MatchResult::newIsTooFar;
if (newres.match_len < oldres.match_len)
return MatchResult::newIsTooFar;
- if ((newres.match_len == oldres.match_len) && (newres.match2end == oldres.match2end))
- return MatchResult::newIsBetter;
- if ((newres.match_len == oldres.match_len) && (newres.match2end -2 == oldres.match2end)) {
- // The string contained for instance "\usepackage...fontenc ..."
- // and now after moved 9 char forward contains "ge...{fontenc} ..."
- // so we accept it as OK
+ if ((newres.match_len == oldres.match_len) &&
+ (newres.match2end < oldres.match2end + range) &&
+ (newres.match2end > oldres.match2end - range)) {
return MatchResult::newIsBetter;
}
return MatchResult::newIsInvalid;
void LatexInfo::buildEntries(bool isPatternString)
{
- static regex const rmath("\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\}");
- static regex const rkeys("\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?))");
+ static regex const rmath("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\})");
+ static regex const rkeys("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?)))");
static bool disableLanguageOverride = false;
smatch sub, submath;
bool evaluatingRegexp = false;
for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) {
submath = *itmath;
+ if ((submath.position(2) - submath.position(0)) %2 == 1) {
+ // prefixed by odd count of '\\'
+ continue;
+ }
if (math_end_waiting) {
- size_t pos = submath.position(size_t(0));
+ size_t pos = submath.position(size_t(2));
if ((math_end == "$") &&
- (submath.str(0) == "$") &&
- (interval_.par[pos-1] != '\\')) {
+ (submath.str(2) == "$")) {
mi.insert("$", math_pos, pos + 1);
math_end_waiting = false;
}
else if ((math_end == "\\]") &&
- (submath.str(0) == "\\]")) {
+ (submath.str(2) == "\\]")) {
mi.insert("\\]", math_pos, pos + 2);
math_end_waiting = false;
}
- else if ((submath.str(1).compare("end") == 0) &&
- (submath.str(2).compare(math_end) == 0)) {
- mi.insert(math_end, math_pos, pos + submath.str(0).length());
+ else if ((submath.str(3).compare("end") == 0) &&
+ (submath.str(4).compare(math_end) == 0)) {
+ mi.insert(math_end, math_pos, pos + submath.str(2).length());
math_end_waiting = false;
}
else
continue;
}
else {
- if (submath.str(1).compare("begin") == 0) {
+ if (submath.str(3).compare("begin") == 0) {
math_end_waiting = true;
- math_end = submath.str(2);
- math_pos = submath.position(size_t(0));
+ math_end = submath.str(4);
+ math_pos = submath.position(size_t(2));
}
- else if (submath.str(0).compare("\\[") == 0) {
+ else if (submath.str(2).compare("\\[") == 0) {
math_end_waiting = true;
math_end = "\\]";
- math_pos = submath.position(size_t(0));
+ math_pos = submath.position(size_t(2));
}
- else if (submath.str(0) == "$") {
- size_t pos = submath.position(size_t(0));
- if ((pos == 0) || (interval_.par[pos-1] != '\\')) {
- math_end_waiting = true;
- math_end = "$";
- math_pos = pos;
- }
+ else if (submath.str(2) == "$") {
+ size_t pos = submath.position(size_t(2));
+ math_end_waiting = true;
+ math_end = "$";
+ math_pos = pos;
}
}
}
math_pos = mi.getFirstPos();
for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) {
sub = *it;
- string key = sub.str(3);
+ if ((sub.position(2) - sub.position(0)) %2 == 1) {
+ // prefixed by odd count of '\\'
+ continue;
+ }
+ string key = sub.str(5);
if (key == "") {
- if (sub.str(0)[0] == '\\')
- key = sub.str(0)[1];
+ if (sub.str(2)[0] == '\\')
+ key = sub.str(2)[1];
else {
- key = sub.str(0);
+ key = sub.str(2);
if (key == "$") {
- size_t k_pos = sub.position(size_t(0));
+ size_t k_pos = sub.position(size_t(2));
if ((k_pos > 0) && (interval_.par[k_pos - 1] == '\\')) {
// Escaped '$', ignoring
continue;
}
}
}
- };
+ }
if (keys.find(key) != keys.end()) {
if (keys[key].keytype == KeyInfo::headRemove) {
KeyInfo found1 = keys[key];
found1.disabled = true;
found1.head = "\\" + key + "{";
- found1._tokenstart = sub.position(size_t(0));
+ found1._tokenstart = sub.position(size_t(2));
found1._tokensize = found1.head.length();
found1._dataStart = found1._tokenstart + found1.head.length();
int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1);
}
}
if (evaluatingRegexp) {
- if (sub.str(1).compare("endregexp") == 0) {
+ if (sub.str(3).compare("endregexp") == 0) {
evaluatingRegexp = false;
// found._tokenstart already set
- found._dataEnd = sub.position(size_t(0)) + 13;
+ found._dataEnd = sub.position(size_t(2)) + 13;
found._dataStart = found._dataEnd;
found._tokensize = found._dataEnd - found._tokenstart;
found.parenthesiscount = 0;
}
else {
if (evaluatingMath) {
- if (size_t(sub.position(size_t(0))) < mi.getEndPos())
+ if (size_t(sub.position(size_t(2))) < mi.getEndPos())
continue;
evaluatingMath = false;
mi.incrEntry();
found = keys[key];
if (key.compare("regexp") == 0) {
evaluatingRegexp = true;
- found._tokenstart = sub.position(size_t(0));
+ found._tokenstart = sub.position(size_t(2));
found._tokensize = 0;
continue;
}
if (found.keytype == KeyInfo::isIgnored)
continue;
else if (found.keytype == KeyInfo::isMath) {
- if (size_t(sub.position(size_t(0))) == math_pos) {
+ if (size_t(sub.position(size_t(2))) == math_pos) {
found = keys[key];
- found._tokenstart = sub.position(size_t(0));
+ found._tokenstart = sub.position(size_t(2));
found._tokensize = mi.getSize();
found._dataEnd = found._tokenstart + found._tokensize;
found._dataStart = found._dataEnd;
bool discardComment;
found = keys[key];
found.keytype = KeyInfo::doRemove;
- if ((sub.str(5).compare("longtable") == 0) ||
- (sub.str(5).compare("tabular") == 0)) {
+ if ((sub.str(7).compare("longtable") == 0) ||
+ (sub.str(7).compare("tabular") == 0)) {
discardComment = true; /* '%' */
}
else {
discardComment = false;
static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$");
smatch sub2;
- string token = sub.str(5);
+ string token = sub.str(7);
if (regex_match(token, sub2, removeArgs)) {
found.keytype = KeyInfo::removeWithArg;
}
}
- // discard spaces before pos(0)
- int pos = sub.position(size_t(0));
+ // discard spaces before pos(2)
+ int pos = sub.position(size_t(2));
int count;
for (count = 0; pos - count > 0; count++) {
char c = interval_.par[pos-count-1];
break;
}
found._tokenstart = pos - count;
- if (sub.str(1).compare(0, 5, "begin") == 0) {
- size_t pos1 = pos + sub.str(0).length();
- if (sub.str(5).compare("cjk") == 0) {
+ if (sub.str(3).compare(0, 5, "begin") == 0) {
+ size_t pos1 = pos + sub.str(2).length();
+ if (sub.str(7).compare("cjk") == 0) {
pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1;
if ((interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}'))
pos1 += 2;
}
else {
// Handle "\end{...}"
- found._dataStart = pos + sub.str(0).length();
+ found._dataStart = pos + sub.str(2).length();
found._dataEnd = found._dataStart;
found._tokensize = count + found._dataEnd - pos;
found.parenthesiscount = 0;
}
}
else if (found.keytype != KeyInfo::isRegex) {
- found._tokenstart = sub.position(size_t(0));
+ found._tokenstart = sub.position(size_t(2));
if (found.parenthesiscount == 0) {
// Probably to be discarded
- size_t following_pos = sub.position(size_t(0)) + sub.str(3).length() + 1;
+ size_t following_pos = sub.position(size_t(2)) + sub.str(5).length() + 1;
char following = interval_.par[following_pos];
if (following == ' ')
- found.head = "\\" + sub.str(3) + " ";
+ found.head = "\\" + sub.str(5) + " ";
else if (following == '=') {
// like \uldepth=1000pt
- found.head = sub.str(0);
+ found.head = sub.str(2);
}
else
found.head = "\\" + key;
evaluatingOptional = true;
optionalEnd = optend;
}
- string token = sub.str(5);
+ string token = sub.str(7);
int closings;
if (interval_.par[optend] != '{') {
closings = 0;
}
else if (found.parenthesiscount > 1) {
if (token != "") {
- found.head = sub.str(0) + "{";
+ found.head = sub.str(2) + "{";
closings = found.parenthesiscount - 1;
}
else {
if (found.keytype == KeyInfo::doRemove) {
if (closings > 0) {
size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
- if (endpar >= interval_.par.length())
- found._dataStart = interval_.par.length();
- else
- found._dataStart = endpar;
+ if (endpar >= interval_.par.length())
+ found._dataStart = interval_.par.length();
+ else
+ found._dataStart = endpar;
found._tokensize = found._dataStart - found._tokenstart;
}
else {
string lead_as_regexp;
if (lead_size > 0) {
// @todo No need to search for \regexp{} insets in leading material
- lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat);
+ static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\\])" };
+ lead_as_regexp = std::regex_replace(par_as_string.substr(0, lead_size), specialChars, R"(\$&)" );
+ // lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat);
par_as_string = par_as_string_nolead;
LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
return mres;
}
+static bool simple_replace(string &t, string from, string to) // Replace hits of the regex fragment `from` in `t` by the text `to`, skipping hits preceded by an odd number of backslashes
+{ // Returns true (and overwrites `t`) once a replacement has advanced `lastpos`; returns false and leaves `t` untouched otherwise
+ regex repl("(\\\\)*(" + from + ")"); // group 1: repeated single backslash in front of the hit, group 2: the hit itself (`from` is spliced into the pattern unescaped)
+ string s(""); // output buffer, rebuilt piece by piece
+ size_t lastpos = 0; // offset in `t` just past the last replaced hit; 0 doubles as "nothing replaced yet"
+ smatch sub;
+ for (sregex_iterator it(t.begin(), t.end(), repl), end; it != end; ++it) {
+ sub = *it;
+ if ((sub.position(2) - sub.position(0)) % 2 == 1) // distance from match start to group 2 == number of leading backslashes
+ continue; // odd count: this hit is left as is
+ if (lastpos < (size_t) sub.position(2))
+ s += t.substr(lastpos, sub.position(2) - lastpos); // copy the unchanged text since the previous replaced hit (leading backslashes and skipped hits included)
+ s += to; // `to` is appended verbatim
+ lastpos = sub.position(2) + sub.length(2); // resume copying right after the replaced hit
+ }
+ if (lastpos == 0) // no hit was replaced
+ return false;
+ else if (lastpos < t.length())
+ s += t.substr(lastpos, t.length() - lastpos); // append the tail after the last replaced hit
+ t = s;
+ return true;
+}
string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
{
while (regex_replace(t, t, "\\{", "_x_<")
|| regex_replace(t, t, "\\}", "_x_>"))
LYXERR(Debug::FIND, "After {} replacement: '" << t << "'");
- else
- while (regex_replace(t, t, "\\\\\\{", "_x_<")
- || regex_replace(t, t, "\\\\\\}", "_x_>"))
- LYXERR(Debug::FIND, "After {} replacement: '" << t << "'");
+ else {
+ simple_replace(t, "\\\\\\{", "_x_<");
+ simple_replace(t, "\\\\\\}", "_x_>");
+ LYXERR(Debug::FIND, "After {} replacement: '" << t << "'");
+ }
}
return t;
#define displayMres(s,i)
#endif
-/** Finalize an advanced find operation, advancing the cursor to the innermost
- ** position that matches, plus computing the length of the matching text to
- ** be selected
- **/
-int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expected_len)
+static bool findAdvForwardInnermost(DocIterator & cur) // Step `cur` forward with forwardPos() until a step keeps the same depth or invalidates `cur`, then put `cur` back one step; returns true if any step increased cur.depth()
{
- // Search the foremost position that matches (avoids find of entire math
- // inset when match at start of it)
size_t d;
DocIterator old_cur(cur.buffer());
- MatchResult mres;
+ int forwardCount = 0; // number of steps after which cur.depth() was larger than before the step
do {
- LYXERR(Debug::FIND, "Forwarding one step (searching for innermost match)");
d = cur.depth();
old_cur = cur;
cur.forwardPos();
- if (!cur)
+ if (!cur) { // the step invalidated the iterator: stop
break;
- if (cur.depth() > d)
+ }
+ if (cur.depth() > d) { // depth grew (presumably a nested inset was entered -- TODO confirm): count it and keep stepping
+ forwardCount++;
continue;
+ }
if (cur.depth() == d)
break;
- mres = match(cur);
- displayMres(mres, 1);
+ } while(1); // a step that lands at a smaller depth neither counts nor stops the loop
+ cur = old_cur; // restore the position held before the final forwardPos() call
+ if (forwardCount > 0) {
+ LYXERR(Debug::FIND, "Forwarded " << forwardCount << " step(s) (searching for innermost match)");
+ return true;; // NOTE(review): the second ';' is a harmless empty statement
+ }
+ else
+ return false; // no step increased the depth
+}
+
+/** Finalize an advanced find operation, advancing the cursor to the innermost
+ ** position that matches, plus computing the length of the matching text to
+ ** be selected
+ **/
+int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expected_len, int prefix_len = 0)
+{
+ // Search the foremost position that matches (avoids find of entire math
+ // inset when match at start of it)
+ DocIterator old_cur(cur.buffer());
+ MatchResult mres;
+ int max_match;
+ // prefix_len > 0 means that forwarding 1 position will remove the complete entry.
+ // This happens with e.g. hyperlinks:
+ // one sees either "http://www.bla.bla" or nothing,
+ // so the search for "www" gives prefix_len = 7 (== strlen("http://")),
+ // and although we search for only 3 chars, we find the whole hyperlink inset.
+ bool at_begin = (prefix_len == 0);
+ if (findAdvForwardInnermost(cur)) {
+ mres = match(cur, -1, at_begin);
+ displayMres(mres, 0);
if (expected_len > 0) {
if (mres.match_len < expected_len)
- break;
+ return 0;
}
else {
if (mres.match_len <= 0)
- break;
+ return 0;
}
- } while (1);
- cur = old_cur;
- mres = match(cur); /* match valid only if not searching whole words */
- int max_match = mres.match_len;
+ max_match = mres.match_len;
+ }
+ else if (expected_len < 0) {
+ mres = match(cur); /* match valid only if not searching whole words */
+ displayMres(mres, 0);
+ max_match = mres.match_len;
+ }
+ else {
+ max_match = expected_len;
+ }
if (max_match <= 0) return 0;
LYXERR(Debug::FIND, "Ok");
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
}
// Length of matched text (different from len param)
- int old_match = match(cur, len).match_len;
+ int old_match = match(cur, len, at_begin).match_len;
if (old_match < 0)
old_match = 0;
int new_match;
// Greedy behaviour while matching regexps
- while ((new_match = match(cur, len + 1).match_len) > old_match) {
+ while ((new_match = match(cur, len + 1, at_begin).match_len) > old_match) {
++len;
old_match = new_match;
LYXERR(Debug::FIND, "verifying match with len = " << len);
int maxl = cur.lastpos() - cur.pos();
// Greedy behaviour while matching regexps
while (maxl > minl) {
- int actual_match = match(cur, len).match_len;
+ MatchResult mres2;
+ mres2 = match(cur, len, at_begin);
+ displayMres(mres2, len);
+ int actual_match = mres2.match_len;
if (actual_match >= max_match) {
// actual_match > max_match _can_ happen,
// if the search area splits
}
if (cur.pos() != old_cur.pos()) {
// OK, forwarded 1 pos in actual inset
- actual_match = match(cur, len-1).match_len;
+ actual_match = match(cur, len-1, at_begin).match_len;
if (actual_match == max_match) {
// Ha, got it! The shorter selection has the same match length
len--;
}
else {
LYXERR(Debug::INFO, "cur.pos() == old_cur.pos(), this should never happen");
- actual_match = match(cur, len).match_len;
+ actual_match = match(cur, len, at_begin).match_len;
if (actual_match == max_match)
old_cur = cur;
}
if (!cur)
return 0;
while (!theApp()->longOperationCancelled() && cur) {
- {
- // forward to
- size_t d;
- DocIterator old_cur(cur.buffer());
- do {
- d = cur.depth();
- old_cur = cur;
- cur.forwardPos();
- if (!cur)
- break;
- if (cur.depth() > d)
- continue;
- if (cur.depth() == d)
- break;
- } while (1);
- cur = old_cur;
- }
-
+ (void) findAdvForwardInnermost(cur);
LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
MatchResult mres = match(cur, -1, false);
displayMres(mres,-1)
// LYXERR0("Leaving first loop");
{
LYXERR(Debug::FIND, "Finalizing 1");
- int len = findAdvFinalize(cur, match, mres.match_len);
+ int len = findAdvFinalize(cur, match, mres.match_len, mres.match_prefix);
if (len > 0)
return len;
else {
match_len = findForwardAdv(cur, matchAdv);
else
match_len = findBackwardsAdv(cur, matchAdv);
- } catch (...) {
- // This may only be raised by lyx::regex()
- bv->message(_("Invalid regular expression!"));
+ } catch (exception & ex) {
+ bv->message(from_utf8(ex.what()));
return false;
}