namespace lyx {
+typedef map<string, string> AccentsMap;
+static AccentsMap accents = map<string, string>();
// Helper class for deciding what should be ignored
class IgnoreFormats {
// This causes a minor bug as undo will restore this selection,
// which the user did not create (#8986).
cur.innerText()->selectWord(cur, WHOLE_WORD);
- searchstr = cur.selectionAsString(false);
+ searchstr = cur.selectionAsString(false, true);
}
// if we still don't have a search string, report the error
return make_pair(false, 0);
bool have_selection = cur.selection();
- docstring const selected = cur.selectionAsString(false);
+ docstring const selected = cur.selectionAsString(false, true);
bool match =
case_sens
? searchstr == selected
bool matchword = parse_bool(howto);
bool forward = parse_bool(howto);
- return findOne(bv, search, casesensitive, matchword, forward, true, true);
+ return findOne(bv, search, casesensitive, matchword, forward, false, true);
}
-bool lyxreplace(BufferView * bv,
- FuncRequest const & ev, bool has_deleted)
+bool lyxreplace(BufferView * bv, FuncRequest const & ev)
{
if (!bv || ev.action() != LFUN_WORD_REPLACE)
return false;
bool update = false;
- if (!has_deleted) {
- int replace_count = 0;
- if (all) {
- replace_count = replaceAll(bv, search, rplc, casesensitive, matchword);
- update = replace_count > 0;
- } else {
- pair<bool, int> rv =
- replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext);
- update = rv.first;
- replace_count = rv.second;
- }
+ int replace_count = 0;
+ if (all) {
+ replace_count = replaceAll(bv, search, rplc, casesensitive, matchword);
+ update = replace_count > 0;
+ } else {
+ pair<bool, int> rv =
+ replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext);
+ update = rv.first;
+ replace_count = rv.second;
+ }
- Buffer const & buf = bv->buffer();
- if (!update) {
- // emit message signal.
- buf.message(_("String not found."));
+ Buffer const & buf = bv->buffer();
+ if (!update) {
+ // emit message signal.
+ buf.message(_("String not found."));
+ } else {
+ if (replace_count == 0) {
+ buf.message(_("String found."));
+ } else if (replace_count == 1) {
+ buf.message(_("String has been replaced."));
} else {
- if (replace_count == 0) {
- buf.message(_("String found."));
- } else if (replace_count == 1) {
- buf.message(_("String has been replaced."));
- } else {
- docstring const str =
- bformat(_("%1$d strings have been replaced."), replace_count);
- buf.message(str);
- }
+ docstring const str =
+ bformat(_("%1$d strings have been replaced."), replace_count);
+ buf.message(str);
}
- } else if (findnext) {
- // if we have deleted characters, we do not replace at all, but
- // rather search for the next occurence
- if (findOne(bv, search, casesensitive, matchword, forward, true, findnext))
- update = true;
- else
- bv->message(_("String not found."));
}
return update;
}
typedef vector<pair<string, string> > Escapes;
-/// A map of symbols and their escaped equivalent needed within a regex.
-/// @note Beware of order
-Escapes const & get_regexp_escapes()
-{
- typedef std::pair<std::string, std::string> P;
-
- static Escapes escape_map;
- if (escape_map.empty()) {
- escape_map.push_back(P("$", "_x_$"));
- escape_map.push_back(P("{", "_x_{"));
- escape_map.push_back(P("}", "_x_}"));
- escape_map.push_back(P("[", "_x_["));
- escape_map.push_back(P("]", "_x_]"));
- escape_map.push_back(P("(", "_x_("));
- escape_map.push_back(P(")", "_x_)"));
- escape_map.push_back(P("+", "_x_+"));
- escape_map.push_back(P("*", "_x_*"));
- escape_map.push_back(P(".", "_x_."));
- escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)"));
- escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)"));
- escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)"));
- escape_map.push_back(P("_x_", "\\"));
- }
- return escape_map;
-}
-
-/// A map of lyx escaped strings and their unescaped equivalent.
-Escapes const & get_lyx_unescapes()
+string string2regex(string in) // Backslash-escape regex special characters in 'in'; also puts the two characters \n in front of every "\\foreignlanguage" found past index 0
{
- typedef std::pair<std::string, std::string> P;
-
- static Escapes escape_map;
- if (escape_map.empty()) {
- escape_map.push_back(P("\\%", "%"));
- escape_map.push_back(P("\\{", "{"));
- escape_map.push_back(P("\\}", "}"));
- escape_map.push_back(P("\\mathcircumflex ", "^"));
- escape_map.push_back(P("\\mathcircumflex", "^"));
- escape_map.push_back(P("\\backslash ", "\\"));
- escape_map.push_back(P("\\backslash", "\\"));
- escape_map.push_back(P("\\sim ", "~"));
- escape_map.push_back(P("\\sim", "~"));
+ static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" };
+ string temp = std::regex_replace(in, specialChars, R"(\$&)" ); // "$&" is the matched character, so each one gets a '\' prefix
+ string temp2("");
+ size_t lastpos = 0;
+ size_t fl_pos = 0;
+ int offset = 1; // first search starts at index 1, so an occurrence at index 0 is never reported
+ while (fl_pos < temp.size()) {
+ fl_pos = temp.find("\\\\foreignlanguage", lastpos + offset); // the needle has two backslashes because the one in 'in' was escaped above
+ if (fl_pos == string::npos)
+ break;
+ offset = 16; // following searches start 16 characters past the previous hit
+ temp2 += temp.substr(lastpos, fl_pos - lastpos);
+ temp2 += "\\n"; // backslash followed by 'n', i.e. a newline escape in the resulting regex
+ lastpos = fl_pos;
 }
- return escape_map;
-}
-
-/// A map of escapes turning a regexp matching text to one matching latex.
-Escapes const & get_regexp_latex_escapes()
-{
- typedef std::pair<std::string, std::string> P;
-
- static Escapes escape_map;
- if (escape_map.empty()) {
- escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)"));
- escape_map.push_back(P("(<?!\\\\\\\\textbackslash)\\{", "\\\\\\{"));
- escape_map.push_back(P("(<?!\\\\\\\\textbackslash\\\\\\{)\\}", "\\\\\\}"));
- escape_map.push_back(P("\\[", "\\{\\[\\}"));
- escape_map.push_back(P("\\]", "\\{\\]\\}"));
- escape_map.push_back(P("\\^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)"));
- escape_map.push_back(P("%", "\\\\\\%"));
- escape_map.push_back(P("#", "\\\\#"));
+ if (lastpos == 0) // nothing was inserted: the escaped string is the result
+ return(temp);
+ if (lastpos < temp.size()) { // append the tail starting at the last occurrence
+ temp2 += temp.substr(lastpos, temp.size() - lastpos);
 }
- return escape_map;
+ return temp2;
}
-/** @todo Probably the maps need to be migrated to regexps, in order to distinguish if
- ** the found occurrence were escaped.
- **/
-string apply_escapes(string s, Escapes const & escape_map)
+/// Undo LyX escaping inside the contents of a \regexp{} inset so that the text
+/// can be used as a regular expression.
+///
+/// @param t          contents of the regexp inset
+/// @param withformat when true, a "\backslash " directly followed by \{ or \}
+///                   is dropped and the brace is replaced by the
+///                   accents["braceleft"] / accents["braceright"] entry
+/// @return t with \backslash, \mathcircumflex, \{, \}, \[ and \] replaced;
+///         t itself when nothing was replaced
+string correctRegex(string t, bool withformat)
{
- LYXERR(Debug::FIND, "Escaping: '" << s << "'");
- Escapes::const_iterator it;
- for (it = escape_map.begin(); it != escape_map.end(); ++it) {
-// LYXERR(Debug::FIND, "Escaping " << it->first << " as " << it->second);
- unsigned int pos = 0;
- while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) {
- s.replace(pos, it->first.length(), it->second);
- LYXERR(Debug::FIND, "After escape: " << s);
- pos += it->second.length();
-// LYXERR(Debug::FIND, "pos: " << pos);
+ /* Convert \backslash => \
+ * and \{, \}, \[, \] => {, }, [, ]
+ */
+ string s("");
+ // group 1: preceding backslashes, group 2: the whole command,
+ // group 3: command name (with optional space) or bracket, group 4: the name alone
+ regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))");
+ size_t lastpos = 0;
+ smatch sub;
+ bool backslashed = false;
+ for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
+ sub = *it;
+ string replace;
+ // An odd number of preceding backslashes: skip this match, its text is copied unchanged
+ if ((sub.position(2) - sub.position(0)) % 2 == 1) {
+ continue;
 }
+ else {
+ if (sub.str(4) == "backslash") {
+ replace = "\\";
+ if (withformat) {
+ // transforms '\backslash \{' into '\{'
+ // and '\{' into '{'
+ string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
+ if ((next == "\\{") || (next == "\\}")) {
+ replace = "";
+ backslashed = true;
+ }
+ }
+ }
+ else if (sub.str(4) == "mathcircumflex")
+ replace = "^";
+ else if (backslashed) {
+ backslashed = false;
+ if (withformat && (sub.str(3) == "{"))
+ replace = accents["braceleft"];
+ else if (withformat && (sub.str(3) == "}"))
+ replace = accents["braceright"];
+ else {
+ // else part should not exist
+ // Assert on a false condition: the former LASSERT(1, ...) could never fire
+ // NOTE(review): assumes LASSERT triggers when its first argument is false - confirm in lassert.h
+ LASSERT(0, /**/);
+ }
+ }
+ else
+ replace = sub.str(3);
+ }
+ // Copy the text between the previous match and this one, then the replacement
+ if (lastpos < (size_t) sub.position(2))
+ s += t.substr(lastpos, sub.position(2) - lastpos);
+ s += replace;
+ lastpos = sub.position(2) + sub.length(2);
 }
- LYXERR(Debug::FIND, "Escaped : '" << s << "'");
+ if (lastpos == 0)
+ return t;
+ else if (lastpos < t.length())
+ s += t.substr(lastpos, t.length() - lastpos);
 return s;
}
-
-/// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string),
-/// while outside apply get_lyx_unescapes()+get_regexp_escapes().
-/// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well.
+/// Build a regex string from a search string that may contain \regexp{...\endregexp{}} parts.
+/// Text outside such parts is passed through string2regex() (i.e. matched literally),
+/// text inside is passed through correctRegex() (i.e. regexp semantics are preserved).
+/// A \regexp{ without a closing \endregexp{}} extends to the end of the string.
+/// @param s          the search string
+/// @param withformat forwarded to correctRegex()
+/// @return the concatenation of the converted parts
-string escape_for_regex(string s, bool match_latex)
+string escape_for_regex(string s, bool withformat)
{
- size_t pos = 0;
- while (pos < s.size()) {
- size_t new_pos = s.find("\\regexp{", pos);
- if (new_pos == string::npos)
- new_pos = s.size();
- string t;
- if (new_pos > pos) {
- // outside regexp
- LYXERR(Debug::FIND, "new_pos: " << new_pos);
- t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes());
- LYXERR(Debug::FIND, "t [lyx]: " << t);
- t = apply_escapes(t, get_regexp_escapes());
- LYXERR(Debug::FIND, "t [rxp]: " << t);
- s.replace(pos, new_pos - pos, t);
- new_pos = pos + t.size();
- LYXERR(Debug::FIND, "Regexp after escaping: " << s);
- LYXERR(Debug::FIND, "new_pos: " << new_pos);
- if (new_pos == s.size())
- break;
- }
- // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes)
- size_t end_pos = s.find("\\endregexp{}}", new_pos + 8);
- LYXERR(Debug::FIND, "end_pos: " << end_pos);
- t = s.substr(new_pos + 8, end_pos - (new_pos + 8));
- LYXERR(Debug::FIND, "t in regexp : " << t);
- t = apply_escapes(t, get_lyx_unescapes());
- LYXERR(Debug::FIND, "t in regexp after unescapes [lyx]: " << t);
- if (match_latex) {
- t = apply_escapes(t, get_regexp_latex_escapes());
- LYXERR(Debug::FIND, "t in regexp after latex_escapes [ltx]: " << t);
+ size_t lastpos = 0;
+ string result = "";
+ while (lastpos < s.size()) {
+ size_t regex_pos = s.find("\\regexp{", lastpos);
+ if (regex_pos == string::npos) {
+ regex_pos = s.size();
 }
- if (end_pos == s.size()) {
- s.replace(new_pos, end_pos - new_pos, t);
- LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s);
- break;
+ if (regex_pos > lastpos) {
+ // Literal text in front of the next \regexp{ (or up to the end of s)
+ result += string2regex(s.substr(lastpos, regex_pos-lastpos));
+ lastpos = regex_pos;
+ if (lastpos == s.size())
+ break;
 }
- s.replace(new_pos, end_pos + 13 - new_pos, t);
- LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s);
- pos = new_pos + t.size();
- LYXERR(Debug::FIND, "pos: " << pos);
+ // 8 == length of "\regexp{", 13 == length of "\endregexp{}}"
+ size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8);
+ if (end_pos == string::npos) {
+ // Unterminated \regexp{: use the rest of s as its contents and stop.
+ // Without this, end_pos + 13 wraps around to 12 and the loop never ends.
+ result += correctRegex(s.substr(regex_pos + 8), withformat);
+ break;
+ }
+ result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat);
+ lastpos = end_pos + 13;
 }
- return s;
+ return result;
}
return rv;
}
-#if 0
-/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces.
- **
- ** Verify that closed braces exactly match open braces. This avoids that, for example,
- ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'.
- **
- ** @param unmatched
- ** Number of open braces that must remain open at the end for the verification to succeed.
- **/
-#if QTSEARCH
-bool braces_match(QString const & beg,
- int unmatched = 0)
-#else
-bool braces_match(string const & beg,
- int unmatched = 0)
-#endif
-{
- int open_pars = 0;
-#if QTSEARCH
- LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'");
-#else
- LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'");
-#endif
- int lastidx = beg.size();
- for (int i=0; i < lastidx; ++i) {
- // Skip escaped braces in the count
-#if QTSEARCH
- QChar c = beg.at(i);
-#else
- char c = beg.at(i);
-#endif
- if (c == '\\') {
- ++i;
- if (i >= lastidx)
- break;
- } else if (c == '{') {
- ++open_pars;
- } else if (c == '}') {
- if (open_pars == 0) {
- LYXERR(Debug::FIND, "Found unmatched closed brace");
- return false;
- } else
- --open_pars;
- }
- }
- if (open_pars != unmatched) {
- LYXERR(Debug::FIND, "Found " << open_pars
- << " instead of " << unmatched
- << " unmatched open braces at the end of count");
- return false;
- }
- LYXERR(Debug::FIND, "Braces match as expected");
- return true;
-}
-#endif
-
class MatchResult {
public:
enum range {
int pos;
int leadsize;
int pos_len;
+ int searched_size;
vector <string> result = vector <string>();
- MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1) {};
+ MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {};
};
static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
{
int range = oldres.match_len;
- if (range < 2) range = 2;
+ if (range > 0) range--;
if (newres.match2end < oldres.match2end - oldres.match_len)
return MatchResult::newIsTooFar;
if (newres.match_len < oldres.match_len)
}
}
-typedef map<string, string> AccentsMap;
-static AccentsMap accents = map<string, string>();
-
static void buildaccent(string n, string param, string values)
{
stringstream s(n);
accents["i"] = "ı";
accents["jmath"] = "È·";
accents["cdot"] = "·";
+ accents["textasciicircum"] = "^";
+ accents["mathcircumflex"] = "^";
+ accents["sim"] = "~";
accents["guillemotright"] = "»";
accents["guillemotleft"] = "«";
accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15
accents["latexe"] = getutf8(0xf0013);
accents["LaTeXe"] = getutf8(0xf0013);
accents["lyxarrow"] = getutf8(0xf0020);
+ accents["braceleft"] = getutf8(0xf0030);
+ accents["braceright"] = getutf8(0xf0031);
accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash
accents["backslash LyX"] = getutf8(0xf0010);
accents["backslash tex"] = getutf8(0xf0011);
buildAccentsMap();
static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
"cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
- "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|guillemot(left|right))(?![a-zA-Z]))");
+ "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc;
key += interval_.par.substr(params, optend-params);
evaluatingOptional = true;
optionalEnd = optend;
+ if (found.keytype == KeyInfo::isSectioning) {
+ // Remove optional values (but still keep in header)
+ interval_.addIntervall(params, optend);
+ }
}
string token = sub.str(7);
int closings;
}
else {
found._dataStart = found._tokenstart + found._tokensize;
- }
+ }
closings = 0;
}
- if (interval_.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) {
+ if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) {
found._dataStart += 15;
}
size_t endpos;
case KeyInfo::doRemove: {
// Remove the key with all parameters and following spaces
size_t pos;
- for (pos = actual._dataEnd+1; pos < interval_.par.length(); pos++) {
+ size_t start;
+ if (interval_.par[actual._dataEnd-1] == ' ')
+ start = actual._dataEnd;
+ else
+ start = actual._dataEnd+1;
+ for (pos = start; pos < interval_.par.length(); pos++) {
if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%'))
break;
}
return "";
}
}
+
}
else {
// LYXERR(Debug::INFO, "No regex formats");
s += "\\S";
lastpos = sub.position(2) + sub.length(2);
}
- if (lastpos == 0)
+ if (lastpos == 0) {
+ s = "\\b" + t + "\\b";
+ t = s;
return;
+ }
else if (lastpos < t.length())
s += t.substr(lastpos, t.length() - lastpos);
t = "\\b" + s + "\\b";
MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
: p_buf(&buf), p_first_buf(&buf), opt(opt)
{
- static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\\])" };
Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true);
docstring const & ds = stringifySearchBuffer(find_buf, opt);
use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
size_t lead_size = 0;
// correct the language settings
par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat);
+ opt.matchstart = false;
if (!use_regexp) {
identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string
if (opt.ignoreformat) {
lead_size = identifyLeading(par_as_string);
}
lead_as_string = par_as_string.substr(0, lead_size);
- string lead_as_regex_string = std::regex_replace(lead_as_string, specialChars, R"(\$&)" );
+ string lead_as_regex_string = string2regex(lead_as_string);
par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
- string par_as_regex_string_nolead = std::regex_replace(par_as_string_nolead, specialChars, R"(\$&)" );
+ string par_as_regex_string_nolead = string2regex(par_as_string_nolead);
/* Handle whole words too in this case
*/
if (opt.matchword) {
string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead;
CreateRegexp(opt, regexp_str, regexp2_str);
use_regexp = true;
+ LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
+ LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
return;
}
{
string lead_as_regexp;
if (lead_size > 0) {
- // @todo No need to search for \regexp{} insets in leading material
- lead_as_regexp = std::regex_replace(par_as_string.substr(0, lead_size), specialChars, R"(\$&)" );
- // lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat);
+ lead_as_regexp = string2regex(par_as_string.substr(0, lead_size));
+ regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", "");
par_as_string = par_as_string_nolead;
LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
// Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them.
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
- if (
- // Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex)
- regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2")
- // Insert .* before trailing '\\\]' ('\]' has been escaped by escape_for_regex)
- || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])( \\\\\\\\\\\\\\])\\'", "$1(.*?)$2")
- // Insert .* before trailing '\\end\{...}' ('\end{...}' has been escaped by escape_for_regex)
- || regex_replace(par_as_string, par_as_string,
- "(.*[^\\\\])( \\\\\\\\end\\\\\\{[a-zA-Z_]*)(\\\\\\*)?(\\\\\\})\\'", "$1(.*?)$2$3$4")
- // Insert .* before trailing '\}' ('}' has been escaped by escape_for_regex)
- || regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\})\\'", "$1(.*?)$2")
- ) {
- ++close_wildcards;
- }
+ LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
+ ++close_wildcards;
+ size_t lng = par_as_string.size();
if (!opt.ignoreformat) {
// Remove extra '\}' at end if not part of \{\.\}
- size_t lng = par_as_string.size();
while(lng > 2) {
if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) {
if (lng >= 6) {
if (lng < par_as_string.size())
par_as_string = par_as_string.substr(0,lng);
}
+ if ((lng > 0) && (par_as_string[0] == '^')) {
+ par_as_string = par_as_string.substr(1);
+ --lng;
+ opt.matchstart = true;
+ }
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
LYXERR(Debug::FIND, "Open braces: " << open_braces);
- LYXERR(Debug::FIND, "Close .*? : " << close_wildcards);
LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
// If entered regexp must match at begin of searched string buffer
string dest = "\\" + std::to_string(i+2);
while (regex_replace(par_as_string, par_as_string, orig, dest));
}
- /* opt.matchword is ignored if using regex
- but expanding par_as_string with "\\b" is not appropriate here
- if regex contains for instance '.*' or '.+'
- 1.) Nothing to do, if 'par_as_string' contains "\\b" already.
- (Means, that the user knows how to handle whole words
- 2.) else replace '.' with "\\S" and prepend + append "\\b"
- */
if (opt.matchword) {
modifyRegexForMatchWord(par_as_string);
opt.matchword = false;
}
}
-#if 0
-// Count number of characters in string
-// {]} ==> 1
-// \& ==> 1
-// --- ==> 1
-// \\[a-zA-Z]+ ==> 1
-#if QTSEARCH
-static int computeSize(QStringRef s, int len)
-#define isLyxAlpha(arg) arg.isLetter()
-#else
-static int computeSize(string s, int len)
-#define isLyxAlpha(arg) isalpha(arg)
-#endif
-{
- if (len == 0)
- return 0;
- int skip = 1;
- int count = 0;
- for (int i = 0; i < len; i += skip, count++) {
- if (s.at(i) == '\\') {
- skip = 2;
- if (i + 1 < len && isLyxAlpha(s.at(i+1))) {
- for (int j = 2; i+j < len; j++) {
- if (! isLyxAlpha(s.at(i+j))) {
- if (s.at(i+j) == ' ')
- skip++;
- else if (s.at(i+j) == '{') {
- if (i+j+1 < len && s.at(i+j+1) == '}')
- skip += 2;
- else if (i + j + 1 >= len)
- skip++;
- }
- break;
- }
- skip++;
- }
- }
- }
- else if (s.at(i) == '{') {
- if (i + 1 < len && s.at(i+1) == '}')
- skip = 2;
- else
- skip = 3;
- }
- else if (s.at(i) == '-') {
- if (i+1 < len && s.at(i+1) == '-') {
- if (i + 2 < len && s.at(i+2) == '-')
- skip = 3;
- else
- skip = 2;
- }
- else
- skip = 1;
- }
- else {
- skip = 1;
- }
- }
- return count;
-}
-#endif
-
MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
{
MatchResult mres;
+ mres.searched_size = len;
if (at_begin &&
(opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
return mres;
docstring docstr = stringifyFromForSearch(opt, cur, len);
string str;
- if (use_regexp || opt.casesensitive)
- str = normalize(docstr);
- else
- str = normalize(lowercase(docstr));
+ str = normalize(docstr);
if (!opt.ignoreformat) {
str = correctlanguagesetting(str, false, !opt.ignoreformat);
+ // remove closing '}' and '\n' to allow for use of '$' in regex
+ size_t lng = str.size();
+ while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n')))
+ lng--;
+ if (lng != str.size()) {
+ str = str.substr(0, lng);
+ }
}
if (str.empty()) {
mres.match_len = -1;
LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'");
LYXERR(Debug::FIND, "After normalization: '" << str << "'");
- if (use_regexp) {
+ LASSERT(use_regexp, /**/);
+ {
+ // use_regexp always true
LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
#if QTSEARCH
QString qstr = QString::fromStdString(str);
#endif
return mres;
}
-
- // else !use_regexp: but all code paths above return
- LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='"
- << par_as_string << "', str='" << str << "'");
- LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='"
- << lead_as_string << "', par_as_string_nolead='"
- << par_as_string_nolead << "'");
-
- if (at_begin) {
- LYXERR(Debug::FIND, "size=" << par_as_string.size()
- << ", substr='" << str.substr(0, par_as_string.size()) << "'");
- if (str.substr(0, par_as_string.size()) == par_as_string) {
- mres.match_len = par_as_string.size();
- mres.match2end = str.size();
- mres.pos = 0;
- return mres;
- }
- } else {
- // Start the search _after_ the leading part
- size_t pos = str.find(par_as_string_nolead, lead_as_string.size());
- if (pos != string::npos) {
- mres.match_len = par_as_string.size();
- mres.match2end = str.size() - pos;
- mres.pos = pos;
- return mres;
- }
- }
- return mres;
}
int res = mres.match_len;
LYXERR(Debug::FIND,
"res=" << res << ", at_begin=" << at_begin
- << ", matchword=" << opt.matchword
+ << ", matchstart=" << opt.matchstart
<< ", inTexted=" << cur.inTexted());
- if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted())
+ if (opt.matchstart) {
+ if (cur.pos() != 0)
+ mres.match_len = 0;
+ else if (mres.match_prefix > 0)
+ mres.match_len = 0;
return mres;
- if ((len > 0) && (res < len)) {
- mres.match_len = 0;
- return mres;
- }
- Paragraph const & par = cur.paragraph();
- bool ws_left = (cur.pos() > 0)
- ? par.isWordSeparator(cur.pos() - 1)
- : true;
- bool ws_right;
- if (len < 0)
- ws_right = true;
- else {
- ws_right = (cur.pos() + len < par.size())
- ? par.isWordSeparator(cur.pos() + len)
- : true;
- }
- LYXERR(Debug::FIND,
- "cur.pos()=" << cur.pos() << ", res=" << res
- << ", separ: " << ws_left << ", " << ws_right
- << ", len: " << len
- << endl);
- if (ws_left && ws_right) {
- // Check for word separators inside the found 'word'
- for (int i = 0; i < len; i++) {
- if (par.isWordSeparator(cur.pos() + i)) {
- mres.match_len = 0;
- return mres;
- }
- }
- return mres;
}
- mres.match_len = 0;
- return mres;
+ else
+ return mres;
}
#if 0
#if defined(ResultsDebug)
// Debugging output
-static void displayMResult(MatchResult &mres, int increment)
-{
- LYXERR0( "pos: " << mres.pos << " increment " << increment);
- LYXERR0( "leadsize: " << mres.leadsize);
- LYXERR0( "match_len: " << mres.match_len);
- LYXERR0( "match_prefix: " << mres.match_prefix);
- LYXERR0( "match2end: " << mres.match2end);
- LYXERR0( "pos_len: " << mres.pos_len); // Set in finalize
- for (size_t i = 0; i < mres.result.size(); i++)
- LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\"");
-}
- #define displayMres(s,i) displayMResult(s,i);
-#else
- #define displayMres(s,i)
-#endif
-
-static bool findAdvForwardInnermost(DocIterator & cur)
+static void displayMResult(MatchResult &mres, string from, DocIterator & cur) // Debug output: logs the fields of mres via LYXERR0, labelled with 'from'
{
- size_t d;
- DocIterator old_cur(cur.buffer());
- int forwardCount = 0;
- do {
- d = cur.depth();
- old_cur = cur;
- cur.forwardPos();
- if (!cur) {
- break;
- }
- if (cur.depth() > d) {
- forwardCount++;
- continue;
+ LYXERR0( "from:\t\t\t" << from);
+ string status; // one-word classification of mres, chosen below
+ if (mres.pos_len > 0) {
+ // Set in finalize
+ status = "FINALSEARCH";
+ }
+ else {
+ if (mres.match_len > 0) {
+ if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize)) // nothing precedes the match except the leading part
+ status = "Good Match";
+ else
+ status = "Matched in";
 }
- if (cur.depth() == d)
- break;
- } while(1);
- cur = old_cur;
- if (forwardCount > 0) {
- LYXERR(Debug::FIND, "Forwarded " << forwardCount << " step(s) (searching for innermost match)");
- return true;;
+ else
+ status = "MissedSearch";
 }
- else
- return false;
+
+ LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")"); // searched range, printed as cursor position plus searched_size
+ if ((mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0)) // print only when at least one value is positive
+ LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")");
+ if ((mres.pos > 0) || (mres.match_prefix > 0))
+ LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")");
+ for (size_t i = 0; i < mres.result.size(); i++) // one line per stored result string
+ LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\"");
}
+ #define displayMres(s, txt, cur) displayMResult(s, txt, cur);
+#else
+ #define displayMres(s, txt, cur)
+#endif
/** Finalize an advanced find operation, advancing the cursor to the innermost
** position that matches, plus computing the length of the matching text to
// so the search for "www" gives prefix_len = 7 (== sizeof("http://")
// and although we search for only 3 chars, we find the whole hyperlink inset
bool at_begin = (expected.match_prefix == 0);
- if (findAdvForwardInnermost(cur)) {
- mres = match(cur, -1, at_begin);
- displayMres(mres, 0);
- if (expected.match_len > 0) {
+ LASSERT(at_begin, /**/);
+ if (expected.match_len > 0 && at_begin) {
+ // Search for deepest match
+ old_cur = cur;
+ max_match = expected;
+ do {
+ size_t d = cur.depth();
+ cur.forwardPos();
+ if (!cur)
+ break;
+ if (cur.depth() < d)
+ break;
+ if (cur.depth() == d)
+ break;
+ size_t lastd = d;
+ while (cur && cur.depth() > lastd) {
+ lastd = cur.depth();
+ mres = match(cur, -1, at_begin);
+ displayMres(mres, "Checking innermost", cur);
+ if (mres.match_len > 0)
+ break;
+ // maybe deeper?
+ cur.forwardPos();
+ }
if (mres.match_len < expected.match_len)
- return fail;
- }
- else {
- if (mres.match_len <= 0)
- return fail;
- }
- max_match = mres.match_len;
+ break;
+ max_match = mres;
+ old_cur = cur;;
+ } while(1);
+ cur = old_cur;
}
- else if (expected.match_len < 0) {
+ else {
+ // (expected.match_len <= 0)
mres = match(cur); /* match valid only if not searching whole words */
- displayMres(mres, 0);
+ displayMres(mres, "Start with negative match", cur);
max_match = mres;
}
- else {
- max_match = expected;
- }
if (max_match.match_len <= 0) return fail;
LYXERR(Debug::FIND, "Ok");
int len = 1;
if (cur.pos() + len > cur.lastpos())
return fail;
- // regexp should use \w+, \S+, or \b(some string)\b
- // to search for whole words
- if (match.opt.matchword && !match.use_regexp) {
- LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
- while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
- ++len;
- LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
- }
- // Length of matched text (different from len param)
- static MatchResult old_match = match(cur, len, at_begin);
- if (old_match.match_len < 0)
- old_match = fail;
- MatchResult new_match;
- // Greedy behaviour while matching regexps
- while ((new_match = match(cur, len + 1, at_begin)).match_len > old_match.match_len) {
- ++len;
- old_match = new_match;
- LYXERR(Debug::FIND, "verifying match with len = " << len);
- }
- return old_match;
- }
- else {
+
+ LASSERT(match.use_regexp, /**/);
+ {
int minl = 1;
int maxl = cur.lastpos() - cur.pos();
// Greedy behaviour while matching regexps
while (maxl > minl) {
MatchResult mres2;
mres2 = match(cur, len, at_begin);
- displayMres(mres2, len);
- int actual_match = mres2.match_len;
- if (actual_match >= max_match.match_len) {
- // actual_match > max_match _can_ happen,
+ displayMres(mres2, "Finalize loop", cur);
+ int actual_match_len = mres2.match_len;
+ if (actual_match_len >= max_match.match_len) {
+ // actual_match_len > max_match _can_ happen,
// if the search area splits
// some following word so that the regex
// (e.g. 'r.*r\b' matches 'r' from the middle of the
// splitted word)
// This means, the len value is too big
+ actual_match_len = max_match.match_len;
+ max_match = mres2;
+ max_match.match_len = actual_match_len;
maxl = len;
if (maxl - minl < 4)
len = (int)((maxl + minl)/2);
len = (int)(minl + (maxl - minl + 3)/4);
}
else {
- // (actual_match < max_match)
+ // (actual_match_len < max_match.match_len)
minl = len + 1;
len = (int)((maxl + minl)/2);
}
}
+ len = minl;
old_cur = cur;
// Search for real start of matched characters
while (len > 1) {
// Ha, got it! The shorter selection has the same match length
len--;
old_cur = cur;
+ max_match = actual_match;
}
else {
// OK, the shorter selection matches less chars, revert to previous value
return fail;
else {
max_match.pos_len = len;
+ displayMres(max_match, "SEARCH RESULT", cur)
return max_match;
}
}
{
if (!cur)
return 0;
+ bool repeat = false;
while (!theApp()->longOperationCancelled() && cur) {
- (void) findAdvForwardInnermost(cur);
+ //(void) findAdvForwardInnermost(cur);
LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
MatchResult mres = match(cur, -1, false);
- displayMres(mres,-1)
+ string msg = "Starting";
+ if (repeat)
+ msg = "Repeated";
+ displayMres(mres, msg + " findForwardAdv", cur)
int match_len = mres.match_len;
if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
match_len = 0;
}
- if (match_len > 0) {
+ if (match_len <= 0) {
+ // This should exit nested insets, if any, or otherwise undefine the cursor.
+ cur.pos() = cur.lastpos();
+ LYXERR(Debug::FIND, "Advancing pos: cur=" << cur);
+ cur.forwardPos();
+ }
+ else { // match_len > 0
// Try to find the begin of searched string
int increment;
int firstInvalid = 100000;
- if (mres.match_prefix + mres.pos - mres.leadsize > 1)
- increment = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4;
- else
- increment = 10;
+ {
+ int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4;
+ int incrcur = (cur.lastpos() - cur.pos() + 1 )*3/4;
+ if (incrcur < incrmatch)
+ increment = incrcur;
+ else
+ increment = incrmatch;
+ if (increment < 1)
+ increment = 1;
+ }
LYXERR(Debug::FIND, "Set increment to " << increment);
while (increment > 0) {
DocIterator old_cur = cur;
- for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
+ size_t skipping = cur.depth();
+ for (int i = 0; i < increment && cur; i++) {
+ cur.forwardPos();
+ while (cur && cur.depth() > skipping) {
+ cur.pos() = cur.lastpos();
+ cur.forwardPos();
+ }
}
if (! cur || (cur.pit() > old_cur.pit())) {
// Are we outside of the paragraph?
}
else {
MatchResult mres2 = match(cur, -1, false);
- displayMres(mres2,increment)
+ displayMres(mres2, "findForwardAdv loop", cur)
switch (interpretMatch(mres, mres2)) {
case MatchResult::newIsTooFar:
// behind the expected match
increment /= 2;
break;
case MatchResult::newIsBetter:
- // not reached ye, but cur.pos()+increment is bettert
+ // not reached yet, but cur.pos()+increment is better
mres = mres2;
firstInvalid -= increment;
if (increment > firstInvalid*3/4)
default:
// Todo@
// Handle not like MatchResult::newIsTooFar
- // LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix);
+ LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix);
firstInvalid--;
increment = increment*3/4;
cur = old_cur;
}
}
}
+ if (mres.match_len > 0 && mres.match_prefix + mres.pos - mres.leadsize > 0) {
+ repeat = true;
+ cur.forwardPos();
+ continue;
+ }
// LYXERR0("Leaving first loop");
- {
- LYXERR(Debug::FIND, "Finalizing 1");
- MatchResult found_match = findAdvFinalize(cur, match, mres);
- if (found_match.match_len > 0) {
- LASSERT(found_match.pos_len > 0, /**/);
- match.FillResults(found_match);
- return found_match.pos_len;
- }
- else {
- // try next possible match
- cur.forwardPos();
- continue;
- }
+ LYXERR(Debug::FIND, "Finalizing 1");
+ MatchResult found_match = findAdvFinalize(cur, match, mres);
+ if (found_match.match_len > 0) {
+ LASSERT(found_match.pos_len > 0, /**/);
+ match.FillResults(found_match);
+ return found_match.pos_len;
}
- // The following code is newer reached
- // but parts of it may be needed in future
- int match_len_zero_count = 0;
- MatchResult mres3;
- for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
- if (i++ > 3) {
- mres3 = match(cur, -1, false);
- displayMres(mres3, 1)
- int remaining_len = mres3.match_len;
- if (remaining_len <= 0) {
- // Apparently the searched string is not in the remaining part
- break;
- }
- else {
- i = 0;
- }
- }
- LYXERR(Debug::FIND, "Advancing cur: " << cur);
- mres3 = match(cur, 1);
- displayMres(mres3, 1)
- int match_len3 = mres3.match_len;
- if (match_len3 < 0)
- continue;
- mres3 = match(cur);
- displayMres(mres3, 1)
- int match_len2 = mres3.match_len;
- LYXERR(Debug::FIND, "match_len2: " << match_len2);
- if (match_len2 > 0) {
- // Sometimes in finalize we understand it wasn't a match
- // and we need to continue the outest loop
- LYXERR(Debug::FIND, "Finalizing 2");
- MatchResult mres4 = findAdvFinalize(cur, match, mres.match_len);
- if (mres4.match_len > 0) {
- match.FillResults(mres4);
- LASSERT(mres4.pos_len > 0, /**/);
- return mres4.pos_len;
- }
- }
- if (match_len2 > 0)
- match_len_zero_count = 0;
- else if (match_len2 == 0)
- match_len_zero_count++;
- if (match_len2 < 0) {
- if (++match_len_zero_count > 3) {
- LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len);
- }
- break;
- }
+ else {
+ // try next possible match
+ cur.forwardPos();
+ repeat = false;
+ continue;
}
- if (!cur)
- return 0;
- }
- if (match_len >= 0 && cur.pit() < cur.lastpit()) {
- LYXERR(Debug::FIND, "Advancing par: cur=" << cur);
- cur.forwardPar();
- } else {
- // This should exit nested insets, if any, or otherwise undefine the currsor.
- cur.pos() = cur.lastpos();
- LYXERR(Debug::FIND, "Advancing pos: cur=" << cur);
- cur.forwardPos();
}
}
return 0;
}
} // namespace
-#if 1
static bool replaceMatches(string &t, int maxmatchnum, vector <string> const & replacements)
{
// Should replace the string "$" + std::to_string(matchnum) with replacement
t = s;
return true;
}
-#endif
///
static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)