#include <map>
#include <regex>
+
+//#define ResultsDebug
#define USE_QT_FOR_SEARCH
#if defined(USE_QT_FOR_SEARCH)
#include <QtCore> // sets QT_VERSION
// This causes a minor bug as undo will restore this selection,
// which the user did not create (#8986).
cur.innerText()->selectWord(cur, WHOLE_WORD);
- searchstr = cur.selectionAsString(false);
+ searchstr = cur.selectionAsString(false, true);
}
// if we still don't have a search string, report the error
return make_pair(false, 0);
bool have_selection = cur.selection();
- docstring const selected = cur.selectionAsString(false);
+ docstring const selected = cur.selectionAsString(false, true);
bool match =
case_sens
? searchstr == selected
bool matchword = parse_bool(howto);
bool forward = parse_bool(howto);
- return findOne(bv, search, casesensitive, matchword, forward, true, true);
+ return findOne(bv, search, casesensitive, matchword, forward, false, true);
}
-bool lyxreplace(BufferView * bv,
- FuncRequest const & ev, bool has_deleted)
+bool lyxreplace(BufferView * bv, FuncRequest const & ev)
{
if (!bv || ev.action() != LFUN_WORD_REPLACE)
return false;
bool update = false;
- if (!has_deleted) {
- int replace_count = 0;
- if (all) {
- replace_count = replaceAll(bv, search, rplc, casesensitive, matchword);
- update = replace_count > 0;
- } else {
- pair<bool, int> rv =
- replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext);
- update = rv.first;
- replace_count = rv.second;
- }
+ int replace_count = 0;
+ if (all) {
+ replace_count = replaceAll(bv, search, rplc, casesensitive, matchword);
+ update = replace_count > 0;
+ } else {
+ pair<bool, int> rv =
+ replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext);
+ update = rv.first;
+ replace_count = rv.second;
+ }
- Buffer const & buf = bv->buffer();
- if (!update) {
- // emit message signal.
- buf.message(_("String not found."));
+ Buffer const & buf = bv->buffer();
+ if (!update) {
+ // emit message signal.
+ buf.message(_("String not found."));
+ } else {
+ if (replace_count == 0) {
+ buf.message(_("String found."));
+ } else if (replace_count == 1) {
+ buf.message(_("String has been replaced."));
} else {
- if (replace_count == 0) {
- buf.message(_("String found."));
- } else if (replace_count == 1) {
- buf.message(_("String has been replaced."));
- } else {
- docstring const str =
- bformat(_("%1$d strings have been replaced."), replace_count);
- buf.message(str);
- }
+ docstring const str =
+ bformat(_("%1$d strings have been replaced."), replace_count);
+ buf.message(str);
}
- } else if (findnext) {
- // if we have deleted characters, we do not replace at all, but
- // rather search for the next occurence
- if (findOne(bv, search, casesensitive, matchword, forward, true, findnext))
- update = true;
- else
- bv->message(_("String not found."));
}
return update;
}
/// A map of symbols and their escaped equivalent needed within a regex.
/// @note Beware of order
+/*
Escapes const & get_regexp_escapes()
{
typedef std::pair<std::string, std::string> P;
}
return escape_map;
}
+*/
/// A map of lyx escaped strings and their unescaped equivalent.
+/*
Escapes const & get_lyx_unescapes()
{
typedef std::pair<std::string, std::string> P;
}
return escape_map;
}
+*/
/// A map of escapes turning a regexp matching text to one matching latex.
+/*
Escapes const & get_regexp_latex_escapes()
{
typedef std::pair<std::string, std::string> P;
}
return escape_map;
}
+*/
/** @todo Probably the maps need to be migrated to regexps, in order to distinguish if
** the found occurrence were escaped.
**/
+/*
string apply_escapes(string s, Escapes const & escape_map)
{
LYXERR(Debug::FIND, "Escaping: '" << s << "'");
LYXERR(Debug::FIND, "Escaped : '" << s << "'");
return s;
}
+*/
+
/// Convert a literal search string into a regex pattern that matches it.
///
/// Every regex metacharacter (whitespace included) is prefixed with a
/// backslash. Additionally a "\n" pattern is inserted in front of each
/// escaped "\foreignlanguage" that does not start at offset 0 of the
/// escaped string.
///
/// @param in  literal text to be matched
/// @return    the pattern; identical to the escaped text when nothing
///            had to be inserted
std::string string2regex(std::string in)
{
	static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" };
	std::string const escaped = std::regex_replace(in, specialChars, R"(\$&)");

	std::string assembled;
	// Everything before this offset has already been copied to 'assembled'.
	size_t copied_upto = 0;
	// Begin at 1: a language switch right at the start gets no "\n" prefix.
	size_t search_from = 1;
	for (;;) {
		size_t const hit = escaped.find("\\\\foreignlanguage", search_from);
		if (hit == std::string::npos)
			break;
		assembled.append(escaped, copied_upto, hit - copied_upto);
		assembled += "\\n";
		copied_upto = hit;
		// Step past the tag just handled before looking for the next one.
		search_from = hit + 16;
	}
	if (copied_upto == 0)
		return escaped;
	// Tail: from the last tag found up to the end of the escaped text.
	assembled.append(escaped, copied_upto, std::string::npos);
	return assembled;
}
+
/// Undo LyX escaping inside a regular expression entered by the user.
///
/// Converts
///   \backslash (optionally followed by one space)      => \
///   \mathcircumflex (optionally followed by one space) => ^
///   \{, \}, \[, \]                                      => {, }, [, ]
/// A candidate is left untouched when it is preceded by an odd number of
/// backslashes, because then its own leading backslash is itself escaped.
///
/// @param t  regex text as extracted from the document
/// @return   the corrected regex; \p t unchanged if nothing was converted
std::string correctRegex(std::string t)
{
	// Compiled once; building a std::regex on every call is expensive.
	static std::regex const wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))");
	std::string s;
	size_t lastpos = 0;
	for (std::sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
		std::smatch const & sub = *it;
		// Group 1 holds the preceding backslashes; an odd count means the
		// backslash opening group 2 is escaped, so this is not a macro.
		if ((sub.position(2) - sub.position(0)) % 2 == 1)
			continue;
		size_t const at = static_cast<size_t>(sub.position(2));
		// Copy the untouched text between the previous hit and this one.
		s.append(t, lastpos, at - lastpos);
		std::string const macro = sub.str(4);
		if (macro == "backslash")
			s += "\\";
		else if (macro == "mathcircumflex")
			s += "^";
		else
			s += sub.str(3);	// the bare bracket/brace character
		lastpos = at + sub.length(2);
	}
	// lastpos only advances when a conversion took place.
	if (lastpos == 0)
		return t;
	s.append(t, lastpos, std::string::npos);
	return s;
}
/// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string),
/// while outside apply get_lyx_unescapes()+get_regexp_escapes().
/// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well.
-string escape_for_regex(string s, bool match_latex)
+string escape_for_regex(string s)
{
- size_t pos = 0;
- while (pos < s.size()) {
- size_t new_pos = s.find("\\regexp{", pos);
- if (new_pos == string::npos)
- new_pos = s.size();
- string t;
- if (new_pos > pos) {
- // outside regexp
- LYXERR(Debug::FIND, "new_pos: " << new_pos);
- t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes());
- LYXERR(Debug::FIND, "t [lyx]: " << t);
- t = apply_escapes(t, get_regexp_escapes());
- LYXERR(Debug::FIND, "t [rxp]: " << t);
- s.replace(pos, new_pos - pos, t);
- new_pos = pos + t.size();
- LYXERR(Debug::FIND, "Regexp after escaping: " << s);
- LYXERR(Debug::FIND, "new_pos: " << new_pos);
- if (new_pos == s.size())
- break;
- }
- // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes)
- size_t end_pos = s.find("\\endregexp{}}", new_pos + 8);
- LYXERR(Debug::FIND, "end_pos: " << end_pos);
- t = s.substr(new_pos + 8, end_pos - (new_pos + 8));
- LYXERR(Debug::FIND, "t in regexp : " << t);
- t = apply_escapes(t, get_lyx_unescapes());
- LYXERR(Debug::FIND, "t in regexp after unescapes [lyx]: " << t);
- if (match_latex) {
- t = apply_escapes(t, get_regexp_latex_escapes());
- LYXERR(Debug::FIND, "t in regexp after latex_escapes [ltx]: " << t);
+ size_t lastpos = 0;
+ string result = "";
+ while (lastpos < s.size()) {
+ size_t regex_pos = s.find("\\regexp{", lastpos);
+ if (regex_pos == string::npos) {
+ regex_pos = s.size();
}
- if (end_pos == s.size()) {
- s.replace(new_pos, end_pos - new_pos, t);
- LYXERR(Debug::FIND, "Regexp after \\regexp{} removal: " << s);
- break;
+ if (regex_pos > lastpos) {
+ result += string2regex(s.substr(lastpos, regex_pos-lastpos));
+ lastpos = regex_pos;
+ if (lastpos == s.size())
+ break;
}
- s.replace(new_pos, end_pos + 13 - new_pos, t);
- LYXERR(Debug::FIND, "Regexp after \\regexp{...\\endregexp{}} removal: " << s);
- pos = new_pos + t.size();
- LYXERR(Debug::FIND, "pos: " << pos);
+ size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8);
+ result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)));
+ lastpos = end_pos + 13;
}
- return s;
+ return result;
}
return rv;
}
-
+#if 0
/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces.
**
** Verify that closed braces exactly match open braces. This avoids that, for example,
LYXERR(Debug::FIND, "Braces match as expected");
return true;
}
-
+#endif
class MatchResult {
public:
int match2end;
int pos;
int leadsize;
- MatchResult(): match_len(0),match_prefix(0),match2end(0), pos(0),leadsize(0) {};
+ int pos_len;
+ vector <string> result = vector <string>();
+ MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1) {};
};
static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
class MatchStringAdv {
public:
- MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt);
+ MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt);
/** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv
** constructor as opt.search, under the opt.* options settings.
private:
/// Auxiliary find method (does not account for opt.matchword)
MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+ void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = "");
/** Normalize a stringified or latexified LyX paragraph.
**
** @todo Normalization should also expand macros, if the corresponding
** search option was checked.
**/
- string normalize(docstring const & s, bool hack_braces) const;
+ string normalize(docstring const & s) const;
// normalized string to search
string par_as_string;
// regular expression to use for searching
public:
// Are we searching with regular expressions ?
bool use_regexp;
+ static int valid_matches;
+ static vector <string> matches;
+ void FillResults(MatchResult &found_mr);
};
+int MatchStringAdv::valid_matches = 0;
+vector <string> MatchStringAdv::matches = vector <string>(10);
+
+void MatchStringAdv::FillResults(MatchResult &found_mr)
+{
+ if (found_mr.match_len > 0) {
+ valid_matches = found_mr.result.size();
+ for (size_t i = 0; i < found_mr.result.size(); i++)
+ matches[i] = found_mr.result[i];
+ }
+ else
+ valid_matches = 0;
+}
static docstring buffer_to_latex(Buffer & buffer)
{
accents["i"] = "ı";
accents["jmath"] = "ȷ";
accents["cdot"] = "·";
+ accents["textasciicircum"] = "^";
+ accents["mathcircumflex"] = "^";
+ accents["sim"] = "~";
+ accents["guillemotright"] = "»";
+ accents["guillemotleft"] = "«";
accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15
accents["thinspace"] = getutf8(0xf0002); // and used _only_ by findadv
accents["negthinspace"] = getutf8(0xf0003); // to omit backslashed latex macros
buildAccentsMap();
static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
"cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
- "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow)))(?![a-zA-Z]))");
+ "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|guillemot(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc;
key = sub.str(2)[1];
else {
key = sub.str(2);
- if (key == "$") {
- size_t k_pos = sub.position(size_t(2));
- if ((k_pos > 0) && (interval_.par[k_pos - 1] == '\\')) {
- // Escaped '$', ignoring
- continue;
- }
- }
}
}
if (keys.find(key) != keys.end()) {
key += interval_.par.substr(params, optend-params);
evaluatingOptional = true;
optionalEnd = optend;
+ if (found.keytype == KeyInfo::isSectioning) {
+ // Remove optional values (but still keep in header)
+ interval_.addIntervall(params, optend);
+ }
}
string token = sub.str(7);
int closings;
}
else {
found._dataStart = found._tokenstart + found._tokensize;
- }
+ }
closings = 0;
}
- if (interval_.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) {
+ if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) {
found._dataStart += 15;
}
size_t endpos;
makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
- makeKey("guillemotright|guillemotleft", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Spaces
makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
static int num_replaced = 0;
static bool previous_single_replace = true;
-MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt)
+void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string)
+{
+#if QTSEARCH
+ // Handle \w properly
+ QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
+ if (! opt.casesensitive) {
+ popts |= QRegularExpression::CaseInsensitiveOption;
+ }
+ regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
+ regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
+ regexError = "";
+ if (regexp.isValid() && regexp2.isValid()) {
+ regexIsValid = true;
+ // Check '{', '}' pairs inside the regex
+ int balanced = 0;
+ int skip = 1;
+ for (unsigned i = 0; i < par_as_string.size(); i+= skip) {
+ char c = par_as_string[i];
+ if (c == '\\') {
+ skip = 2;
+ continue;
+ }
+ if (c == '{')
+ balanced++;
+ else if (c == '}') {
+ balanced--;
+ if (balanced < 0)
+ break;
+ }
+ skip = 1;
+ }
+ if (balanced != 0) {
+ regexIsValid = false;
+ regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
+ }
+ }
+ else {
+ regexIsValid = false;
+ if (!regexp.isValid())
+ regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
+ else
+ regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
+ }
+#else
+ if (opt.casesensitive) {
+ regexp = regex(regexp_str);
+ regexp2 = regex(regexp2_str);
+ }
+ else {
+ regexp = regex(regexp_str, std::regex_constants::icase);
+ regexp2 = regex(regexp2_str, std::regex_constants::icase);
+ }
+#endif
+}
+
/// Adapt a user supplied regex so that it only matches whole words.
///
/// 1.) If the regex already contains an unescaped "\b", the user handles
///     word boundaries himself and the regex is left untouched.
/// 2.) Otherwise every unescaped '.' is replaced with "\S" (so a wildcard
///     cannot run across white space) and the whole regex is wrapped in
///     "\b" ... "\b". The wrapping is applied even if there was no '.'.
///
/// A '.' or "\b" preceded by an odd number of backslashes is escaped and
/// therefore ignored.
///
/// @param t  the regex, modified in place
static void modifyRegexForMatchWord(std::string &t)
{
	// Group 1: preceding backslashes, group 2: '.' or "\b"
	static std::regex const wordre("(\\\\)*((\\.|\\\\b))");
	std::string s;
	size_t lastpos = 0;
	for (std::sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
		std::smatch const & sub = *it;
		if ((sub.position(2) - sub.position(0)) % 2 == 1)
			continue;	// escaped, not a wildcard/boundary
		// Group 2 holds a single backslash followed by 'b' here
		if (sub.str(2) == "\\b")
			return;
		size_t const at = static_cast<size_t>(sub.position(2));
		s.append(t, lastpos, at - lastpos);
		s += "\\S";
		lastpos = at + sub.length(2);
	}
	// Remaining tail (the complete regex if nothing was replaced)
	s.append(t, lastpos, std::string::npos);
	t = "\\b" + s + "\\b";
}
+
+MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
: p_buf(&buf), p_first_buf(&buf), opt(opt)
{
Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true);
previous_single_replace = true;
}
// When using regexp, braces are hacked already by escape_for_regex()
- par_as_string = normalize(ds, !use_regexp);
+ par_as_string = normalize(ds);
open_braces = 0;
close_wildcards = 0;
size_t lead_size = 0;
// correct the language settings
par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat);
- if (opt.ignoreformat) {
- if (!use_regexp) {
- // if par_as_string_nolead were emty,
- // the following call to findAux will always *find* the string
- // in the checked data, and thus always using the slow
- // examining of the current text part.
- par_as_string_nolead = par_as_string;
+ opt.matchstart = false;
+ if (!use_regexp) {
+ identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string
+ if (opt.ignoreformat) {
+ lead_size = 0;
}
- } else {
+ else {
+ lead_size = identifyLeading(par_as_string);
+ }
+ lead_as_string = par_as_string.substr(0, lead_size);
+ string lead_as_regex_string = string2regex(lead_as_string);
+ par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
+ string par_as_regex_string_nolead = string2regex(par_as_string_nolead);
+ /* Handle whole words too in this case
+ */
+ if (opt.matchword) {
+ par_as_regex_string_nolead = "\\b" + par_as_regex_string_nolead + "\\b";
+ opt.matchword = false;
+ }
+ string regexp_str = "(" + lead_as_regex_string + ")()" + par_as_regex_string_nolead;
+ string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead;
+ CreateRegexp(opt, regexp_str, regexp2_str);
+ use_regexp = true;
+ LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
+ LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
+ return;
+ }
+
+ if (!opt.ignoreformat) {
lead_size = identifyLeading(par_as_string);
LYXERR(Debug::FIND, "Lead_size: " << lead_size);
lead_as_string = par_as_string.substr(0, lead_size);
par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
}
- if (!use_regexp) {
- open_braces = identifyClosing(par_as_string);
- identifyClosing(par_as_string_nolead);
- LYXERR(Debug::FIND, "Open braces: " << open_braces);
- LYXERR(Debug::FIND, "Built MatchStringAdv object: par_as_string = '" << par_as_string << "'");
- } else {
+ // Here we are using regexp
+ LASSERT(use_regexp, /**/);
+ {
string lead_as_regexp;
if (lead_size > 0) {
- // @todo No need to search for \regexp{} insets in leading material
- static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\\])" };
- lead_as_regexp = std::regex_replace(par_as_string.substr(0, lead_size), specialChars, R"(\$&)" );
- // lead_as_regexp = escape_for_regex(par_as_string.substr(0, lead_size), !opt.ignoreformat);
+ lead_as_regexp = string2regex(par_as_string.substr(0, lead_size));
+ regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", "");
par_as_string = par_as_string_nolead;
LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
}
LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
- par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
+ par_as_string = escape_for_regex(par_as_string);
// Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them.
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
+ LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
+ ++close_wildcards;
+ /*
if (
// Insert .* before trailing '\$' ('$' has been escaped by escape_for_regex)
regex_replace(par_as_string, par_as_string, "(.*[^\\\\])(\\\\\\$)\\'", "$1(.*?)$2")
) {
++close_wildcards;
}
+ */
+ size_t lng = par_as_string.size();
if (!opt.ignoreformat) {
// Remove extra '\}' at end if not part of \{\.\}
- size_t lng = par_as_string.size();
while(lng > 2) {
if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) {
if (lng >= 6) {
}
if (lng < par_as_string.size())
par_as_string = par_as_string.substr(0,lng);
- /*
- // save '\.'
- regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_");
- // handle '.' -> '[^]', replace later as '[^\}\{\\]'
- regex_replace(par_as_string, par_as_string, "\\.", "[^]");
- // replace '[^...]' with '[^...\}\{\\]'
- regex_replace(par_as_string, par_as_string, "\\[\\^([^\\\\\\]]*)\\]", "_xxbrlxx_$1\\}\\{\\\\_xxbrrxx_");
- regex_replace(par_as_string, par_as_string, "_xxbrlxx_", "[^");
- regex_replace(par_as_string, par_as_string, "_xxbrrxx_", "]");
- // restore '\.'
- regex_replace(par_as_string, par_as_string, "_xxbdotxx_", "\\.");
- */
+ }
+ if ((lng > 0) && (par_as_string[0] == '^')) {
+ par_as_string = par_as_string.substr(1);
+ --lng;
+ opt.matchstart = true;
}
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
LYXERR(Debug::FIND, "Open braces: " << open_braces);
- LYXERR(Debug::FIND, "Close .*? : " << close_wildcards);
LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
// If entered regexp must match at begin of searched string buffer
string dest = "\\" + std::to_string(i+2);
while (regex_replace(par_as_string, par_as_string, orig, dest));
}
+ /* opt.matchword is ignored if using regex
+ so expanding par_as_string with "\\b" seems appropriate here
+ if regex contains for instance '.*' or '.+'
+ 1.) Nothing to do, if 'par_as_string' contains "\\b" already.
+ (Means, that the user knows how to handle whole words
+ 2.) else replace '.' with "\\S" and wrap the regex with "\\b"
+ */
+ if (opt.matchword) {
+ modifyRegexForMatchWord(par_as_string);
+ opt.matchword = false;
+ }
regexp_str = "(" + lead_as_regexp + ")()" + par_as_string;
regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string;
}
LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
-#if QTSEARCH
- // Handle \w properly
- QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
- if (! opt.casesensitive) {
- popts |= QRegularExpression::CaseInsensitiveOption;
- }
- regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
- regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
- regexError = "";
- if (regexp.isValid() && regexp2.isValid()) {
- regexIsValid = true;
- // Check '{', '}' pairs inside the regex
- int balanced = 0;
- int skip = 1;
- for (unsigned i = 0; i < par_as_string.size(); i+= skip) {
- char c = par_as_string[i];
- if (c == '\\') {
- skip = 2;
- continue;
- }
- if (c == '{')
- balanced++;
- else if (c == '}') {
- balanced--;
- if (balanced < 0)
- break;
- }
- skip = 1;
- }
- if (balanced != 0) {
- regexIsValid = false;
- regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
- }
- }
- else {
- regexIsValid = false;
- if (!regexp.isValid())
- regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
- if (!regexp2.isValid())
- regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
- }
-#else
- if (opt.casesensitive) {
- regexp = regex(regexp_str);
- regexp2 = regex(regexp2_str);
- }
- else {
- regexp = regex(regexp_str, std::regex_constants::icase);
- regexp2 = regex(regexp2_str, std::regex_constants::icase);
- }
-#endif
+ CreateRegexp(opt, regexp_str, regexp2_str, par_as_string);
}
}
docstring docstr = stringifyFromForSearch(opt, cur, len);
string str;
if (use_regexp || opt.casesensitive)
- str = normalize(docstr, true);
+ str = normalize(docstr);
else
- str = normalize(lowercase(docstr), true);
+ str = normalize(lowercase(docstr));
if (!opt.ignoreformat) {
str = correctlanguagesetting(str, false, !opt.ignoreformat);
}
QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags);
if (!match.hasMatch())
return mres;
- // Check braces on segments that matched all (.*?) subexpressions,
- // except the last "padding" one inserted by lyx.
- for (int i = 3; i < match.lastCapturedIndex(); ++i)
- if (!braces_match(match.captured(i), open_braces))
- return mres;
#else
regex const *p_regexp;
regex_constants::match_flag_type flags;
if (re_it == sregex_iterator())
return mres;
match_results<string::const_iterator> const & m = *re_it;
- // Check braces on segments that matched all (.*?) subexpressions,
- // except the last "padding" one inserted by lyx.
- for (size_t i = 3; i < m.size() - 1; ++i)
- if (!braces_match(m[i], open_braces))
- return mres;
#endif
- // Exclude from the returned match length any length
- // due to close wildcards added at end of regexp
- // and also the length of the leading (e.g. '\emph{}')
+ // Whole found string, including the leading
+ // std: m[0].second - m[0].first
+ // Qt: match.capturedEnd(0) - match.capturedStart(0)
//
- // Whole found string, including the leading: m[0].second - m[0].first
- // Size of the leading string: m[1].second - m[1].first
+ // Size of the leading string
+ // std: m[1].second - m[1].first
+ // Qt: match.capturedEnd(1) - match.capturedStart(1)
int leadingsize = 0;
- int result;
#if QTSEARCH
if (match.lastCapturedIndex() > 0) {
leadingsize = match.capturedEnd(1) - match.capturedStart(1);
}
- int lastidx = match.lastCapturedIndex();
- for (int i = 0; i <= lastidx; i++) {
- LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long");
- }
- if (close_wildcards == 0)
- result = match.capturedEnd(0) - match.capturedStart(0);
- else
- result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0);
#else
if (m.size() > 2) {
leadingsize = m[1].second - m[1].first;
}
- for (size_t i = 0; i < m.size(); i++) {
- LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long");
- }
- if (close_wildcards == 0)
- result = m[0].second - m[0].first;
- else
- result = m[m.size() - close_wildcards].first - m[0].first;
#endif
- if (result > leadingsize)
- result -= leadingsize;
- else
- result = 0;
#if QTSEARCH
mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2);
mres.match_len = match.capturedEnd(0) - match.capturedEnd(2);
if (mres.match2end < 0)
mres.match_len = 0;
mres.leadsize = leadingsize;
+#if QTSEARCH
+ if (mres.match_len > 0) {
+ string a0 = match.captured(0).mid(mres.pos + mres.match_prefix, mres.match_len).toStdString();
+ mres.result.push_back(a0);
+ for (int i = 3; i <= match.lastCapturedIndex(); i++) {
+ mres.result.push_back(match.captured(i).toStdString());
+ }
+ }
+#else
+ if (mres.match_len > 0) {
+ string a0 = m[0].str().substr(mres.pos + mres.match_prefix, mres.match_len);
+ mres.result.push_back(a0);
+ for (size_t i = 3; i < m.size(); i++) {
+ mres.result.push_back(m[i]);
+ }
+ }
+#endif
return mres;
}
int res = mres.match_len;
LYXERR(Debug::FIND,
"res=" << res << ", at_begin=" << at_begin
- << ", matchword=" << opt.matchword
+ << ", matchstart=" << opt.matchstart
<< ", inTexted=" << cur.inTexted());
+ if (opt.matchstart) {
+ if (cur.pos() != 0)
+ mres.match_len = 0;
+ else if (mres.match_prefix > 0)
+ mres.match_len = 0;
+ return mres;
+ }
+ else
+ return mres;
+ /* DEAD CODE follows
if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted())
return mres;
if ((len > 0) && (res < len)) {
}
mres.match_len = 0;
return mres;
+ */
}
+#if 0
static bool simple_replace(string &t, string from, string to)
{
regex repl("(\\\\)*(" + from + ")");
t = s;
return true;
}
+#endif
-string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
+string MatchStringAdv::normalize(docstring const & s) const
{
string t;
t = lyx::to_utf8(s);
while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
- // FIXME - check what preceeds the brace
- if (hack_braces) {
- if (opt.ignoreformat)
- while (regex_replace(t, t, "\\{", "_x_<")
- || regex_replace(t, t, "\\}", "_x_>"))
- LYXERR(Debug::FIND, "After {} replacement: '" << t << "'");
- else {
- simple_replace(t, "\\\\\\{", "_x_<");
- simple_replace(t, "\\\\\\}", "_x_>");
- LYXERR(Debug::FIND, "After {} replacement: '" << t << "'");
- }
- }
return t;
}
return ods.str();
}
-#if 0
+#if defined(ResultsDebug)
// Debugging output
static void displayMResult(MatchResult &mres, int increment)
{
LYXERR0( "match_len: " << mres.match_len);
LYXERR0( "match_prefix: " << mres.match_prefix);
LYXERR0( "match2end: " << mres.match2end);
+ LYXERR0( "pos_len: " << mres.pos_len); // Set in finalize
+ for (size_t i = 0; i < mres.result.size(); i++)
+ LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\"");
}
#define displayMres(s,i) displayMResult(s,i);
#else
#define displayMres(s,i)
#endif
+/*
+ * Not good, we miss possible matches containing also characters not found in
+ * the innermost depth.
static bool findAdvForwardInnermost(DocIterator & cur)
{
size_t d;
- DocIterator old_cur(cur.buffer());
+ DocIterator old_cur = cur;
int forwardCount = 0;
do {
d = cur.depth();
else
return false;
}
+*/
/** Finalize an advanced find operation, advancing the cursor to the innermost
** position that matches, plus computing the length of the matching text to
** be selected
+ ** Return the cur.pos() difference between start and end of found match
**/
-int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expected_len, int prefix_len = 0)
+MatchResult &findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1))
{
// Search the foremost position that matches (avoids find of entire math
// inset when match at start of it)
DocIterator old_cur(cur.buffer());
MatchResult mres;
- int max_match;
+ static MatchResult fail = MatchResult();
+ static MatchResult max_match;
// If (prefix_len > 0) means that forwarding 1 position will remove the complete entry
// Happens with e.g. hyperlinks
// either one sees "http://www.bla.bla" or nothing
// so the search for "www" gives prefix_len = 7 (== sizeof("http://")
// and although we search for only 3 chars, we find the whole hyperlink inset
- bool at_begin = (prefix_len == 0);
- if (findAdvForwardInnermost(cur)) {
+ bool at_begin = (expected.match_prefix == 0);
+ //if (findAdvForwardInnermost(cur)) {
+ if (0) {
mres = match(cur, -1, at_begin);
displayMres(mres, 0);
- if (expected_len > 0) {
- if (mres.match_len < expected_len)
- return 0;
+ if (expected.match_len > 0) {
+ if (mres.match_len < expected.match_len)
+ return fail;
}
else {
if (mres.match_len <= 0)
- return 0;
+ return fail;
}
- max_match = mres.match_len;
+ max_match = mres;
}
- else if (expected_len < 0) {
+ else if (expected.match_len < 0) {
mres = match(cur); /* match valid only if not searching whole words */
displayMres(mres, 0);
- max_match = mres.match_len;
+ max_match = mres;
}
else {
- max_match = expected_len;
+ max_match = expected;
}
- if (max_match <= 0) return 0;
+ if (max_match.match_len <= 0) return fail;
LYXERR(Debug::FIND, "Ok");
// Compute the match length
- int len = 1;
+ int len = 1;
if (cur.pos() + len > cur.lastpos())
- return 0;
+ return fail;
// regexp should use \w+, \S+, or \b(some string)\b
// to search for whole words
if (match.opt.matchword && !match.use_regexp) {
- LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
- while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
- ++len;
- LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
- }
- // Length of matched text (different from len param)
- int old_match = match(cur, len, at_begin).match_len;
- if (old_match < 0)
- old_match = 0;
- int new_match;
- // Greedy behaviour while matching regexps
- while ((new_match = match(cur, len + 1, at_begin).match_len) > old_match) {
- ++len;
- old_match = new_match;
- LYXERR(Debug::FIND, "verifying match with len = " << len);
- }
- if (old_match == 0)
- len = 0;
- }
- else {
+ LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
+ while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
+ ++len;
+ LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
+ }
+ // Length of matched text (different from len param)
+ static MatchResult old_match = match(cur, len, at_begin);
+ if (old_match.match_len < 0)
+ old_match = fail;
+ MatchResult new_match;
+ // Greedy behaviour while matching regexps
+ while ((new_match = match(cur, len + 1, at_begin)).match_len > old_match.match_len) {
+ ++len;
+ old_match = new_match;
+ LYXERR(Debug::FIND, "verifying match with len = " << len);
+ }
+ return old_match;
+ }
+ else {
int minl = 1;
int maxl = cur.lastpos() - cur.pos();
// Greedy behaviour while matching regexps
while (maxl > minl) {
- MatchResult mres2;
- mres2 = match(cur, len, at_begin);
- displayMres(mres2, len);
+ MatchResult mres2;
+ mres2 = match(cur, len, at_begin);
+ displayMres(mres2, len);
int actual_match = mres2.match_len;
- if (actual_match >= max_match) {
+ if (actual_match >= max_match.match_len) {
// actual_match > max_match _can_ happen,
// if the search area splits
// some following word so that the regex
old_cur = cur;
// Search for real start of matched characters
while (len > 1) {
- int actual_match;
+ MatchResult actual_match;
do {
cur.forwardPos();
} while (cur.depth() > old_cur.depth()); /* Skip inner insets */
}
if (cur.pos() != old_cur.pos()) {
// OK, forwarded 1 pos in actual inset
- actual_match = match(cur, len-1, at_begin).match_len;
- if (actual_match == max_match) {
+ actual_match = match(cur, len-1, at_begin);
+ if (actual_match.match_len == max_match.match_len) {
// Ha, got it! The shorter selection has the same match length
len--;
old_cur = cur;
}
else {
LYXERR(Debug::INFO, "cur.pos() == old_cur.pos(), this should never happen");
- actual_match = match(cur, len, at_begin).match_len;
- if (actual_match == max_match)
+ actual_match = match(cur, len, at_begin);
+ if (actual_match.match_len == max_match.match_len) {
old_cur = cur;
+ max_match = actual_match;
+ }
}
}
- }
- return len;
+ if (len == 0)
+ return fail;
+ else {
+ max_match.pos_len = len;
+ return max_match;
+ }
+ }
}
/// Finds forward
-int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
+int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
{
if (!cur)
return 0;
while (!theApp()->longOperationCancelled() && cur) {
- (void) findAdvForwardInnermost(cur);
+ //(void) findAdvForwardInnermost(cur);
LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
MatchResult mres = match(cur, -1, false);
displayMres(mres,-1)
// LYXERR0("Leaving first loop");
{
LYXERR(Debug::FIND, "Finalizing 1");
- int len = findAdvFinalize(cur, match, mres.match_len, mres.match_prefix);
- if (len > 0)
- return len;
+ MatchResult found_match = findAdvFinalize(cur, match, mres);
+ if (found_match.match_len > 0) {
+ LASSERT(found_match.pos_len > 0, /**/);
+ match.FillResults(found_match);
+ return found_match.pos_len;
+ }
else {
// try next possible match
cur.forwardPos();
// Sometimes in finalize we understand it wasn't a match
// and we need to continue the outest loop
LYXERR(Debug::FIND, "Finalizing 2");
- int len = findAdvFinalize(cur, match, mres.match_len);
- if (len > 0) {
- return len;
+ MatchResult mres4 = findAdvFinalize(cur, match, mres.match_len);
+ if (mres4.match_len > 0) {
+ match.FillResults(mres4);
+ LASSERT(mres4.pos_len > 0, /**/);
+ return mres4.pos_len;
}
}
if (match_len2 > 0)
/// Find the most backward consecutive match within same paragraph while searching backwards.
-int findMostBackwards(DocIterator & cur, MatchStringAdv const & match)
+MatchResult &findMostBackwards(DocIterator & cur, MatchStringAdv const & match)
{
DocIterator cur_begin = doc_iterator_begin(cur.buffer());
DocIterator tmp_cur = cur;
- int len = findAdvFinalize(tmp_cur, match, -1);
+ static MatchResult mr = findAdvFinalize(tmp_cur, match, MatchResult(-1));
Inset & inset = cur.inset();
for (; cur != cur_begin; cur.backwardPos()) {
LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
new_cur.backwardPos();
if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
break;
- int new_len = findAdvFinalize(new_cur, match, -1);
- if (new_len == len)
+ MatchResult new_mr = findAdvFinalize(new_cur, match, MatchResult(-1));
+ if (new_mr.match_len == mr.match_len)
break;
- len = new_len;
+ mr = new_mr;
}
LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur);
- return len;
+ return mr;
}
found_match = (match(cur).match_len > 0);
LYXERR(Debug::FIND, "findBackAdv3: found_match="
<< found_match << ", cur: " << cur);
- if (found_match)
- return findMostBackwards(cur, match);
+ if (found_match) {
+ MatchResult found_mr = findMostBackwards(cur, match);
+ match.FillResults(found_mr);
+ LASSERT(found_mr.pos_len > 0, /**/);
+ return found_mr.pos_len;
+ }
// Stop if begin of document reached
if (cur == cur_begin)
right = pit->size();
pit->changeCase(buffer.params(), pos_type(1), right, others_case);
}
-
} // namespace
+#if 1
+static bool replaceMatches(string &t, int maxmatchnum, vector <string> const & replacements) // expands "$$N" placeholders inside t in place; returns true iff at least one was substituted
+{
+ // Replaces each occurrence of the string "$$" + std::to_string(matchnum) (matchnum is a single digit, and must be < maxmatchnum) with replacements[matchnum]
+ // if the "$$" is not prefixed with an odd number of char '\\' (an odd count means the placeholder is escaped); skipped placeholders stay in t verbatim
+ static regex const rematch("(\\\\)*(\\$\\$([0-9]))"); // leading "(\\)*": run of preceding backslashes, group 2: the whole "$$N", group 3: the digit N
+ string s; // output text, built piecewise
+ size_t lastpos = 0; // offset in t just past the last substituted placeholder (0 = none yet)
+ smatch sub;
+ for (sregex_iterator it(t.begin(), t.end(), rematch), end; it != end; ++it) {
+ sub = *it;
+ if ((sub.position(2) - sub.position(0)) % 2 == 1) // distance from match start to "$$" == number of preceding backslashes
+ continue;
+ int num = stoi(sub.str(3), nullptr, 10);
+ if (num >= maxmatchnum) // N is not below maxmatchnum: leave this placeholder as it is
+ continue;
+ if (lastpos < (size_t) sub.position(2)) // copy the untouched text (including any backslashes) in front of "$$N"
+ s += t.substr(lastpos, sub.position(2) - lastpos);
+ s += replacements[num]; // NOTE(review): num is not checked against replacements.size(); presumably callers pass maxmatchnum <= replacements.size() -- confirm
+ lastpos = sub.position(2) + sub.length(2); // continue copying right after "$$N"
+ }
+ if (lastpos == 0) // lastpos is only advanced when a substitution happened
+ return false; // nothing substituted: t is left unchanged
+ else if (lastpos < t.length()) // append the tail that follows the last substitution
+ s += t.substr(lastpos, t.length() - lastpos);
+ t = s;
+ return true;
+}
+#endif
+
///
static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
{
ostringstream oss;
repl_buffer_orig.write(oss);
string lyx = oss.str();
+ if (matchAdv.valid_matches > 0) {
+ replaceMatches(lyx, matchAdv.valid_matches, matchAdv.matches);
+ }
Buffer repl_buffer("", false);
repl_buffer.setUnnamed(true);
LASSERT(repl_buffer.readString(lyx), return 0);
/// Perform a FindAdv operation.
-bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
+bool findAdv(BufferView * bv, FindAndReplaceOptions & opt)
{
DocIterator cur;
- int match_len = 0;
+ int pos_len = 0;
// e.g., when invoking word-findadv from mini-buffer wither with
// wrong options syntax or before ever opening advanced F&R pane
num_replaced += findAdvReplace(bv, opt, matchAdv);
cur = bv->cursor();
if (opt.forward)
- match_len = findForwardAdv(cur, matchAdv);
+ pos_len = findForwardAdv(cur, matchAdv);
else
- match_len = findBackwardsAdv(cur, matchAdv);
+ pos_len = findBackwardsAdv(cur, matchAdv);
} catch (exception & ex) {
bv->message(from_utf8(ex.what()));
return false;
}
- if (match_len == 0) {
+ if (pos_len == 0) {
if (num_replaced > 0) {
switch (num_replaced)
{
else
bv->message(_("Match found."));
- LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len);
- bv->putSelectionAt(cur, match_len, !opt.forward);
+ if (cur.pos() + pos_len > cur.lastpos()) {
+ // Prevent crash in bv->putSelectionAt()
+ // Should never happen, maybe LASSERT() here?
+ pos_len = cur.lastpos() - cur.pos();
+ }
+ LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len);
+ bv->putSelectionAt(cur, pos_len, !opt.forward);
return true;
}