#include <map>
#include <regex>
-//#define ResultsDebug
+#define ResultsDebug
#define USE_QT_FOR_SEARCH
#if defined(USE_QT_FOR_SEARCH)
#include <QtCore> // sets QT_VERSION
namespace lyx {
+typedef map<string, string> AccentsMap;
+static AccentsMap accents = map<string, string>();
// Helper class for deciding what should be ignored
class IgnoreFormats {
typedef vector<pair<string, string> > Escapes;
-/// A map of symbols and their escaped equivalent needed within a regex.
-/// @note Beware of order
-/*
-Escapes const & get_regexp_escapes()
-{
- typedef std::pair<std::string, std::string> P;
-
- static Escapes escape_map;
- if (escape_map.empty()) {
- escape_map.push_back(P("$", "_x_$"));
- escape_map.push_back(P("{", "_x_{"));
- escape_map.push_back(P("}", "_x_}"));
- escape_map.push_back(P("[", "_x_["));
- escape_map.push_back(P("]", "_x_]"));
- escape_map.push_back(P("(", "_x_("));
- escape_map.push_back(P(")", "_x_)"));
- escape_map.push_back(P("+", "_x_+"));
- escape_map.push_back(P("*", "_x_*"));
- escape_map.push_back(P(".", "_x_."));
- escape_map.push_back(P("\\", "(?:\\\\|\\\\backslash)"));
- escape_map.push_back(P("~", "(?:\\\\textasciitilde|\\\\sim)"));
- escape_map.push_back(P("^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)"));
- escape_map.push_back(P("_x_", "\\"));
- }
- return escape_map;
-}
-*/
-
-/// A map of lyx escaped strings and their unescaped equivalent.
-/*
-Escapes const & get_lyx_unescapes()
-{
- typedef std::pair<std::string, std::string> P;
-
- static Escapes escape_map;
- if (escape_map.empty()) {
- escape_map.push_back(P("\\%", "%"));
- escape_map.push_back(P("\\{", "{"));
- escape_map.push_back(P("\\}", "}"));
- escape_map.push_back(P("\\mathcircumflex ", "^"));
- escape_map.push_back(P("\\mathcircumflex", "^"));
- escape_map.push_back(P("\\backslash ", "\\"));
- escape_map.push_back(P("\\backslash", "\\"));
- escape_map.push_back(P("\\sim ", "~"));
- escape_map.push_back(P("\\sim", "~"));
- }
- return escape_map;
-}
-*/
-
-/// A map of escapes turning a regexp matching text to one matching latex.
-/*
-Escapes const & get_regexp_latex_escapes()
-{
- typedef std::pair<std::string, std::string> P;
-
- static Escapes escape_map;
- if (escape_map.empty()) {
- escape_map.push_back(P("\\\\", "(?:\\\\\\\\|\\\\backslash|\\\\textbackslash\\{\\}|\\\\textbackslash)"));
- escape_map.push_back(P("(<?!\\\\\\\\textbackslash)\\{", "\\\\\\{"));
- escape_map.push_back(P("(<?!\\\\\\\\textbackslash\\\\\\{)\\}", "\\\\\\}"));
- escape_map.push_back(P("\\[", "\\{\\[\\}"));
- escape_map.push_back(P("\\]", "\\{\\]\\}"));
- escape_map.push_back(P("\\^", "(?:\\^|\\\\textasciicircum\\{\\}|\\\\textasciicircum|\\\\mathcircumflex)"));
- escape_map.push_back(P("%", "\\\\\\%"));
- escape_map.push_back(P("#", "\\\\#"));
- }
- return escape_map;
-}
-*/
-
-/** @todo Probably the maps need to be migrated to regexps, in order to distinguish if
- ** the found occurrence were escaped.
- **/
-/*
-string apply_escapes(string s, Escapes const & escape_map)
-{
- LYXERR(Debug::FIND, "Escaping: '" << s << "'");
- Escapes::const_iterator it;
- for (it = escape_map.begin(); it != escape_map.end(); ++it) {
-// LYXERR(Debug::FIND, "Escaping " << it->first << " as " << it->second);
- unsigned int pos = 0;
- while (pos < s.length() && (pos = s.find(it->first, pos)) < s.length()) {
- s.replace(pos, it->first.length(), it->second);
- LYXERR(Debug::FIND, "After escape: " << s);
- pos += it->second.length();
-// LYXERR(Debug::FIND, "pos: " << pos);
- }
- }
- LYXERR(Debug::FIND, "Escaped : '" << s << "'");
- return s;
-}
-*/
-
-
string string2regex(string in)
{
static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" };
return temp2;
}
-string correctRegex(string t)
+string correctRegex(string t, bool withformat)
{
/* Convert \backslash => \
* and \{, \}, \[, \] => {, }, [, ]
regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))");
size_t lastpos = 0;
smatch sub;
+ bool backslashed = false;
for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
sub = *it;
string replace;
continue;
}
else {
- if (sub.str(4) == "backslash")
+ if (sub.str(4) == "backslash") {
replace = "\\";
+ if (withformat) {
+ // transforms '\backslash \{' into '\{'
+ // and '\{' into '{'
+ string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
+ if ((next == "\\{") || (next == "\\}")) {
+ replace = "";
+ backslashed = true;
+ }
+ }
+ }
else if (sub.str(4) == "mathcircumflex")
replace = "^";
- else if (sub.str(3) == "{")
- replace = "\\braceleft";
- else if (sub.str(3) == "}")
- replace = "\\braceright";
+ else if (backslashed) {
+ backslashed = false;
+ if (withformat && (sub.str(3) == "{"))
+ replace = accents["braceleft"];
+ else if (withformat && (sub.str(3) == "}"))
+ replace = accents["braceright"];
+ else {
+ // else part should not exist
+ LASSERT(1, /**/);
+ }
+ }
else
replace = sub.str(3);
}
/// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string),
/// while outside apply get_lyx_unescapes()+get_regexp_escapes().
/// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well.
-string escape_for_regex(string s)
+string escape_for_regex(string s, bool withformat)
{
size_t lastpos = 0;
string result = "";
break;
}
size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8);
- result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)));
+ result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat);
lastpos = end_pos + 13;
}
return result;
return rv;
}
-#if 0
-/** Checks if supplied string segment is well-formed from the standpoint of matching open-closed braces.
- **
- ** Verify that closed braces exactly match open braces. This avoids that, for example,
- ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'.
- **
- ** @param unmatched
- ** Number of open braces that must remain open at the end for the verification to succeed.
- **/
-#if QTSEARCH
-bool braces_match(QString const & beg,
- int unmatched = 0)
-#else
-bool braces_match(string const & beg,
- int unmatched = 0)
-#endif
-{
- int open_pars = 0;
-#if QTSEARCH
- LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'");
-#else
- LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'");
-#endif
- int lastidx = beg.size();
- for (int i=0; i < lastidx; ++i) {
- // Skip escaped braces in the count
-#if QTSEARCH
- QChar c = beg.at(i);
-#else
- char c = beg.at(i);
-#endif
- if (c == '\\') {
- ++i;
- if (i >= lastidx)
- break;
- } else if (c == '{') {
- ++open_pars;
- } else if (c == '}') {
- if (open_pars == 0) {
- LYXERR(Debug::FIND, "Found unmatched closed brace");
- return false;
- } else
- --open_pars;
- }
- }
- if (open_pars != unmatched) {
- LYXERR(Debug::FIND, "Found " << open_pars
- << " instead of " << unmatched
- << " unmatched open braces at the end of count");
- return false;
- }
- LYXERR(Debug::FIND, "Braces match as expected");
- return true;
-}
-#endif
-
class MatchResult {
public:
enum range {
static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
{
- int range = oldres.match_len;
- if (range > 0) range--;
- if (newres.match2end < oldres.match2end - oldres.match_len)
+ if (newres.match2end < oldres.match2end)
return MatchResult::newIsTooFar;
if (newres.match_len < oldres.match_len)
return MatchResult::newIsTooFar;
- if ((newres.match_len == oldres.match_len) &&
- (newres.match2end < oldres.match2end + range) &&
- (newres.match2end > oldres.match2end - range)) {
- return MatchResult::newIsBetter;
+
+ if (newres.match_len == oldres.match_len) {
+ if (newres.match2end == oldres.match2end)
+ return MatchResult::newIsBetter;
}
return MatchResult::newIsInvalid;
}
}
}
-typedef map<string, string> AccentsMap;
-static AccentsMap accents = map<string, string>();
-
static void buildaccent(string n, string param, string values)
{
stringstream s(n);
accents["latexe"] = getutf8(0xf0013);
accents["LaTeXe"] = getutf8(0xf0013);
accents["lyxarrow"] = getutf8(0xf0020);
+ accents["braceleft"] = getutf8(0xf0030);
+ accents["braceright"] = getutf8(0xf0031);
accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash
accents["backslash LyX"] = getutf8(0xf0010);
accents["backslash tex"] = getutf8(0xf0011);
buildAccentsMap();
static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
"cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
- "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|guillemot(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
+ "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc;
// Known characters
// No split
makeKey("backslash|textbackslash|slash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
- makeKey("braceleft|braceright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// can be digested by our search engine
LYXERR(Debug::FIND, "input: \"" << par << "\"");
result = splitOnKnownMacros(par.substr(0,parlen), isPatternString);
- LYXERR(Debug::FIND, "After split: \"" << result << "\"");
+ LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\"");
}
else
result = par.substr(0, parlen);
LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
}
- LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
- par_as_string = escape_for_regex(par_as_string);
+ // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
+ par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
// Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them.
- LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
- LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
+ // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
++close_wildcards;
size_t lng = par_as_string.size();
if (!opt.ignoreformat) {
if (lng < par_as_string.size())
par_as_string = par_as_string.substr(0,lng);
}
+ LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
if ((lng > 0) && (par_as_string[0] == '^')) {
par_as_string = par_as_string.substr(1);
--lng;
opt.matchstart = true;
}
- LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
- LYXERR(Debug::FIND, "Open braces: " << open_braces);
- LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
+ // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
+ // LYXERR(Debug::FIND, "Open braces: " << open_braces);
+ // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
// If entered regexp must match at begin of searched string buffer
// Kornel: Added parentheses to use $1 for size of the leading string
}
}
-#if 0
-// Count number of characters in string
-// {]} ==> 1
-// \& ==> 1
-// --- ==> 1
-// \\[a-zA-Z]+ ==> 1
-#if QTSEARCH
-static int computeSize(QStringRef s, int len)
-#define isLyxAlpha(arg) arg.isLetter()
-#else
-static int computeSize(string s, int len)
-#define isLyxAlpha(arg) isalpha(arg)
-#endif
-{
- if (len == 0)
- return 0;
- int skip = 1;
- int count = 0;
- for (int i = 0; i < len; i += skip, count++) {
- if (s.at(i) == '\\') {
- skip = 2;
- if (i + 1 < len && isLyxAlpha(s.at(i+1))) {
- for (int j = 2; i+j < len; j++) {
- if (! isLyxAlpha(s.at(i+j))) {
- if (s.at(i+j) == ' ')
- skip++;
- else if (s.at(i+j) == '{') {
- if (i+j+1 < len && s.at(i+j+1) == '}')
- skip += 2;
- else if (i + j + 1 >= len)
- skip++;
- }
- break;
- }
- skip++;
- }
- }
- }
- else if (s.at(i) == '{') {
- if (i + 1 < len && s.at(i+1) == '}')
- skip = 2;
- else
- skip = 3;
- }
- else if (s.at(i) == '-') {
- if (i+1 < len && s.at(i+1) == '-') {
- if (i + 2 < len && s.at(i+2) == '-')
- skip = 3;
- else
- skip = 2;
- }
- else
- skip = 1;
- }
- else {
- skip = 1;
- }
- }
- return count;
-}
-#endif
-
MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
{
MatchResult mres;
docstring docstr = stringifyFromForSearch(opt, cur, len);
string str;
- if (use_regexp || opt.casesensitive)
- str = normalize(docstr);
- else
- str = normalize(lowercase(docstr));
+ str = normalize(docstr);
if (!opt.ignoreformat) {
str = correctlanguagesetting(str, false, !opt.ignoreformat);
+ // remove closing '}' and '\n' to allow for use of '$' in regex
+ size_t lng = str.size();
+ while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n')))
+ lng--;
+ if (lng != str.size()) {
+ str = str.substr(0, lng);
+ }
}
if (str.empty()) {
mres.match_len = -1;
return mres;
}
- LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'");
- LYXERR(Debug::FIND, "After normalization: '" << str << "'");
+ LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'");
- if (use_regexp) {
+ LASSERT(use_regexp, /**/);
+ {
+ // use_regexp always true
LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
#if QTSEARCH
QString qstr = QString::fromStdString(str);
int matchend = match.capturedEnd(0);
while (mres.match_len > 0) {
QChar c = qstr.at(matchend - 1);
- if (c == '\n') {
+ if ((c == '\n') || (c == '}') || (c == '{')) {
mres.match_len--;
matchend--;
}
#endif
return mres;
}
-
- // else !use_regexp: but all code paths above return
- LYXERR(Debug::FIND, "Searching in normal mode: par_as_string='"
- << par_as_string << "', str='" << str << "'");
- LYXERR(Debug::FIND, "Searching in normal mode: lead_as_string='"
- << lead_as_string << "', par_as_string_nolead='"
- << par_as_string_nolead << "'");
-
- if (at_begin) {
- LYXERR(Debug::FIND, "size=" << par_as_string.size()
- << ", substr='" << str.substr(0, par_as_string.size()) << "'");
- if (str.substr(0, par_as_string.size()) == par_as_string) {
- mres.match_len = par_as_string.size();
- mres.match2end = str.size();
- mres.pos = 0;
- return mres;
- }
- } else {
- // Start the search _after_ the leading part
- size_t pos = str.find(par_as_string_nolead, lead_as_string.size());
- if (pos != string::npos) {
- mres.match_len = par_as_string.size();
- mres.match2end = str.size() - pos;
- mres.pos = pos;
- return mres;
- }
- }
- return mres;
}
// Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
// Kornel: Added textsl, textsf, textit, texttt and noun
// + allow to search for colored text too
- LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t);
+ LYXERR(Debug::FIND, "Removing stale empty macros from: " << t);
while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
*/
docstring latexifyFromCursor(DocIterator const & cur, int len)
{
+ /*
LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur);
LYXERR(Debug::FIND, " with cur.lastpost=" << cur.lastpos() << ", cur.lastrow="
<< cur.lastrow() << ", cur.lastcol=" << cur.lastcol());
+ */
Buffer const & buf = *cur.buffer();
odocstringstream ods;
int len = 1;
if (cur.pos() + len > cur.lastpos())
return fail;
- // regexp should use \w+, \S+, or \b(some string)\b
- // to search for whole words
- if (match.opt.matchword && !match.use_regexp) {
- LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
- while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
- ++len;
- LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
- }
- // Length of matched text (different from len param)
- static MatchResult old_match = match(cur, len, at_begin);
- if (old_match.match_len < 0)
- old_match = fail;
- MatchResult new_match;
- // Greedy behaviour while matching regexps
- while ((new_match = match(cur, len + 1, at_begin)).match_len > old_match.match_len) {
- ++len;
- old_match = new_match;
- LYXERR(Debug::FIND, "verifying match with len = " << len);
- }
- displayMres(old_match, "SEARCH RESULT", cur)
- return old_match;
- }
- else {
+
+ LASSERT(match.use_regexp, /**/);
+ {
int minl = 1;
int maxl = cur.lastpos() - cur.pos();
// Greedy behaviour while matching regexps
if (!cur)
return 0;
bool repeat = false;
+ DocIterator orig_cur; // to be used if repeat not successful
+ MatchResult orig_mres;
while (!theApp()->longOperationCancelled() && cur) {
//(void) findAdvForwardInnermost(cur);
LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
default:
// Todo@
// Handle not like MatchResult::newIsTooFar
- LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix);
+ LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
firstInvalid--;
increment = increment*3/4;
cur = old_cur;
}
}
}
- if (mres.match_len > 0 && mres.match_prefix + mres.pos - mres.leadsize > 0) {
- repeat = true;
- cur.forwardPos();
- continue;
+ if (mres.match_len > 0) {
+ if (mres.match_prefix + mres.pos - mres.leadsize > 0) {
+ repeat = true;
+ orig_cur = cur;
+ orig_mres = mres;
+ cur.forwardPos();
+ continue;
+ }
+ }
+ else if (repeat) {
+ // seems to never be reached.
+ cur = orig_cur;
+ mres = orig_mres;
}
// LYXERR0("Leaving first loop");
LYXERR(Debug::FIND, "Finalizing 1");
}
} // namespace
-#if 1
static bool replaceMatches(string &t, int maxmatchnum, vector <string> const & replacements)
{
// Should replace the string "$" + std::to_string(matchnum) with replacement
t = s;
return true;
}
-#endif
///
static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
{
- LYXERR(Debug::FIND, "parsing");
+ // LYXERR(Debug::FIND, "parsing");
string s;
string line;
getline(is, line);
break;
getline(is, line);
}
- LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
+ // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
opt.find_buf_name = from_utf8(s);
is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all;
is.get(); // Waste space before replace string
break;
getline(is, line);
}
- LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
+ // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
opt.repl_buf_name = from_utf8(s);
is >> opt.keep_case;
int i;
is >> i;
opt.restr = FindAndReplaceOptions::SearchRestriction(i);
+ /*
LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' '
<< opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' '
<< opt.scope << ' ' << opt.restr);
+ */
return is;
}