* \author Jürgen Vigna
* \author Alfredo Braunstein
* \author Tommaso Cucinotta
+ * \author Kornel Benko
*
* Full author contact details are available in file CREDITS.
*/
#include <map>
#include <regex>
+#define USE_QT_FOR_SEARCH
+#if defined(USE_QT_FOR_SEARCH)
+ #include <QtCore> // sets QT_VERSION
+ #if (QT_VERSION >= 0x050000)
+ #include <QRegularExpression>
+ #define QTSEARCH 1
+ #else
+ #define QTSEARCH 0
+ #endif
+#else
+ #define QTSEARCH 0
+#endif
using namespace std;
using namespace lyx::support;
static Escapes escape_map;
if (escape_map.empty()) {
escape_map.push_back(P("\\%", "%"));
+ escape_map.push_back(P("\\{", "{"));
+ escape_map.push_back(P("\\}", "}"));
escape_map.push_back(P("\\mathcircumflex ", "^"));
escape_map.push_back(P("\\mathcircumflex", "^"));
escape_map.push_back(P("\\backslash ", "\\"));
escape_map.push_back(P("\\backslash", "\\"));
- escape_map.push_back(P("\\\\{", "_x_<"));
- escape_map.push_back(P("\\\\}", "_x_>"));
escape_map.push_back(P("\\sim ", "~"));
escape_map.push_back(P("\\sim", "~"));
}
new_pos = s.size();
string t;
if (new_pos > pos) {
+ // outside regexp
LYXERR(Debug::FIND, "new_pos: " << new_pos);
t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes());
LYXERR(Debug::FIND, "t [lyx]: " << t);
t = s.substr(new_pos + 8, end_pos - (new_pos + 8));
LYXERR(Debug::FIND, "t in regexp : " << t);
t = apply_escapes(t, get_lyx_unescapes());
- LYXERR(Debug::FIND, "t in regexp [lyx]: " << t);
+ LYXERR(Debug::FIND, "t in regexp after unescapes [lyx]: " << t);
if (match_latex) {
t = apply_escapes(t, get_regexp_latex_escapes());
- LYXERR(Debug::FIND, "t in regexp [ltx]: " << t);
+ LYXERR(Debug::FIND, "t in regexp after latex_escapes [ltx]: " << t);
}
if (end_pos == s.size()) {
s.replace(new_pos, end_pos - new_pos, t);
** @param unmatched
** Number of open braces that must remain open at the end for the verification to succeed.
**/
-bool braces_match(string::const_iterator const & beg,
- string::const_iterator const & end,
+#if QTSEARCH
+bool braces_match(QString const & beg,
int unmatched = 0)
+#else
+bool braces_match(string const & beg,
+ int unmatched = 0)
+#endif
{
int open_pars = 0;
- string::const_iterator it = beg;
- LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'");
- for (; it != end; ++it) {
+#if QTSEARCH
+ LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'");
+#else
+ LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'");
+#endif
+ int lastidx = beg.size();
+ for (int i=0; i < lastidx; ++i) {
// Skip escaped braces in the count
- if (*it == '\\') {
- ++it;
- if (it == end)
+#if QTSEARCH
+ QChar c = beg.at(i);
+#else
+ char c = beg.at(i);
+#endif
+ if (c == '\\') {
+ ++i;
+ if (i >= lastidx)
break;
- } else if (*it == '{') {
+ } else if (c == '{') {
++open_pars;
- } else if (*it == '}') {
+ } else if (c == '}') {
if (open_pars == 0) {
LYXERR(Debug::FIND, "Found unmatched closed brace");
return false;
class MatchResult {
public:
+ enum range { // verdicts returned by interpretMatch() when comparing two results
+ newIsTooFar,
+ newIsBetter,
+ newIsInvalid
+ };
int match_len;
+ int match_prefix; // length of capture group 2 of the search regexp
int match2end;
int pos;
- MatchResult(): match_len(0),match2end(0), pos(0) {};
+ int leadsize; // length of capture group 1 (the leading format material)
+ MatchResult(): match_len(0),match_prefix(0),match2end(0), pos(0),leadsize(0) {}; // every field starts at zero
};
+static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
+{ // Classify newres against oldres by comparing only their match_len and match2end fields
+ if (newres.match2end < oldres.match2end) // match2end shrank
+ return MatchResult::newIsTooFar;
+ if (newres.match_len < oldres.match_len) // match_len shrank
+ return MatchResult::newIsTooFar;
+ if ((newres.match_len == oldres.match_len) && (newres.match2end == oldres.match2end))
+ return MatchResult::newIsBetter; // both values unchanged
+ if ((newres.match_len == oldres.match_len) && (newres.match2end -2 == oldres.match2end)) {
+ // The string contained, for instance, "\usepackage...fontenc ..."
+ // and, after moving 9 chars forward, now contains "ge...{fontenc} ..."
+ // (match2end grew by exactly 2), so we accept it as OK
+ return MatchResult::newIsBetter;
+ }
+ return MatchResult::newIsInvalid; // any other combination is rejected
+}
+
/** The class performing a match between a position in the document and the FindAdvOptions.
**/
+
class MatchStringAdv {
public:
MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt);
** The length of the matching text, or zero if no match was found.
**/
MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+#if QTSEARCH
+ bool regexIsValid;
+ string regexError;
+#endif
public:
/// buffer
// normalized string to search
string par_as_string;
// regular expression to use for searching
+ // regexp2 is same as regexp, but prefixed with a ".*?"
+#if QTSEARCH
+ QRegularExpression regexp;
+ QRegularExpression regexp2;
+#else
regex regexp;
- // same as regexp, but prefixed with a ".*?"
regex regexp2;
+#endif
// leading format material as string
string lead_as_string;
// par_as_string after removal of lead_as_string
// number of (.*?) subexpressions added at end of search regexp for closing
// environments, math mode, styles, etc...
int close_wildcards;
+public:
// Are we searching with regular expressions ?
bool use_regexp;
};
odocstringstream ods;
otexstream os(ods);
runparams.nice = true;
- runparams.flavor = FLAVOR::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
// OutputParams runparams(&buffer.params().encoding());
OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
- runparams.flavor = FLAVOR::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
runparams.dryrun = true;
runparams.for_search = true;
AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT,
&runparams);
}
+ // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts
+ string t = to_utf8(str);
+ while (regex_replace(t, t, "\\\\(text|lyxmathsym)\\{([^\\}]*)\\}", "$2"));
+ str = from_utf8(t);
}
return str;
}
// @TODO Support \item[text]
// Kornel: Added textsl, textsf, textit, texttt and noun
// + allow to search for colored text too
- while (regex_replace(t, t, "^\\\\(((footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|emph|noun|minisec|text(bf|md|sl|sf|it|tt))|((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "")
+ while (regex_replace(t, t, "^\\\\(("
+ "(author|title|subtitle|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|"
+ "lyxaddress|lyxrightaddress|"
+ "footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|"
+ "emph|noun|minisec|text(bf|md|sl|sf|it|tt))|"
+ "((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|"
+ "(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "")
|| regex_replace(t, t, "^\\$", "")
|| regex_replace(t, t, "^\\\\\\[", "")
|| regex_replace(t, t, "^ ?\\\\item\\{[a-z]+\\}", "")
noContent,
/* Char, like \backslash */
isChar,
+ /* replace starting backslash with '#' */
+ isText,
/* \part, \section*, ... */
isSectioning,
/* title, author etc */
isIgnored,
/* like \lettrine[lines=5]{}{} */
cleanToStart,
+ // like isStandard, but always remove head
+ headRemove,
/* End of arguments marker for lettrine,
* so that they can be ignored */
endArguments
// Enable the use of first token again
if (ignoreidx >= 0) {
int value = defLang._tokenstart + defLang._tokensize;
+ int borderidx = 0;
+ if (hasTitle) {
+ borderidx = 1;
+ }
if (value > 0) {
- if (borders[0].low < value)
- borders[0].low = value;
- if (borders[0].upper < value)
- borders[0].upper = value;
+ if (borders[borderidx].low < value)
+ borders[borderidx].low = value;
+ if (borders[borderidx].upper < value)
+ borders[borderidx].upper = value;
}
}
}
}
}
+// Helper function: encode the code point 'uchar' as a UTF-8 style byte string (zero-valued bytes, hence code point 0, are omitted)
+static string getutf8(unsigned uchar)
+{
+ #define maxc 5 // buffer size; NOTE(review): the macro is not #undef'd in this function, so it stays defined afterwards
+ string ret = string();
+ char c[maxc] = {0}; // filled from the last element backwards
+ if (uchar <= 0x7f) {
+ c[maxc-1] = uchar & 0x7f; // 7-bit value: a single byte
+ }
+ else {
+ unsigned char rest = 0x40; // exclusive limit of what still fits into the lead byte
+ unsigned char first = 0x80; // marker bits of the lead byte for the current sequence length
+ int start = maxc-1;
+ for (int i = start; i >=0; --i) {
+ if (uchar < rest) {
+ c[i] = first + uchar; // remaining bits fit: emit the lead byte and stop
+ break;
+ }
+ c[i] = 0x80 | (uchar & 0x3f); // continuation byte carrying the low 6 bits
+ uchar >>= 6;
+ rest >>= 1; // each additional byte leaves one payload bit less in the lead byte
+ first >>= 1;
+ first |= 0x80; // ... and adds one more leading 1-bit to its marker
+ }
+ }
+ for (int i = 0; i < maxc; i++) {
+ if (c[i] == 0) continue; // skip unused (zero) slots at the front of the buffer
+ ret += c[i];
+ }
+ return(ret);
+}
+
static void buildAccentsMap()
{
accents["imath"] = "ı";
accents["i"] = "ı";
accents["jmath"] = "ȷ";
accents["cdot"] = "·";
- accents["lyxmathsym{ß}"] = "ß";
- accents["text{ß}"] = "ß";
+ accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15
+ accents["thinspace"] = getutf8(0xf0002); // and used _only_ by findadv
+ accents["negthinspace"] = getutf8(0xf0003); // to omit backslashed latex macros
+ accents["medspace"] = getutf8(0xf0004); // See https://en.wikipedia.org/wiki/Private_Use_Areas
+ accents["negmedspace"] = getutf8(0xf0005);
+ accents["thickspace"] = getutf8(0xf0006);
+ accents["negthickspace"] = getutf8(0xf0007);
+ accents["lyx"] = getutf8(0xf0010); // Used logos
+ accents["LyX"] = getutf8(0xf0010);
+ accents["tex"] = getutf8(0xf0011);
+ accents["TeX"] = getutf8(0xf0011);
+ accents["latex"] = getutf8(0xf0012);
+ accents["LaTeX"] = getutf8(0xf0012);
+ accents["latexe"] = getutf8(0xf0013);
+ accents["LaTeXe"] = getutf8(0xf0013);
+ accents["lyxarrow"] = getutf8(0xf0020);
+ accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash
+ accents["backslash LyX"] = getutf8(0xf0010);
+ accents["backslash tex"] = getutf8(0xf0011);
+ accents["backslash TeX"] = getutf8(0xf0011);
+ accents["backslash latex"] = getutf8(0xf0012);
+ accents["backslash LaTeX"] = getutf8(0xf0012);
+ accents["backslash latexe"] = getutf8(0xf0013);
+ accents["backslash LaTeXe"] = getutf8(0xf0013);
+ accents["backslash lyxarrow"] = getutf8(0xf0020);
accents["ddot{\\imath}"] = "ï";
buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY",
"äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ"); // umlaut
buildaccent("ogonek|k", "AaEeIiUuOo",
"ĄąĘęĮįŲųǪǫ"); // ogonek
buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh",
- "Ã\87çĢĢĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla
+ "Ã\87çĢģĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla
buildaccent("subring|textsubring", "Aa",
"Ḁḁ"); // subring
buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu",
{
if (accents.empty())
buildAccentsMap();
- static regex const accre("\\\\(([\\S]|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}|(i|imath|jmath|cdot)(?![a-zA-Z]))");
+ static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
+ "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
+ "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow)))(?![a-zA-Z]))");
smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc;
}
// Remove possibly following space too
if (par[pos+sub.str(0).size()] == ' ')
- addIntervall(pos+val.size(), pos + sub.str(0).size()+1);
+ addIntervall(pos+val.size(), pos + sub.str(0).size()+1);
else
- addIntervall(pos+val.size(), pos + sub.str(0).size());
+ addIntervall(pos+val.size(), pos + sub.str(0).size());
for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
// remove traces of any remaining chars
par[i] = ' ';
public:
LatexInfo(string const & par, bool isPatternString)
- : entidx_(-1), interval_(isPatternString, par)
+ : entidx_(-1), interval_(isPatternString, par)
{
buildKeys(isPatternString);
entries_ = vector<KeyInfo>();
return -1;
}
if (entries_[0].keytype == KeyInfo::isTitle) {
+ interval_.hasTitle = true;
if (! entries_[0].disabled) {
- interval_.hasTitle = true;
interval_.titleValue = entries_[0].head;
}
else {
- interval_.hasTitle = false;
interval_.titleValue = "";
}
removeHead(entries_[0]);
bool math_end_waiting = false;
size_t math_pos = 10000;
string math_end;
+ static vector<string> usedText = vector<string>();
interval_.removeAccents();
}
// Ignore language if there is math somewhere in pattern-string
if (isPatternString) {
+ for (auto s: usedText) {
+ // Remove entries created in previous search runs
+ keys.erase(s);
+ }
+ usedText = vector<string>();
if (! mi.empty()) {
// Disable language
keys["foreignlanguage"].disabled = true;
}
}
};
+ if (keys.find(key) != keys.end()) {
+ if (keys[key].keytype == KeyInfo::headRemove) {
+ KeyInfo found1 = keys[key];
+ found1.disabled = true;
+ found1.head = "\\" + key + "{";
+ found1._tokenstart = sub.position(size_t(0));
+ found1._tokensize = found1.head.length();
+ found1._dataStart = found1._tokenstart + found1.head.length();
+ int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1);
+ found1._dataEnd = endpos;
+ removeHead(found1);
+ continue;
+ }
+ }
if (evaluatingRegexp) {
if (sub.str(1).compare("endregexp") == 0) {
evaluatingRegexp = false;
}
if (keys.find(key) == keys.end()) {
found = KeyInfo(KeyInfo::isStandard, 0, true);
+ LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text");
+ found = KeyInfo(KeyInfo::isText, 0, false);
if (isPatternString) {
found.keytype = KeyInfo::isChar;
found.disabled = false;
found.used = true;
}
keys[key] = found;
+ usedText.push_back(key);
}
else
found = keys[key];
optionalEnd = optend;
}
string token = sub.str(5);
- int closings = found.parenthesiscount;
+ int closings;
+ if (interval_.par[optend] != '{') {
+ closings = 0;
+ found.parenthesiscount = 0;
+ found.head = "\\" + key;
+ }
+ else
+ closings = found.parenthesiscount;
if (found.parenthesiscount == 1) {
found.head = "\\" + key + "{";
}
found._tokensize = found.head.length();
found._dataStart = found._tokenstart + found.head.length();
if (found.keytype == KeyInfo::doRemove) {
- int endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
- found._dataStart = endpar;
- found._tokensize = found._dataStart - found._tokenstart;
+ if (closings > 0) {
+ size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
+ if (endpar >= interval_.par.length())
+ found._dataStart = interval_.par.length();
+ else
+ found._dataStart = endpar;
+ found._tokensize = found._dataStart - found._tokenstart;
+ }
+ else {
+ found._dataStart = found._tokenstart + found._tokensize;
+ }
closings = 0;
}
if (interval_.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) {
// Disable this key, treate it as standard
found.keytype = KeyInfo::isStandard;
found.disabled = true;
- if ((codeEnd == interval_.par.length()) &&
+ if ((codeEnd +1 >= interval_.par.length()) &&
(found._tokenstart == codeStart)) {
// trickery, because the code inset starts
// with \selectlanguage ...
static bool keysBuilt = false;
if (keysBuilt && !isPatternString) return;
+ // Keys to ignore in any case
+ makeKey("text|textcyrillic|lyxmathsym", KeyInfo(KeyInfo::headRemove, 1, true), true);
// Known standard keys with 1 parameter.
// Split is done, if not at start of region
makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+ makeKey("guillemotright|guillemotleft", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Spaces
makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+ makeKey("thickspace|medspace|thinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Skip
// makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Custom space/skip, remove the content (== length value)
makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString);
// Survives, like known character
- makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+ // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+ makeKey("tableofcontents", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString);
makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
interval_.closes[0] = -1;
break;
}
+ case KeyInfo::isText:
+ interval_.par[actual._tokenstart] = '#';
+ //interval_.addIntervall(actual._tokenstart, actual._tokenstart+1);
+ nextKeyIdx = getNextKey();
+ break;
case KeyInfo::noContent: { /* char like "\hspace{2cm}" */
if (actual.disabled)
interval_.addIntervall(actual._tokenstart, actual._dataEnd);
}
case KeyInfo::isSize: {
if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) {
- processRegion(actual._dataEnd, actual._dataEnd+1); /* remove possibly following {} */
- interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+ if (actual.parenthesiscount == 0)
+ interval_.addIntervall(actual._tokenstart, actual._dataEnd);
+ else {
+ interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+ }
nextKeyIdx = getNextKey();
} else {
// Here _dataStart points to '{', so correct it
}
break;
}
- case KeyInfo::endArguments:
+ case KeyInfo::endArguments: {
// Remove trailing '{}' too
actual._dataStart += 1;
actual._dataEnd += 1;
interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
nextKeyIdx = getNextKey();
break;
+ }
case KeyInfo::noMain:
// fall through
case KeyInfo::isStandard: {
break;
}
case KeyInfo::invalid:
- // This cannot happen, already handled
+ case KeyInfo::headRemove:
+ // These two cases cannot happen, already handled
// fall through
default: {
// LYXERR(Debug::INFO, "Unhandled keytype");
int output_end;
if (actual._dataEnd < end)
output_end = interval_.nextNotIgnored(actual._dataEnd);
- else
+ else if (interval_.par.size() > (size_t) end)
output_end = interval_.nextNotIgnored(end);
+ else
+ output_end = interval_.par.size();
if ((actual.keytype == KeyInfo::isMain) && actual.disabled) {
interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize);
}
LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
}
+ LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
// Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them.
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
lng -= 2;
open_braces++;
}
- else
+ else
break;
-}
+ }
if (lng < par_as_string.size())
par_as_string = par_as_string.substr(0,lng);
/*
// TODO: Adapt '\[12345678]' in par_as_string to acount for the first '()
// Unfortunately is '\1', '\2', etc not working for strings with extra format
// so the convert has no effect in that case
- for (int i = 8; i > 0; --i) {
+ for (int i = 7; i > 0; --i) {
string orig = "\\\\" + std::to_string(i);
- string dest = "\\" + std::to_string(i+1);
+ string dest = "\\" + std::to_string(i+2);
while (regex_replace(par_as_string, par_as_string, orig, dest));
}
- regexp_str = "(" + lead_as_regexp + ")" + par_as_string;
- regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string;
+ regexp_str = "(" + lead_as_regexp + ")()" + par_as_string;
+ regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string;
}
LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
- regexp = regex(regexp_str);
-
LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
- regexp2 = regex(regexp2_str);
+#if QTSEARCH
+ // Handle \w properly
+ QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
+ if (! opt.casesensitive) {
+ popts |= QRegularExpression::CaseInsensitiveOption;
+ }
+ regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
+ regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
+ regexError = "";
+ if (regexp.isValid() && regexp2.isValid()) {
+ regexIsValid = true;
+ // Check '{', '}' pairs inside the regex
+ int balanced = 0;
+ int skip = 1;
+ for (unsigned i = 0; i < par_as_string.size(); i+= skip) {
+ char c = par_as_string[i];
+ if (c == '\\') {
+ skip = 2;
+ continue;
+ }
+ if (c == '{')
+ balanced++;
+ else if (c == '}') {
+ balanced--;
+ if (balanced < 0)
+ break;
+ }
+ skip = 1;
+ }
+ if (balanced != 0) {
+ regexIsValid = false;
+ regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
+ }
+ }
+ else {
+ regexIsValid = false;
+ if (!regexp.isValid())
+ regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
+ if (!regexp2.isValid())
+ regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
+ }
+#else
+ if (opt.casesensitive) {
+ regexp = regex(regexp_str);
+ regexp2 = regex(regexp2_str);
+ }
+ else {
+ regexp = regex(regexp_str, std::regex_constants::icase);
+ regexp2 = regex(regexp2_str, std::regex_constants::icase);
+ }
+#endif
}
}
-
+#if 0
// Count number of characters in string
// {]} ==> 1
// \& ==> 1
// --- ==> 1
// \\[a-zA-Z]+ ==> 1
+#if QTSEARCH
+static int computeSize(QStringRef s, int len)
+#define isLyxAlpha(arg) arg.isLetter()
+#else
static int computeSize(string s, int len)
+#define isLyxAlpha(arg) isalpha(arg)
+#endif
{
if (len == 0)
return 0;
int skip = 1;
int count = 0;
for (int i = 0; i < len; i += skip, count++) {
- if (s[i] == '\\') {
+ if (s.at(i) == '\\') {
skip = 2;
- if (isalpha(s[i+1])) {
+ if (i + 1 < len && isLyxAlpha(s.at(i+1))) {
for (int j = 2; i+j < len; j++) {
- if (! isalpha(s[i+j])) {
- if (s[i+j] == ' ')
- skip++;
- else if ((s[i+j] == '{') && s[i+j+1] == '}')
- skip += 2;
- else if ((s[i+j] == '{') && (i + j + 1 >= len))
+ if (! isLyxAlpha(s.at(i+j))) {
+ if (s.at(i+j) == ' ')
skip++;
+ else if (s.at(i+j) == '{') {
+ if (i+j+1 < len && s.at(i+j+1) == '}')
+ skip += 2;
+ else if (i + j + 1 >= len)
+ skip++;
+ }
break;
}
skip++;
}
}
}
- else if (s[i] == '{') {
- if (s[i+1] == '}')
+ else if (s.at(i) == '{') {
+ if (i + 1 < len && s.at(i+1) == '}')
skip = 2;
else
skip = 3;
}
- else if (s[i] == '-') {
- if (s[i+1] == '-') {
- if (s[i+2] == '-')
+ else if (s.at(i) == '-') {
+ if (i+1 < len && s.at(i+1) == '-') {
+ if (i + 2 < len && s.at(i+2) == '-')
skip = 3;
else
skip = 2;
}
return count;
}
+#endif
MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
{
return mres;
docstring docstr = stringifyFromForSearch(opt, cur, len);
- string str = normalize(docstr, true);
+ string str;
+ if (use_regexp || opt.casesensitive)
+ str = normalize(docstr, true);
+ else
+ str = normalize(lowercase(docstr), true);
if (!opt.ignoreformat) {
str = correctlanguagesetting(str, false, !opt.ignoreformat);
}
if (use_regexp) {
LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
+#if QTSEARCH
+ QString qstr = QString::fromStdString(str);
+ QRegularExpression const *p_regexp;
+ QRegularExpression::MatchType flags = QRegularExpression::NormalMatch;
+ if (at_begin) {
+ p_regexp = ®exp;
+ } else {
+ p_regexp = ®exp2;
+ }
+ QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags);
+ if (!match.hasMatch())
+ return mres;
+ // Check braces on segments that matched all (.*?) subexpressions,
+ // except the last "padding" one inserted by lyx.
+ for (int i = 3; i < match.lastCapturedIndex(); ++i)
+ if (!braces_match(match.captured(i), open_braces))
+ return mres;
+#else
regex const *p_regexp;
regex_constants::match_flag_type flags;
if (at_begin) {
if (re_it == sregex_iterator())
return mres;
match_results<string::const_iterator> const & m = *re_it;
-
- if (0) { // Kornel Benko: DO NOT CHECKK
- // Check braces on the segment that matched the entire regexp expression,
- // plus the last subexpression, if a (.*?) was inserted in the constructor.
- if (!braces_match(m[0].first, m[0].second, open_braces))
- return mres;
- }
-
// Check braces on segments that matched all (.*?) subexpressions,
// except the last "padding" one inserted by lyx.
- for (size_t i = 1; i < m.size() - 1; ++i)
- if (!braces_match(m[i].first, m[i].second, open_braces))
+ for (size_t i = 3; i < m.size() - 1; ++i)
+ if (!braces_match(m[i], open_braces))
return mres;
-
+#endif
// Exclude from the returned match length any length
// due to close wildcards added at end of regexp
// and also the length of the leading (e.g. '\emph{}')
// Whole found string, including the leading: m[0].second - m[0].first
// Size of the leading string: m[1].second - m[1].first
int leadingsize = 0;
- if (m.size() > 1)
- leadingsize = m[1].second - m[1].first;
int result;
+#if QTSEARCH
+ if (match.lastCapturedIndex() > 0) {
+ leadingsize = match.capturedEnd(1) - match.capturedStart(1);
+ }
+ int lastidx = match.lastCapturedIndex();
+ for (int i = 0; i <= lastidx; i++) {
+ LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long");
+ }
+ if (close_wildcards == 0)
+ result = match.capturedEnd(0) - match.capturedStart(0);
+ else
+ result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0);
+
+#else
+ if (m.size() > 2) {
+ leadingsize = m[1].second - m[1].first;
+ }
for (size_t i = 0; i < m.size(); i++) {
LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long");
}
if (close_wildcards == 0)
result = m[0].second - m[0].first;
-
else
result = m[m.size() - close_wildcards].first - m[0].first;
-
- size_t pos = m.position(size_t(0));
- // Ignore last closing characters
- while (result > 0) {
- if (str[pos+result-1] == '}')
- --result;
- else
- break;
- }
+#endif
if (result > leadingsize)
result -= leadingsize;
else
result = 0;
- mres.match_len = computeSize(str.substr(pos+leadingsize,result), result);
- mres.match2end = str.size() - pos - leadingsize;
- mres.pos = pos+leadingsize;
+#if QTSEARCH
+ mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2);
+ mres.match_len = match.capturedEnd(0) - match.capturedEnd(2);
+ // Because the number of closing braces at the end of the string may differ,
+ // we have to 'unify' the length of the post-match.
+ // Done by ignoring trailing braces and linefeeds at the string end
+ int matchend = match.capturedEnd(0);
+ while (mres.match_len > 0) {
+ QChar c = qstr.at(matchend - 1);
+ if ((c == '\n') || (c == '}') || (c == '{')) {
+ mres.match_len--;
+ matchend--;
+ }
+ else
+ break;
+ }
+ size_t strsize = qstr.size();
+ while (strsize > (size_t) match.capturedEnd(0)) {
+ QChar c = qstr.at(strsize-1);
+ if ((c == '\n') || (c == '}')) {
+ --strsize;
+ }
+ else
+ break;
+ }
+ // LYXERR0(qstr.toStdString());
+ mres.match2end = strsize - matchend;
+ mres.pos = match.capturedStart(2);
+#else
+ mres.match_prefix = m[2].second - m[2].first;
+ mres.match_len = m[0].second - m[2].second;
+ // Ignore trailing braces and linefeeds at the string end
+ size_t strend = m[0].second - m[0].first;
+ int matchend = strend;
+ while (mres.match_len > 0) {
+ char c = str.at(matchend - 1);
+ if ((c == '\n') || (c == '}') || (c == '{')) {
+ mres.match_len--;
+ matchend--;
+ }
+ else
+ break;
+ }
+ size_t strsize = str.size();
+ while (strsize > strend) {
+ if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) {
+ --strsize;
+ }
+ else
+ break;
+ }
+ // LYXERR0(str);
+ mres.match2end = strsize - matchend;
+ mres.pos = m[2].first - m[0].first;;
+#endif
+ if (mres.match2end < 0)
+ mres.match_len = 0;
+ mres.leadsize = leadingsize;
return mres;
}
string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
{
string t;
- if (! opt.casesensitive)
- t = lyx::to_utf8(lowercase(s));
- else
- t = lyx::to_utf8(s);
+ t = lyx::to_utf8(s);
// Remove \n at begin
while (!t.empty() && t[0] == '\n')
t = t.substr(1);
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
-
while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
// FIXME - check what preceeds the brace
if (hack_braces) {
// OutputParams runparams(&cur.buffer()->params().encoding());
OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
- runparams.flavor = FLAVOR::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
//OutputParams runparams(&buf.params().encoding());
OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
- runparams.flavor = FLAVOR::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
for (int s = cur.depth() - 1; s >= 0; --s) {
CursorSlice const & cs = cur[s];
if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) {
- WriteStream ws(os);
+ TeXMathStream ws(os);
cs.asInsetMath()->asHullInset()->header_write(ws);
break;
}
CursorSlice const & cs2 = cur[s];
InsetMath * inset = cs2.asInsetMath();
if (inset && inset->asHullInset()) {
- WriteStream ws(os);
+ TeXMathStream ws(os);
inset->asHullInset()->footer_write(ws);
break;
}
return ods.str();
}
+#if 0 // debug helper is compiled out; change to 1 to enable the dump below
+// Debugging output: print every field of a MatchResult, plus the caller-supplied increment, via LYXERR0
+static void displayMResult(MatchResult &mres, int increment)
+{
+ LYXERR0( "pos: " << mres.pos << " increment " << increment);
+ LYXERR0( "leadsize: " << mres.leadsize);
+ LYXERR0( "match_len: " << mres.match_len);
+ LYXERR0( "match_prefix: " << mres.match_prefix);
+ LYXERR0( "match2end: " << mres.match2end);
+}
+ #define displayMres(s,i) displayMResult(s,i); // the macro already ends the statement with ';'
+#else
+ #define displayMres(s,i) // expands to nothing while the debug helper is disabled
+#endif
-/** Finalize an advanced find operation, advancing the cursor to the innermost
- ** position that matches, plus computing the length of the matching text to
- ** be selected
- **/
-int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
+static bool findAdvForwardInnermost(DocIterator & cur) // returns true when at least one forwardPos() step increased cur.depth()
{
- // Search the foremost position that matches (avoids find of entire math
- // inset when match at start of it)
size_t d;
DocIterator old_cur(cur.buffer());
+ int forwardCount = 0; // number of steps that went one level deeper
do {
- LYXERR(Debug::FIND, "Forwarding one step (searching for innermost match)");
d = cur.depth();
old_cur = cur;
cur.forwardPos();
- } while (cur && cur.depth() > d && match(cur).match_len > 0);
+ if (!cur) { // iterator became invalid: stop stepping
+ break;
+ }
+ if (cur.depth() > d) { // descended: count the step and keep going
+ forwardCount++;
+ continue;
+ }
+ if (cur.depth() == d) // same depth: stop; a shallower depth neither stops nor counts, the loop just goes on
+ break;
+ } while(1);
cur = old_cur;
- int max_match = match(cur).match_len; /* match valid only if not searching whole words */
+ if (forwardCount > 0) {
+ LYXERR(Debug::FIND, "Forwarded " << forwardCount << " step(s) (searching for innermost match)");
+ return true;;
+ }
+ else
+ return false;
+}
+
+/** Finalize an advanced find operation, advancing the cursor to the innermost
+ ** position that matches, plus computing the length of the matching text to
+ ** be selected
+ **/
+int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expected_len)
+{
+ // Search the foremost position that matches (avoids find of entire math
+ // inset when match at start of it)
+ DocIterator old_cur(cur.buffer());
+ MatchResult mres;
+ int max_match;
+ if (findAdvForwardInnermost(cur)) {
+ mres = match(cur);
+ displayMres(mres, 0);
+ if (expected_len > 0) {
+ if (mres.match_len < expected_len)
+ return 0;
+ }
+ else {
+ if (mres.match_len <= 0)
+ return 0;
+ }
+ max_match = mres.match_len;
+ }
+ else if (expected_len < 0) {
+ mres = match(cur); /* match valid only if not searching whole words */
+ displayMres(mres, 0);
+ max_match = mres.match_len;
+ }
+ else {
+ max_match = expected_len;
+ }
if (max_match <= 0) return 0;
LYXERR(Debug::FIND, "Ok");
int len = 1;
if (cur.pos() + len > cur.lastpos())
return 0;
- if (match.opt.matchword) {
+ // regexp should use \w+, \S+, or \b(some string)\b
+ // to search for whole words
+ if (match.opt.matchword && !match.use_regexp) {
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
++len;
if (old_match == 0)
len = 0;
}
- else {
- int minl = 1;
- int maxl = cur.lastpos() - cur.pos();
- // Greedy behaviour while matching regexps
- while (maxl > minl) {
- int actual_match = match(cur, len).match_len;
- if (actual_match >= max_match) {
- // actual_match > max_match _can_ happen,
- // if the search area splits
- // some following word so that the regex
- // (e.g. 'r.*r\b' matches 'r' from the middle of the
- // splitted word)
- // This means, the len value is too big
- maxl = len;
- len = (int)((maxl + minl)/2);
- }
- else {
- // (actual_match < max_match)
- minl = len + 1;
- len = (int)((maxl + minl)/2);
- }
- }
+ else {
+ int minl = 1;
+ int maxl = cur.lastpos() - cur.pos();
+ // Greedy behaviour while matching regexps
+ while (maxl > minl) {
+ MatchResult mres2;
+ mres2 = match(cur, len);
+ displayMres(mres2, len);
+ int actual_match = mres2.match_len;
+ if (actual_match >= max_match) {
+ // actual_match > max_match _can_ happen,
+ // if the search area splits
+ // some following word so that the regex
+ // (e.g. 'r.*r\b' matches 'r' from the middle of the
+ // split word)
+ // This means, the len value is too big
+ maxl = len;
+ if (maxl - minl < 4)
+ len = (int)((maxl + minl)/2);
+ else
+ len = (int)(minl + (maxl - minl + 3)/4);
+ }
+ else {
+ // (actual_match < max_match)
+ minl = len + 1;
+ len = (int)((maxl + minl)/2);
+ }
+ }
old_cur = cur;
// Search for real start of matched characters
while (len > 1) {
return len;
}
-
/// Finds forward
int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
{
if (!cur)
return 0;
while (!theApp()->longOperationCancelled() && cur) {
+ (void) findAdvForwardInnermost(cur);
LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
MatchResult mres = match(cur, -1, false);
+ displayMres(mres,-1)
int match_len = mres.match_len;
- LYXERR(Debug::FIND, "match_len: " << match_len);
if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
match_len = 0;
}
if (match_len > 0) {
// Try to find the begin of searched string
- int increment = mres.pos/2;
- while (mres.pos > 5 && (increment > 5)) {
+ int increment;
+ int firstInvalid = 100000;
+ if (mres.match_prefix + mres.pos - mres.leadsize > 1)
+ increment = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4;
+ else
+ increment = 10;
+ LYXERR(Debug::FIND, "Set increment to " << increment);
+ while (increment > 0) {
DocIterator old_cur = cur;
for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
}
}
else {
MatchResult mres2 = match(cur, -1, false);
- if ((mres2.match2end < mres.match2end) ||
- (mres2.match_len < mres.match_len)) {
- cur = old_cur;
- increment /= 2;
- }
- else {
- mres = mres2;
- increment -= 2;
- if (increment > mres.pos/2)
- increment = mres.pos/2;
+ displayMres(mres2,increment)
+ switch (interpretMatch(mres, mres2)) {
+ case MatchResult::newIsTooFar:
+ // behind the expected match
+ firstInvalid = increment;
+ cur = old_cur;
+ increment /= 2;
+ break;
+ case MatchResult::newIsBetter:
+ // not reached yet, but cur.pos()+increment is better
+ mres = mres2;
+ firstInvalid -= increment;
+ if (increment > firstInvalid*3/4)
+ increment = firstInvalid*3/4;
+ if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) {
+ if (increment >= mres2.match_prefix)
+ increment = (mres2.match_prefix+1)*3/4;
+ }
+ break;
+ default:
+ // TODO:
+ // Handle this case differently from MatchResult::newIsTooFar
+ // LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix);
+ firstInvalid--;
+ increment = increment*3/4;
+ cur = old_cur;
+ break;
}
}
}
+ // LYXERR0("Leaving first loop");
+ {
+ LYXERR(Debug::FIND, "Finalizing 1");
+ int len = findAdvFinalize(cur, match, mres.match_len);
+ if (len > 0)
+ return len;
+ else {
+ // try next possible match
+ cur.forwardPos();
+ continue;
+ }
+ }
+ // The following code is never reached,
+ // but parts of it may be needed in the future
int match_len_zero_count = 0;
+ MatchResult mres3;
for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
- if (i++ > 10) {
- int remaining_len = match(cur, -1, false).match_len;
+ if (i++ > 3) {
+ mres3 = match(cur, -1, false);
+ displayMres(mres3, 1)
+ int remaining_len = mres3.match_len;
if (remaining_len <= 0) {
// Apparently the searched string is not in the remaining part
break;
}
}
LYXERR(Debug::FIND, "Advancing cur: " << cur);
- int match_len3 = match(cur, 1).match_len;
+ mres3 = match(cur, 1);
+ displayMres(mres3, 1)
+ int match_len3 = mres3.match_len;
if (match_len3 < 0)
continue;
- int match_len2 = match(cur).match_len;
+ mres3 = match(cur);
+ displayMres(mres3, 1)
+ int match_len2 = mres3.match_len;
LYXERR(Debug::FIND, "match_len2: " << match_len2);
if (match_len2 > 0) {
// Sometimes in finalize we understand it wasn't a match
// and we need to continue the outest loop
- int len = findAdvFinalize(cur, match);
+ LYXERR(Debug::FIND, "Finalizing 2");
+ int len = findAdvFinalize(cur, match, mres.match_len);
if (len > 0) {
return len;
}
}
- if (match_len2 >= 0) {
- if (match_len2 == 0)
- match_len_zero_count++;
- else
- match_len_zero_count = 0;
- }
- else {
+ if (match_len2 > 0)
+ match_len_zero_count = 0;
+ else if (match_len2 == 0)
+ match_len_zero_count++;
+ if (match_len2 < 0) {
if (++match_len_zero_count > 3) {
LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len);
}
{
DocIterator cur_begin = doc_iterator_begin(cur.buffer());
DocIterator tmp_cur = cur;
- int len = findAdvFinalize(tmp_cur, match);
+ int len = findAdvFinalize(tmp_cur, match, -1);
Inset & inset = cur.inset();
for (; cur != cur_begin; cur.backwardPos()) {
LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
new_cur.backwardPos();
if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
break;
- int new_len = findAdvFinalize(new_cur, match);
+ int new_len = findAdvFinalize(new_cur, match, -1);
if (new_len == len)
break;
len = new_len;
// OutputParams runparams(&repl_buffer.params().encoding());
OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
- runparams.flavor = FLAVOR::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;
runparams.dryrun = true;
TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams);
try {
MatchStringAdv matchAdv(bv->buffer(), opt);
+#if QTSEARCH
+ if (!matchAdv.regexIsValid) {
+ bv->message(lyx::from_utf8(matchAdv.regexError));
+ return(false);
+ }
+#endif
int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos();
if (length > 0)
bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
match_len = findForwardAdv(cur, matchAdv);
else
match_len = findBackwardsAdv(cur, matchAdv);
- } catch (...) {
- // This may only be raised by lyx::regex()
- bv->message(_("Invalid regular expression!"));
+ } catch (exception & ex) {
+ bv->message(from_ascii(ex.what()));
return false;
}