* \author Jürgen Vigna
* \author Alfredo Braunstein
* \author Tommaso Cucinotta
+ * \author Kornel Benko
*
* Full author contact details are available in file CREDITS.
*/
#include "lyxfind.h"
#include "Buffer.h"
-#include "buffer_funcs.h"
#include "BufferList.h"
#include "BufferParams.h"
#include "BufferView.h"
#include "output_latex.h"
#include "OutputParams.h"
#include "Paragraph.h"
-#include "ParIterator.h"
-#include "TexRow.h"
#include "Text.h"
#include "Encoding.h"
#include "frontends/alert.h"
#include "mathed/InsetMath.h"
-#include "mathed/InsetMathGrid.h"
#include "mathed/InsetMathHull.h"
#include "mathed/MathData.h"
#include "mathed/MathStream.h"
#include "mathed/MathSupport.h"
-#include "support/convert.h"
#include "support/debug.h"
#include "support/docstream.h"
#include "support/FileName.h"
#include "support/gettext.h"
#include "support/lassert.h"
#include "support/lstrings.h"
-
-#include "support/regex.h"
#include "support/textutils.h"
+
#include <map>
+#include <regex>
+#define USE_QT_FOR_SEARCH
+#if defined(USE_QT_FOR_SEARCH)
+ #include <QtCore> // sets QT_VERSION
+ #if (QT_VERSION >= 0x050000)
+ #include <QRegularExpression>
+ #define QTSEARCH 1
+ #else
+ #define QTSEARCH 0
+ #endif
+#else
+ #define QTSEARCH 0
+#endif
using namespace std;
using namespace lyx::support;
class IgnoreFormats {
public:
/// Holds one boolean "ignore" flag per getter below; all flags start out false.
- IgnoreFormats()
- : ignoreFamily_(false), ignoreSeries_(false),
- ignoreShape_(false), ignoreUnderline_(false),
- ignoreMarkUp_(false), ignoreStrikeOut_(false),
- ignoreSectioning_(false), ignoreFrontMatter_(false),
- ignoreColor_(false), ignoreLanguage_(false) {}
+ IgnoreFormats() = default;
/// Returns the ignoreFamily_ flag.
- bool getFamily() { return ignoreFamily_; };
+ bool getFamily() const { return ignoreFamily_; }
/// Returns the ignoreSeries_ flag.
- bool getSeries() { return ignoreSeries_; };
+ bool getSeries() const { return ignoreSeries_; }
/// Returns the ignoreShape_ flag.
- bool getShape() { return ignoreShape_; };
+ bool getShape() const { return ignoreShape_; }
/// Returns the ignoreUnderline_ flag.
- bool getUnderline() { return ignoreUnderline_; };
+ bool getUnderline() const { return ignoreUnderline_; }
/// Returns the ignoreMarkUp_ flag.
- bool getMarkUp() { return ignoreMarkUp_; };
+ bool getMarkUp() const { return ignoreMarkUp_; }
/// Returns the ignoreStrikeOut_ flag.
- bool getStrikeOut() { return ignoreStrikeOut_; };
+ bool getStrikeOut() const { return ignoreStrikeOut_; }
/// Returns the ignoreSectioning_ flag.
- bool getSectioning() { return ignoreSectioning_; };
+ bool getSectioning() const { return ignoreSectioning_; }
/// Returns the ignoreFrontMatter_ flag.
- bool getFrontMatter() { return ignoreFrontMatter_; };
+ bool getFrontMatter() const { return ignoreFrontMatter_; }
/// Returns the ignoreColor_ flag.
- bool getColor() { return ignoreColor_; };
+ bool getColor() const { return ignoreColor_; }
/// Returns the ignoreLanguage_ flag.
- bool getLanguage() { return ignoreLanguage_; };
+ bool getLanguage() const { return ignoreLanguage_; }
/// Only mutator declared here; defined outside the class body (presumably 'type' names the flag to set -- confirm against the definition).
void setIgnoreFormat(string const & type, bool value);
private:
/// Backing flag for getFamily().
- bool ignoreFamily_;
+ bool ignoreFamily_ = false;
/// Backing flag for getSeries().
- bool ignoreSeries_;
+ bool ignoreSeries_ = false;
/// Backing flag for getShape().
- bool ignoreShape_;
+ bool ignoreShape_ = false;
/// Backing flag for getUnderline().
- bool ignoreUnderline_;
+ bool ignoreUnderline_ = false;
/// Backing flag for getMarkUp().
- bool ignoreMarkUp_;
+ bool ignoreMarkUp_ = false;
/// Backing flag for getStrikeOut().
- bool ignoreStrikeOut_;
+ bool ignoreStrikeOut_ = false;
/// Backing flag for getSectioning().
- bool ignoreSectioning_;
+ bool ignoreSectioning_ = false;
/// Backing flag for getFrontMatter().
- bool ignoreFrontMatter_;
+ bool ignoreFrontMatter_ = false;
/// Backing flag for getColor().
- bool ignoreColor_;
+ bool ignoreColor_ = false;
/// Backing flag for getLanguage().
- bool ignoreLanguage_;
+ bool ignoreLanguage_ = false;
};
bool whole, bool forward, bool findnext)
{
Cursor & cur = bv->cursor();
- bool found = false;
if (!cur.selection()) {
// no selection, non-empty search string: find it
if (!searchstr.empty()) {
- found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext);
+ bool const found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext);
return make_pair(found, 0);
}
// empty search string
// no selection or current selection is not search word:
// just find the search word
if (!have_selection || !match) {
- found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext);
+ bool const found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext);
return make_pair(found, 0);
}
static Escapes escape_map;
if (escape_map.empty()) {
escape_map.push_back(P("\\%", "%"));
+ escape_map.push_back(P("\\{", "{"));
+ escape_map.push_back(P("\\}", "}"));
escape_map.push_back(P("\\mathcircumflex ", "^"));
escape_map.push_back(P("\\mathcircumflex", "^"));
escape_map.push_back(P("\\backslash ", "\\"));
escape_map.push_back(P("\\backslash", "\\"));
- escape_map.push_back(P("\\\\{", "_x_<"));
- escape_map.push_back(P("\\\\}", "_x_>"));
escape_map.push_back(P("\\sim ", "~"));
escape_map.push_back(P("\\sim", "~"));
}
new_pos = s.size();
string t;
if (new_pos > pos) {
+ // outside regexp
LYXERR(Debug::FIND, "new_pos: " << new_pos);
t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes());
LYXERR(Debug::FIND, "t [lyx]: " << t);
t = s.substr(new_pos + 8, end_pos - (new_pos + 8));
LYXERR(Debug::FIND, "t in regexp : " << t);
t = apply_escapes(t, get_lyx_unescapes());
- LYXERR(Debug::FIND, "t in regexp [lyx]: " << t);
+ LYXERR(Debug::FIND, "t in regexp after unescapes [lyx]: " << t);
if (match_latex) {
t = apply_escapes(t, get_regexp_latex_escapes());
- LYXERR(Debug::FIND, "t in regexp [ltx]: " << t);
+ LYXERR(Debug::FIND, "t in regexp after latex_escapes [ltx]: " << t);
}
if (end_pos == s.size()) {
s.replace(new_pos, end_pos - new_pos, t);
bool regex_replace(string const & s, string & t, string const & searchstr,
string const & replacestr)
{
- lyx::regex e(searchstr, regex_constants::ECMAScript);
+ regex e(searchstr, regex_constants::ECMAScript);
ostringstream oss;
ostream_iterator<char, char> it(oss);
- lyx::regex_replace(it, s.begin(), s.end(), e, replacestr);
+ regex_replace(it, s.begin(), s.end(), e, replacestr);
// tolerate t and s be references to the same variable
bool rv = (s != oss.str());
t = oss.str();
** @param unmatched
** Number of open braces that must remain open at the end for the verification to succeed.
**/
-bool braces_match(string::const_iterator const & beg,
- string::const_iterator const & end,
+#if QTSEARCH
+bool braces_match(QString const & beg,
int unmatched = 0)
+#else
+bool braces_match(string const & beg,
+ int unmatched = 0)
+#endif
{
int open_pars = 0;
- string::const_iterator it = beg;
- LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'");
- for (; it != end; ++it) {
+#if QTSEARCH
+ LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg.toStdString() << "'");
+#else
+ LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << beg << "'");
+#endif
+ int lastidx = beg.size();
+ for (int i=0; i < lastidx; ++i) {
// Skip escaped braces in the count
- if (*it == '\\') {
- ++it;
- if (it == end)
+#if QTSEARCH
+ QChar c = beg.at(i);
+#else
+ char c = beg.at(i);
+#endif
+ if (c == '\\') {
+ ++i;
+ if (i >= lastidx)
break;
- } else if (*it == '{') {
+ } else if (c == '{') {
++open_pars;
- } else if (*it == '}') {
+ } else if (c == '}') {
if (open_pars == 0) {
LYXERR(Debug::FIND, "Found unmatched closed brace");
return false;
class MatchResult { // plain record of match measurements; return type of MatchStringAdv::operator()
public:
+ enum range { // verdicts returned by interpretMatch() when comparing two results
+ newIsTooFar,
+ newIsBetter,
+ newIsInvalid
+ };
int match_len; // compared by interpretMatch()
+ int match_prefix;
int match2end; // compared by interpretMatch()
int pos;
- MatchResult(): match_len(0),match2end(0), pos(0) {};
+ int leadsize;
+ MatchResult(): match_len(0),match_prefix(0),match2end(0), pos(0),leadsize(0) {}; // every field starts at 0
};
+static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
+{ // Classifies newres relative to oldres using match_len and match2end only; neither argument is modified.
+ if (newres.match2end < oldres.match2end) // match2end got smaller
+ return MatchResult::newIsTooFar;
+ if (newres.match_len < oldres.match_len) // match_len got smaller
+ return MatchResult::newIsTooFar;
+ if ((newres.match_len == oldres.match_len) && (newres.match2end == oldres.match2end)) // both values unchanged
+ return MatchResult::newIsBetter;
+ if ((newres.match_len == oldres.match_len) && (newres.match2end -2 == oldres.match2end)) {
+ // The string contained for instance "\usepackage...fontenc ..."
+ // and now, after moving 9 chars forward, contains "ge...{fontenc} ..."
+ // so we accept it as OK
+ return MatchResult::newIsBetter;
+ }
+ return MatchResult::newIsInvalid; // any other combination of the two values
+}
+
/** The class performing a match between a position in the document and the FindAdvOptions.
**/
+
class MatchStringAdv {
public:
MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt);
** The length of the matching text, or zero if no match was found.
**/
MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+#if QTSEARCH
+ bool regexIsValid;
+ string regexError;
+#endif
public:
/// buffer
// normalized string to search
string par_as_string;
// regular expression to use for searching
- lyx::regex regexp;
- // same as regexp, but prefixed with a ".*?"
- lyx::regex regexp2;
+ // regexp2 is same as regexp, but prefixed with a ".*?"
+#if QTSEARCH
+ QRegularExpression regexp;
+ QRegularExpression regexp2;
+#else
+ regex regexp;
+ regex regexp2;
+#endif
// leading format material as string
string lead_as_string;
// par_as_string after removal of lead_as_string
// number of (.*?) subexpressions added at end of search regexp for closing
// environments, math mode, styles, etc...
int close_wildcards;
+public:
// Are we searching with regular expressions ?
bool use_regexp;
};
odocstringstream ods;
otexstream os(ods);
runparams.nice = true;
- runparams.flavor = OutputParams::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
// OutputParams runparams(&buffer.params().encoding());
OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
- runparams.flavor = OutputParams::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
runparams.dryrun = true;
runparams.for_search = true;
AS_STR_INSETS | AS_STR_SKIPDELETE | AS_STR_PLAINTEXT,
&runparams);
}
+ // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts
+ string t = to_utf8(str);
+ while (regex_replace(t, t, "\\\\(text|lyxmathsym)\\{([^\\}]*)\\}", "$2"));
+ str = from_utf8(t);
}
return str;
}
// @TODO Support \item[text]
// Kornel: Added textsl, textsf, textit, texttt and noun
// + allow to search for colored text too
- while (regex_replace(t, t, REGEX_BOS "\\\\(((footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|emph|noun|minisec|text(bf|md|sl|sf|it|tt))|((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "")
- || regex_replace(t, t, REGEX_BOS "\\$", "")
- || regex_replace(t, t, REGEX_BOS "\\\\\\[", "")
- || regex_replace(t, t, REGEX_BOS " ?\\\\item\\{[a-z]+\\}", "")
- || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\}", ""))
+ while (regex_replace(t, t, "^\\\\(("
+ "(author|title|subtitle|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|"
+ "lyxaddress|lyxrightaddress|"
+ "footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|"
+ "emph|noun|minisec|text(bf|md|sl|sf|it|tt))|"
+ "((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|"
+ "(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "")
+ || regex_replace(t, t, "^\\$", "")
+ || regex_replace(t, t, "^\\\\\\[", "")
+ || regex_replace(t, t, "^ ?\\\\item\\{[a-z]+\\}", "")
+ || regex_replace(t, t, "^\\\\begin\\{[a-zA-Z_]*\\*?\\}", ""))
;
LYXERR(Debug::FIND, " after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
return s.find(t);
noContent,
/* Char, like \backslash */
isChar,
+ /* replace starting backslash with '#' */
+ isText,
/* \part, \section*, ... */
isSectioning,
/* title, author etc */
isIgnored,
/* like \lettrine[lines=5]{}{} */
cleanToStart,
+ // like isStandard, but always remove head
+ headRemove,
/* End of arguments marker for lettrine,
* so that they can be ignored */
endArguments
};
- KeyInfo()
- : keytype(invalid),
- head(""),
- _tokensize(-1),
- _tokenstart(-1),
- _dataStart(-1),
- _dataEnd(-1),
- parenthesiscount(1),
- disabled(false),
- used(false)
- {};
+ KeyInfo() = default;
KeyInfo(KeyType type, int parcount, bool disable)
: keytype(type),
- _tokensize(-1),
- _tokenstart(-1),
- _dataStart(-1),
- _dataEnd(-1),
parenthesiscount(parcount),
- disabled(disable),
- used(false) {};
- KeyType keytype;
+ disabled(disable) {}
+ KeyType keytype = invalid;
string head;
- int _tokensize;
- int _tokenstart;
- int _dataStart;
- int _dataEnd;
- int parenthesiscount;
- bool disabled;
- bool used; /* by pattern */
+ int _tokensize = -1;
+ int _tokenstart = -1;
+ int _dataStart = -1;
+ int _dataEnd = -1;
+ int parenthesiscount = 1;
+ bool disabled = false;
+ bool used = false; /* by pattern */
};
class Border {
int depts[MAXOPENED];
int closes[MAXOPENED];
int actualdeptindex;
- int previousNotIgnored(int);
- int nextNotIgnored(int);
+ int previousNotIgnored(int) const;
+ int nextNotIgnored(int) const;
void handleOpenP(int i);
void handleCloseP(int i, bool closingAllowed);
void resetOpenedP(int openPos);
void addIntervall(int upper);
void addIntervall(int low, int upper); /* if explicit */
void removeAccents();
- void setForDefaultLang(KeyInfo &defLang);
+ void setForDefaultLang(KeyInfo const & defLang) const;
int findclosing(int start, int end, char up, char down, int repeat);
void handleParentheses(int lastpos, bool closingAllowed);
bool hasTitle;
int langcount; // Number of disabled language specs up to current position in actual interval
- int isOpeningPar(int pos);
+ int isOpeningPar(int pos) const;
string titleValue;
void output(ostringstream &os, int lastpos);
// string show(int lastpos);
vector<Border> Intervall::borders = vector<Border>(30);
-int Intervall::isOpeningPar(int pos)
+int Intervall::isOpeningPar(int pos) const
{
if ((pos < 0) || (size_t(pos) >= par.size()))
return 0;
return 1;
}
-void Intervall::setForDefaultLang(KeyInfo &defLang)
+void Intervall::setForDefaultLang(KeyInfo const & defLang) const // writes only to 'borders', which is defined as a static member
{
// Enable the use of first token again: raise low/upper of one border
// entry to at least (_tokenstart + _tokensize) of defLang.
if (ignoreidx >= 0) {
int value = defLang._tokenstart + defLang._tokensize; // start plus size of defLang's token
+ int borderidx = 0;
+ if (hasTitle) { // with hasTitle set, entry 1 is adjusted instead of entry 0
+ borderidx = 1;
+ }
if (value > 0) {
- if (borders[0].low < value)
- borders[0].low = value;
- if (borders[0].upper < value)
- borders[0].upper = value;
+ if (borders[borderidx].low < value) // bounds are only ever increased, never lowered
+ borders[borderidx].low = value;
+ if (borders[borderidx].upper < value)
+ borders[borderidx].upper = value;
}
}
}
const char delim = '|';
while (getline(s, name, delim)) {
size_t start = 0;
- for (size_t i = 0; i < param.size(); i++) {
- string key = name + "{" + param[i] + "}";
+ for (char c : param) {
+ string key = name + "{" + c + "}";
// get the corresponding utf8-value
if ((values[start] & 0xc0) != 0xc0) {
// should not happen, utf8 encoding starts at least with 11xxxxxx
}
}
+// Helper function: returns the code point 'uchar' encoded as a UTF-8 byte string (empty string for 0, because zero bytes are skipped on output)
+static string getutf8(unsigned uchar)
+{
+ #define maxc 5
+ string ret = string();
+ char c[maxc] = {0}; // filled from the last slot backwards; unused leading slots stay 0
+ if (uchar <= 0x7f) {
+ c[maxc-1] = uchar & 0x7f; // single-byte case
+ }
+ else {
+ unsigned char rest = 0x40; // exclusive upper limit for what still fits beside the lead-byte marker
+ unsigned char first = 0x80; // lead-byte marker bits, extended by one high bit per emitted byte
+ int start = maxc-1;
+ for (int i = start; i >=0; --i) {
+ if (uchar < rest) { // remaining bits fit: write the lead byte and stop
+ c[i] = first + uchar;
+ break;
+ }
+ c[i] = 0x80 | (uchar & 0x3f); // continuation byte (10xxxxxx) carrying the low 6 bits
+ uchar >>= 6;
+ rest >>= 1;
+ first >>= 1;
+ first |= 0x80;
+ }
+ }
+ for (int i = 0; i < maxc; i++) {
+ if (c[i] == 0) continue; // skip slots that were never written
+ ret += c[i];
+ }
+ return(ret);
+}
+
static void buildAccentsMap()
{
accents["imath"] = "ı";
accents["i"] = "ı";
accents["jmath"] = "ȷ";
- accents["lyxmathsym{ß}"] = "ß";
- accents["text{ß}"] = "ß";
+ accents["cdot"] = "·";
+ accents["hairspace"] = getutf8(0xf0000); // select from free unicode plane 15
+ accents["thinspace"] = getutf8(0xf0002); // and used _only_ by findadv
+ accents["negthinspace"] = getutf8(0xf0003); // to omit backslashed latex macros
+ accents["medspace"] = getutf8(0xf0004); // See https://en.wikipedia.org/wiki/Private_Use_Areas
+ accents["negmedspace"] = getutf8(0xf0005);
+ accents["thickspace"] = getutf8(0xf0006);
+ accents["negthickspace"] = getutf8(0xf0007);
+ accents["lyx"] = getutf8(0xf0010); // Used logos
+ accents["LyX"] = getutf8(0xf0010);
+ accents["tex"] = getutf8(0xf0011);
+ accents["TeX"] = getutf8(0xf0011);
+ accents["latex"] = getutf8(0xf0012);
+ accents["LaTeX"] = getutf8(0xf0012);
+ accents["latexe"] = getutf8(0xf0013);
+ accents["LaTeXe"] = getutf8(0xf0013);
+ accents["lyxarrow"] = getutf8(0xf0020);
+ accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash
+ accents["backslash LyX"] = getutf8(0xf0010);
+ accents["backslash tex"] = getutf8(0xf0011);
+ accents["backslash TeX"] = getutf8(0xf0011);
+ accents["backslash latex"] = getutf8(0xf0012);
+ accents["backslash LaTeX"] = getutf8(0xf0012);
+ accents["backslash latexe"] = getutf8(0xf0013);
+ accents["backslash LaTeXe"] = getutf8(0xf0013);
+ accents["backslash lyxarrow"] = getutf8(0xf0020);
accents["ddot{\\imath}"] = "ï";
buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY",
"äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ"); // umlaut
buildaccent("ogonek|k", "AaEeIiUuOo",
"ĄąĘęĮįŲųǪǫ"); // ogonek
buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh",
- "Ã\87çĢĢĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla
+ "Ã\87çĢģĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla
buildaccent("subring|textsubring", "Aa",
"Ḁḁ"); // subring
buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu",
{
if (accents.empty())
buildAccentsMap();
- static regex const accre("\\\\(([\\S]|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
+ static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
+ "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
+ "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow)))(?![a-zA-Z]))");
smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc;
for (size_t i = 0; i < val.size(); i++) {
par[pos+i] = val[i];
}
- addIntervall(pos+val.size(), pos + sub.str(0).size());
+ // Remove possibly following space too
+ if (par[pos+sub.str(0).size()] == ' ')
+ addIntervall(pos+val.size(), pos + sub.str(0).size()+1);
+ else
+ addIntervall(pos+val.size(), pos + sub.str(0).size());
for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
// remove traces of any remaining chars
par[i] = ' ';
closes[1] = -1;
}
-int Intervall::previousNotIgnored(int start)
+int Intervall::previousNotIgnored(int start) const
{
int idx = 0; /* int intervalls */
for (idx = ignoreidx; idx >= 0; --idx) {
return start;
}
-int Intervall::nextNotIgnored(int start)
+int Intervall::nextNotIgnored(int start) const
{
int idx = 0; /* int intervalls */
for (idx = 0; idx <= ignoreidx; idx++) {
void buildEntries(bool);
void makeKey(const string &, KeyInfo, bool isPatternString);
void processRegion(int start, int region_end); /* remove {} parts */
- void removeHead(KeyInfo&, int count=0);
+ void removeHead(KeyInfo const &, int count=0);
public:
LatexInfo(string const & par, bool isPatternString)
- : entidx_(-1), interval_(isPatternString, par)
+ : entidx_(-1), interval_(isPatternString, par)
{
buildKeys(isPatternString);
entries_ = vector<KeyInfo>();
return -1;
}
if (entries_[0].keytype == KeyInfo::isTitle) {
+ interval_.hasTitle = true;
if (! entries_[0].disabled) {
- interval_.hasTitle = true;
interval_.titleValue = entries_[0].head;
}
else {
- interval_.hasTitle = false;
interval_.titleValue = "";
}
removeHead(entries_[0]);
else
return false;
};
- int find(int start, KeyInfo::KeyType keytype) {
+ int find(int start, KeyInfo::KeyType keytype) const {
if (start < 0)
return -1;
int tmpIdx = start;
}
return -1;
};
- int process(ostringstream &os, KeyInfo &actual);
- int dispatch(ostringstream &os, int previousStart, KeyInfo &actual);
+ int process(ostringstream & os, KeyInfo const & actual);
+ int dispatch(ostringstream & os, int previousStart, KeyInfo & actual);
// string show(int lastpos) { return interval.show(lastpos);};
int nextNotIgnored(int start) { return interval_.nextNotIgnored(start);};
KeyInfo &getKeyInfo(int keyinfo) {
else
return entries_[keyinfo];
};
- void setForDefaultLang(KeyInfo &defLang) {interval_.setForDefaultLang(defLang);};
+ void setForDefaultLang(KeyInfo const & defLang) {interval_.setForDefaultLang(defLang);};
void addIntervall(int low, int up) { interval_.addIntervall(low, up); };
};
m.mathSize = end - start;
entries_.push_back(m);
}
- bool empty() { return entries_.empty(); };
- size_t getEndPos() {
+ bool empty() const { return entries_.empty(); };
+ size_t getEndPos() const {
if (entries_.empty() || (actualIdx_ >= entries_.size())) {
return 0;
}
return entries_[actualIdx_].mathEnd;
}
- size_t getStartPos() {
+ size_t getStartPos() const {
if (entries_.empty() || (actualIdx_ >= entries_.size())) {
return 100000; /* definitely enough? */
}
actualIdx_ = 0;
return getStartPos();
}
- size_t getSize() {
+ size_t getSize() const {
if (entries_.empty() || (actualIdx_ >= entries_.size())) {
return size_t(0);
}
bool math_end_waiting = false;
size_t math_pos = 10000;
string math_end;
+ static vector<string> usedText = vector<string>();
interval_.removeAccents();
}
// Ignore language if there is math somewhere in pattern-string
if (isPatternString) {
+ for (auto s: usedText) {
+ // Remove entries created in previous search runs
+ keys.erase(s);
+ }
+ usedText = vector<string>();
if (! mi.empty()) {
// Disable language
keys["foreignlanguage"].disabled = true;
}
}
};
+ if (keys.find(key) != keys.end()) {
+ if (keys[key].keytype == KeyInfo::headRemove) {
+ KeyInfo found1 = keys[key];
+ found1.disabled = true;
+ found1.head = "\\" + key + "{";
+ found1._tokenstart = sub.position(size_t(0));
+ found1._tokensize = found1.head.length();
+ found1._dataStart = found1._tokenstart + found1.head.length();
+ int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1);
+ found1._dataEnd = endpos;
+ removeHead(found1);
+ continue;
+ }
+ }
if (evaluatingRegexp) {
if (sub.str(1).compare("endregexp") == 0) {
evaluatingRegexp = false;
}
if (keys.find(key) == keys.end()) {
found = KeyInfo(KeyInfo::isStandard, 0, true);
+ LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text");
+ found = KeyInfo(KeyInfo::isText, 0, false);
if (isPatternString) {
found.keytype = KeyInfo::isChar;
found.disabled = false;
found.used = true;
}
keys[key] = found;
+ usedText.push_back(key);
}
else
found = keys[key];
optionalEnd = optend;
}
string token = sub.str(5);
- int closings = found.parenthesiscount;
+ int closings;
+ if (interval_.par[optend] != '{') {
+ closings = 0;
+ found.parenthesiscount = 0;
+ found.head = "\\" + key;
+ }
+ else
+ closings = found.parenthesiscount;
if (found.parenthesiscount == 1) {
found.head = "\\" + key + "{";
}
found._tokensize = found.head.length();
found._dataStart = found._tokenstart + found.head.length();
if (found.keytype == KeyInfo::doRemove) {
- int endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
- found._dataStart = endpar;
- found._tokensize = found._dataStart - found._tokenstart;
+ if (closings > 0) {
+ size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
+ if (endpar >= interval_.par.length())
+ found._dataStart = interval_.par.length();
+ else
+ found._dataStart = endpar;
+ found._tokensize = found._dataStart - found._tokenstart;
+ }
+ else {
+ found._dataStart = found._tokenstart + found._tokensize;
+ }
closings = 0;
}
if (interval_.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) {
// Disable this key, treat it as standard
found.keytype = KeyInfo::isStandard;
found.disabled = true;
- if ((codeEnd == interval_.par.length()) &&
+ if ((codeEnd +1 >= interval_.par.length()) &&
(found._tokenstart == codeStart)) {
// trickery, because the code inset starts
// with \selectlanguage ...
static bool keysBuilt = false;
if (keysBuilt && !isPatternString) return;
+ // Keys to ignore in any case
+ makeKey("text|textcyrillic|lyxmathsym", KeyInfo(KeyInfo::headRemove, 1, true), true);
// Known standard keys with 1 parameter.
// Split is done, if not at start of region
makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textasciiacute|texemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+ makeKey("guillemotright|guillemotleft", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Spaces
makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+ makeKey("thickspace|medspace|thinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Skip
// makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Custom space/skip, remove the content (== length value)
makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString);
// Survives, like known character
- makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+ // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+ makeKey("tableofcontents", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString);
makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
}
}
-void LatexInfo::removeHead(KeyInfo &actual, int count)
+void LatexInfo::removeHead(KeyInfo const & actual, int count)
{
if (actual.parenthesiscount == 0) {
// "{\tiny{} ...}" ==> "{{} ...}"
interval_.closes[0] = -1;
break;
}
+ case KeyInfo::isText:
+ interval_.par[actual._tokenstart] = '#';
+ //interval_.addIntervall(actual._tokenstart, actual._tokenstart+1);
+ nextKeyIdx = getNextKey();
+ break;
case KeyInfo::noContent: { /* char like "\hspace{2cm}" */
if (actual.disabled)
interval_.addIntervall(actual._tokenstart, actual._dataEnd);
}
case KeyInfo::isSize: {
if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) {
- processRegion(actual._dataEnd, actual._dataEnd+1); /* remove possibly following {} */
- interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+ if (actual.parenthesiscount == 0)
+ interval_.addIntervall(actual._tokenstart, actual._dataEnd);
+ else {
+ interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+ }
nextKeyIdx = getNextKey();
} else {
// Here _dataStart points to '{', so correct it
}
break;
}
- case KeyInfo::endArguments:
+ case KeyInfo::endArguments: {
// Remove trailing '{}' too
actual._dataStart += 1;
actual._dataEnd += 1;
interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
nextKeyIdx = getNextKey();
break;
+ }
case KeyInfo::noMain:
// fall through
case KeyInfo::isStandard: {
break;
}
case KeyInfo::invalid:
- // This cannot happen, already handled
+ case KeyInfo::headRemove:
+ // These two cases cannot happen, already handled
// fall through
default: {
// LYXERR(Debug::INFO, "Unhandled keytype");
return nextKeyIdx;
}
-int LatexInfo::process(ostringstream &os, KeyInfo &actual )
+int LatexInfo::process(ostringstream & os, KeyInfo const & actual )
{
int end = interval_.nextNotIgnored(actual._dataEnd);
int oldStart = actual._dataStart;
int output_end;
if (actual._dataEnd < end)
output_end = interval_.nextNotIgnored(actual._dataEnd);
- else
+ else if (interval_.par.size() > (size_t) end)
output_end = interval_.nextNotIgnored(end);
+ else
+ output_end = interval_.par.size();
if ((actual.keytype == KeyInfo::isMain) && actual.disabled) {
interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize);
}
int open_braces = 0;
do {
LYXERR(Debug::FIND, "identifyClosing(): t now is '" << t << "'");
- if (regex_replace(t, t, "(.*[^\\\\])\\$" REGEX_EOS, "$1"))
+ if (regex_replace(t, t, "(.*[^\\\\])\\$$", "$1"))
continue;
- if (regex_replace(t, t, "(.*[^\\\\])\\\\\\]" REGEX_EOS, "$1"))
+ if (regex_replace(t, t, "(.*[^\\\\])\\\\\\]$", "$1"))
continue;
- if (regex_replace(t, t, "(.*[^\\\\])\\\\end\\{[a-zA-Z_]*\\*?\\}" REGEX_EOS, "$1"))
+ if (regex_replace(t, t, "(.*[^\\\\])\\\\end\\{[a-zA-Z_]*\\*?\\}$", "$1"))
continue;
- if (regex_replace(t, t, "(.*[^\\\\])\\}" REGEX_EOS, "$1")) {
+ if (regex_replace(t, t, "(.*[^\\\\])\\}$", "$1")) {
++open_braces;
continue;
}
LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
}
+ LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
// Insert (.*?) before trailing closure of math, macros and environments, so as to catch parts of them.
LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
lng -= 2;
open_braces++;
}
- else
+ else
break;
-}
+ }
if (lng < par_as_string.size())
par_as_string = par_as_string.substr(0,lng);
/*
// TODO: Adapt '\[12345678]' in par_as_string to account for the first '()
// Unfortunately '\1', '\2', etc. are not working for strings with extra format
// so the conversion has no effect in that case
- for (int i = 8; i > 0; --i) {
+ for (int i = 7; i > 0; --i) {
string orig = "\\\\" + std::to_string(i);
- string dest = "\\" + std::to_string(i+1);
+ string dest = "\\" + std::to_string(i+2);
while (regex_replace(par_as_string, par_as_string, orig, dest));
}
- regexp_str = "(" + lead_as_regexp + ")" + par_as_string;
- regexp2_str = "(" + lead_as_regexp + ").*?" + par_as_string;
+ regexp_str = "(" + lead_as_regexp + ")()" + par_as_string;
+ regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string;
}
LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
- regexp = lyx::regex(regexp_str);
-
LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
- regexp2 = lyx::regex(regexp2_str);
+#if QTSEARCH
+ // Handle \w properly
+ QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
+ if (! opt.casesensitive) {
+ popts |= QRegularExpression::CaseInsensitiveOption;
+ }
+ regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
+ regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
+ regexError = "";
+ if (regexp.isValid() && regexp2.isValid()) {
+ regexIsValid = true;
+ // Check '{', '}' pairs inside the regex
+ int balanced = 0;
+ int skip = 1;
+ for (unsigned i = 0; i < par_as_string.size(); i+= skip) {
+ char c = par_as_string[i];
+ if (c == '\\') {
+ skip = 2;
+ continue;
+ }
+ if (c == '{')
+ balanced++;
+ else if (c == '}') {
+ balanced--;
+ if (balanced < 0)
+ break;
+ }
+ skip = 1;
+ }
+ if (balanced != 0) {
+ regexIsValid = false;
+ regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
+ }
+ }
+ else {
+ regexIsValid = false;
+ if (!regexp.isValid())
+ regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
+ if (!regexp2.isValid())
+ regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
+ }
+#else
+ if (opt.casesensitive) {
+ regexp = regex(regexp_str);
+ regexp2 = regex(regexp2_str);
+ }
+ else {
+ regexp = regex(regexp_str, std::regex_constants::icase);
+ regexp2 = regex(regexp2_str, std::regex_constants::icase);
+ }
+#endif
}
}
-
+#if 0
// Count number of characters in string
// {]} ==> 1
// \& ==> 1
// --- ==> 1
// \\[a-zA-Z]+ ==> 1
+#if QTSEARCH
+static int computeSize(QStringRef s, int len)
+#define isLyxAlpha(arg) arg.isLetter()
+#else
static int computeSize(string s, int len)
+#define isLyxAlpha(arg) isalpha(arg)
+#endif
{
if (len == 0)
return 0;
int skip = 1;
int count = 0;
for (int i = 0; i < len; i += skip, count++) {
- if (s[i] == '\\') {
+ if (s.at(i) == '\\') {
skip = 2;
- if (isalpha(s[i+1])) {
+ if (i + 1 < len && isLyxAlpha(s.at(i+1))) {
for (int j = 2; i+j < len; j++) {
- if (! isalpha(s[i+j])) {
- if (s[i+j] == ' ')
- skip++;
- else if ((s[i+j] == '{') && s[i+j+1] == '}')
- skip += 2;
- else if ((s[i+j] == '{') && (i + j + 1 >= len))
+ if (! isLyxAlpha(s.at(i+j))) {
+ if (s.at(i+j) == ' ')
skip++;
+ else if (s.at(i+j) == '{') {
+ if (i+j+1 < len && s.at(i+j+1) == '}')
+ skip += 2;
+ else if (i + j + 1 >= len)
+ skip++;
+ }
break;
}
skip++;
}
}
}
- else if (s[i] == '{') {
- if (s[i+1] == '}')
+ else if (s.at(i) == '{') {
+ if (i + 1 < len && s.at(i+1) == '}')
skip = 2;
else
skip = 3;
}
- else if (s[i] == '-') {
- if (s[i+1] == '-') {
- if (s[i+2] == '-')
+ else if (s.at(i) == '-') {
+ if (i+1 < len && s.at(i+1) == '-') {
+ if (i + 2 < len && s.at(i+2) == '-')
skip = 3;
else
skip = 2;
}
return count;
}
+#endif
MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
{
return mres;
docstring docstr = stringifyFromForSearch(opt, cur, len);
- string str = normalize(docstr, true);
+ string str;
+ if (use_regexp || opt.casesensitive)
+ str = normalize(docstr, true);
+ else
+ str = normalize(lowercase(docstr), true);
if (!opt.ignoreformat) {
str = correctlanguagesetting(str, false, !opt.ignoreformat);
}
if (use_regexp) {
LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
+#if QTSEARCH
+ QString qstr = QString::fromStdString(str);
+ QRegularExpression const *p_regexp;
+ QRegularExpression::MatchType flags = QRegularExpression::NormalMatch;
+ if (at_begin) {
+ p_regexp = &regexp;
+ } else {
+ p_regexp = &regexp2;
+ }
+ QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags);
+ if (!match.hasMatch())
+ return mres;
+ // Check braces on segments that matched all (.*?) subexpressions,
+ // except the last "padding" one inserted by lyx.
+ for (int i = 3; i < match.lastCapturedIndex(); ++i)
+ if (!braces_match(match.captured(i), open_braces))
+ return mres;
+#else
regex const *p_regexp;
regex_constants::match_flag_type flags;
if (at_begin) {
if (re_it == sregex_iterator())
return mres;
match_results<string::const_iterator> const & m = *re_it;
-
- if (0) { // Kornel Benko: DO NOT CHECKK
- // Check braces on the segment that matched the entire regexp expression,
- // plus the last subexpression, if a (.*?) was inserted in the constructor.
- if (!braces_match(m[0].first, m[0].second, open_braces))
- return mres;
- }
-
// Check braces on segments that matched all (.*?) subexpressions,
// except the last "padding" one inserted by lyx.
- for (size_t i = 1; i < m.size() - 1; ++i)
- if (!braces_match(m[i].first, m[i].second, open_braces))
+ for (size_t i = 3; i < m.size() - 1; ++i)
+ if (!braces_match(m[i], open_braces))
return mres;
-
+#endif
// Exclude from the returned match length any length
// due to close wildcards added at end of regexp
// and also the length of the leading (e.g. '\emph{}')
// Whole found string, including the leading: m[0].second - m[0].first
// Size of the leading string: m[1].second - m[1].first
int leadingsize = 0;
- if (m.size() > 1)
- leadingsize = m[1].second - m[1].first;
int result;
+#if QTSEARCH
+ if (match.lastCapturedIndex() > 0) {
+ leadingsize = match.capturedEnd(1) - match.capturedStart(1);
+ }
+ int lastidx = match.lastCapturedIndex();
+ for (int i = 0; i <= lastidx; i++) {
+ LYXERR(Debug::FIND, "Match " << i << " is " << match.capturedEnd(i) - match.capturedStart(i) << " long");
+ }
+ if (close_wildcards == 0)
+ result = match.capturedEnd(0) - match.capturedStart(0);
+ else
+ result = match.capturedStart(lastidx + 1 - close_wildcards) - match.capturedStart(0);
+
+#else
+ if (m.size() > 2) {
+ leadingsize = m[1].second - m[1].first;
+ }
for (size_t i = 0; i < m.size(); i++) {
LYXERR(Debug::FIND, "Match " << i << " is " << m[i].second - m[i].first << " long");
}
if (close_wildcards == 0)
result = m[0].second - m[0].first;
-
else
result = m[m.size() - close_wildcards].first - m[0].first;
-
- size_t pos = m.position(size_t(0));
- // Ignore last closing characters
- while (result > 0) {
- if (str[pos+result-1] == '}')
- --result;
- else
- break;
- }
+#endif
if (result > leadingsize)
result -= leadingsize;
else
result = 0;
- mres.match_len = computeSize(str.substr(pos+leadingsize,result), result);
- mres.match2end = str.size() - pos - leadingsize;
- mres.pos = pos+leadingsize;
+#if QTSEARCH
+ mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2);
+ mres.match_len = match.capturedEnd(0) - match.capturedEnd(2);
+ // Because the number of closing braces at the end of the string can differ,
+ // we have to 'unify' the length of the post-match.
+ // Done by ignoring braces and linefeeds at string end
+ int matchend = match.capturedEnd(0);
+ while (mres.match_len > 0) {
+ QChar c = qstr.at(matchend - 1);
+ if ((c == '\n') || (c == '}') || (c == '{')) {
+ mres.match_len--;
+ matchend--;
+ }
+ else
+ break;
+ }
+ size_t strsize = qstr.size();
+ while (strsize > (size_t) match.capturedEnd(0)) {
+ QChar c = qstr.at(strsize-1);
+ if ((c == '\n') || (c == '}')) {
+ --strsize;
+ }
+ else
+ break;
+ }
+ // LYXERR0(qstr.toStdString());
+ mres.match2end = strsize - matchend;
+ mres.pos = match.capturedStart(2);
+#else
+ mres.match_prefix = m[2].second - m[2].first;
+ mres.match_len = m[0].second - m[2].second;
+ // ignore braces and linefeeds at string end
+ size_t strend = m[0].second - m[0].first;
+ int matchend = strend;
+ while (mres.match_len > 0) {
+ char c = str.at(matchend - 1);
+ if ((c == '\n') || (c == '}') || (c == '{')) {
+ mres.match_len--;
+ matchend--;
+ }
+ else
+ break;
+ }
+ size_t strsize = str.size();
+ while (strsize > strend) {
+ if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) {
+ --strsize;
+ }
+ else
+ break;
+ }
+ // LYXERR0(str);
+ mres.match2end = strsize - matchend;
+ mres.pos = m[2].first - m[0].first;;
+#endif
+ if (mres.match2end < 0)
+ mres.match_len = 0;
+ mres.leadsize = leadingsize;
return mres;
}
string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
{
string t;
- if (! opt.casesensitive)
- t = lyx::to_utf8(lowercase(s));
- else
- t = lyx::to_utf8(s);
+ t = lyx::to_utf8(s);
// Remove \n at begin
while (!t.empty() && t[0] == '\n')
t = t.substr(1);
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
LYXERR(Debug::FIND, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
-
while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
// FIXME - check what preceeds the brace
if (hack_braces) {
// OutputParams runparams(&cur.buffer()->params().encoding());
OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
- runparams.flavor = OutputParams::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
//OutputParams runparams(&buf.params().encoding());
OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
- runparams.flavor = OutputParams::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
for (int s = cur.depth() - 1; s >= 0; --s) {
CursorSlice const & cs = cur[s];
if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) {
- WriteStream ws(os);
+ TeXMathStream ws(os);
cs.asInsetMath()->asHullInset()->header_write(ws);
break;
}
CursorSlice const & cs2 = cur[s];
InsetMath * inset = cs2.asInsetMath();
if (inset && inset->asHullInset()) {
- WriteStream ws(os);
+ TeXMathStream ws(os);
inset->asHullInset()->footer_write(ws);
break;
}
return ods.str();
}
+#if 0
+// Debugging output
+static void displayMResult(MatchResult &mres, int increment)
+{
+ LYXERR0( "pos: " << mres.pos << " increment " << increment);
+ LYXERR0( "leadsize: " << mres.leadsize);
+ LYXERR0( "match_len: " << mres.match_len);
+ LYXERR0( "match_prefix: " << mres.match_prefix);
+ LYXERR0( "match2end: " << mres.match2end);
+}
+ #define displayMres(s,i) displayMResult(s,i);
+#else
+ #define displayMres(s,i)
+#endif
-/** Finalize an advanced find operation, advancing the cursor to the innermost
- ** position that matches, plus computing the length of the matching text to
- ** be selected
- **/
-int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
+static bool findAdvForwardInnermost(DocIterator & cur)
{
- // Search the foremost position that matches (avoids find of entire math
- // inset when match at start of it)
size_t d;
DocIterator old_cur(cur.buffer());
+ int forwardCount = 0;
do {
- LYXERR(Debug::FIND, "Forwarding one step (searching for innermost match)");
d = cur.depth();
old_cur = cur;
cur.forwardPos();
- } while (cur && cur.depth() > d && match(cur).match_len > 0);
+ if (!cur) {
+ break;
+ }
+ if (cur.depth() > d) {
+ forwardCount++;
+ continue;
+ }
+ if (cur.depth() == d)
+ break;
+ } while(1);
cur = old_cur;
- int max_match = match(cur).match_len; /* match valid only if not searching whole words */
+ if (forwardCount > 0) {
+ LYXERR(Debug::FIND, "Forwarded " << forwardCount << " step(s) (searching for innermost match)");
+ return true;;
+ }
+ else
+ return false;
+}
+
+/** Finalize an advanced find operation, advancing the cursor to the innermost
+ ** position that matches, plus computing the length of the matching text to
+ ** be selected
+ **/
+int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, int expected_len)
+{
+ // Search the foremost position that matches (avoids find of entire math
+ // inset when match at start of it)
+ DocIterator old_cur(cur.buffer());
+ MatchResult mres;
+ int max_match;
+ if (findAdvForwardInnermost(cur)) {
+ mres = match(cur);
+ displayMres(mres, 0);
+ if (expected_len > 0) {
+ if (mres.match_len < expected_len)
+ return 0;
+ }
+ else {
+ if (mres.match_len <= 0)
+ return 0;
+ }
+ max_match = mres.match_len;
+ }
+ else if (expected_len < 0) {
+ mres = match(cur); /* match valid only if not searching whole words */
+ displayMres(mres, 0);
+ max_match = mres.match_len;
+ }
+ else {
+ max_match = expected_len;
+ }
if (max_match <= 0) return 0;
LYXERR(Debug::FIND, "Ok");
int len = 1;
if (cur.pos() + len > cur.lastpos())
return 0;
- if (match.opt.matchword) {
+ // regexp should use \w+, \S+, or \b(some string)\b
+ // to search for whole words
+ if (match.opt.matchword && !match.use_regexp) {
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
++len;
if (old_match == 0)
len = 0;
}
- else {
- int minl = 1;
- int maxl = cur.lastpos() - cur.pos();
- // Greedy behaviour while matching regexps
- while (maxl > minl) {
- int actual_match = match(cur, len).match_len;
- if (actual_match >= max_match) {
- // actual_match > max_match _can_ happen,
- // if the search area splits
- // some following word so that the regex
- // (e.g. 'r.*r\b' matches 'r' from the middle of the
- // splitted word)
- // This means, the len value is too big
- maxl = len;
- len = (int)((maxl + minl)/2);
- }
- else {
- // (actual_match < max_match)
- minl = len + 1;
- len = (int)((maxl + minl)/2);
- }
- }
+ else {
+ int minl = 1;
+ int maxl = cur.lastpos() - cur.pos();
+ // Greedy behaviour while matching regexps
+ while (maxl > minl) {
+ MatchResult mres2;
+ mres2 = match(cur, len);
+ displayMres(mres2, len);
+ int actual_match = mres2.match_len;
+ if (actual_match >= max_match) {
+ // actual_match > max_match _can_ happen,
+ // if the search area splits
+ // some following word so that the regex
+ // (e.g. 'r.*r\b' matches 'r' from the middle of the
+ // split word)
+ // This means, the len value is too big
+ maxl = len;
+ if (maxl - minl < 4)
+ len = (int)((maxl + minl)/2);
+ else
+ len = (int)(minl + (maxl - minl + 3)/4);
+ }
+ else {
+ // (actual_match < max_match)
+ minl = len + 1;
+ len = (int)((maxl + minl)/2);
+ }
+ }
old_cur = cur;
// Search for real start of matched characters
while (len > 1) {
return len;
}
-
/// Finds forward
-int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
+int findForwardAdv(DocIterator & cur, MatchStringAdv const & match)
{
if (!cur)
return 0;
while (!theApp()->longOperationCancelled() && cur) {
+ (void) findAdvForwardInnermost(cur);
LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
MatchResult mres = match(cur, -1, false);
+ displayMres(mres,-1)
int match_len = mres.match_len;
- LYXERR(Debug::FIND, "match_len: " << match_len);
if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
match_len = 0;
}
if (match_len > 0) {
// Try to find the begin of searched string
- int increment = mres.pos/2;
- while (mres.pos > 5 && (increment > 5)) {
+ int increment;
+ int firstInvalid = 100000;
+ if (mres.match_prefix + mres.pos - mres.leadsize > 1)
+ increment = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4;
+ else
+ increment = 10;
+ LYXERR(Debug::FIND, "Set increment to " << increment);
+ while (increment > 0) {
DocIterator old_cur = cur;
for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
}
}
else {
MatchResult mres2 = match(cur, -1, false);
- if ((mres2.match2end < mres.match2end) ||
- (mres2.match_len < mres.match_len)) {
- cur = old_cur;
- increment /= 2;
- }
- else {
- mres = mres2;
- increment -= 2;
- if (increment > mres.pos/2)
- increment = mres.pos/2;
+ displayMres(mres2,increment)
+ switch (interpretMatch(mres, mres2)) {
+ case MatchResult::newIsTooFar:
+ // behind the expected match
+ firstInvalid = increment;
+ cur = old_cur;
+ increment /= 2;
+ break;
+ case MatchResult::newIsBetter:
+ // not reached yet, but cur.pos()+increment is better
+ mres = mres2;
+ firstInvalid -= increment;
+ if (increment > firstInvalid*3/4)
+ increment = firstInvalid*3/4;
+ if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) {
+ if (increment >= mres2.match_prefix)
+ increment = (mres2.match_prefix+1)*3/4;
+ }
+ break;
+ default:
+ // TODO:
+ // Handle this case differently from MatchResult::newIsTooFar
+ // LYXERR0( "Something is wrong: Increment = " << increment << " match_prefix = " << mres.match_prefix);
+ firstInvalid--;
+ increment = increment*3/4;
+ cur = old_cur;
+ break;
}
}
}
+ // LYXERR0("Leaving first loop");
+ {
+ LYXERR(Debug::FIND, "Finalizing 1");
+ int len = findAdvFinalize(cur, match, mres.match_len);
+ if (len > 0)
+ return len;
+ else {
+ // try next possible match
+ cur.forwardPos();
+ continue;
+ }
+ }
+ // The following code is never reached
+ // but parts of it may be needed in future
int match_len_zero_count = 0;
+ MatchResult mres3;
for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
- if (i++ > 10) {
- int remaining_len = match(cur, -1, false).match_len;
+ if (i++ > 3) {
+ mres3 = match(cur, -1, false);
+ displayMres(mres3, 1)
+ int remaining_len = mres3.match_len;
if (remaining_len <= 0) {
// Apparently the searched string is not in the remaining part
break;
}
}
LYXERR(Debug::FIND, "Advancing cur: " << cur);
- int match_len3 = match(cur, 1).match_len;
+ mres3 = match(cur, 1);
+ displayMres(mres3, 1)
+ int match_len3 = mres3.match_len;
if (match_len3 < 0)
continue;
- int match_len2 = match(cur).match_len;
+ mres3 = match(cur);
+ displayMres(mres3, 1)
+ int match_len2 = mres3.match_len;
LYXERR(Debug::FIND, "match_len2: " << match_len2);
if (match_len2 > 0) {
// Sometimes in finalize we understand it wasn't a match
// and we need to continue the outest loop
- int len = findAdvFinalize(cur, match);
+ LYXERR(Debug::FIND, "Finalizing 2");
+ int len = findAdvFinalize(cur, match, mres.match_len);
if (len > 0) {
return len;
}
}
- if (match_len2 >= 0) {
- if (match_len2 == 0)
- match_len_zero_count++;
- else
- match_len_zero_count = 0;
- }
- else {
+ if (match_len2 > 0)
+ match_len_zero_count = 0;
+ else if (match_len2 == 0)
+ match_len_zero_count++;
+ if (match_len2 < 0) {
if (++match_len_zero_count > 3) {
LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len);
}
{
DocIterator cur_begin = doc_iterator_begin(cur.buffer());
DocIterator tmp_cur = cur;
- int len = findAdvFinalize(tmp_cur, match);
+ int len = findAdvFinalize(tmp_cur, match, -1);
Inset & inset = cur.inset();
for (; cur != cur_begin; cur.backwardPos()) {
LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
new_cur.backwardPos();
if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
break;
- int new_len = findAdvFinalize(new_cur, match);
+ int new_len = findAdvFinalize(new_cur, match, -1);
if (new_len == len)
break;
len = new_len;
static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase others_case)
{
ParagraphList::iterator pit = buffer.paragraphs().begin();
- LASSERT(pit->size() >= 1, /**/);
+ LASSERT(!pit->empty(), /**/);
pos_type right = pos_type(1);
pit->changeCase(buffer.params(), pos_type(0), right, first_case);
right = pit->size();
static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
{
Cursor & cur = bv->cursor();
- if (opt.repl_buf_name == docstring()
+ if (opt.repl_buf_name.empty()
|| theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0
|| theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0)
return 0;
return 0;
// Build a copy of the replace buffer, adapted to the KeepCase option
- Buffer & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true);
+ Buffer const & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true);
ostringstream oss;
repl_buffer_orig.write(oss);
string lyx = oss.str();
// OutputParams runparams(&repl_buffer.params().encoding());
OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
- runparams.flavor = OutputParams::XETEX;
+ runparams.flavor = Flavor::XeTeX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;
runparams.dryrun = true;
TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams);
try {
MatchStringAdv matchAdv(bv->buffer(), opt);
+#if QTSEARCH
+ if (!matchAdv.regexIsValid) {
+ bv->message(lyx::from_utf8(matchAdv.regexError));
+ return(false);
+ }
+#endif
int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos();
if (length > 0)
bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
match_len = findForwardAdv(cur, matchAdv);
else
match_len = findBackwardsAdv(cur, matchAdv);
- } catch (...) {
- // This may only be raised by lyx::regex()
- bv->message(_("Invalid regular expression!"));
+ } catch (exception & ex) {
+ bv->message(from_ascii(ex.what()));
return false;
}