#include "ParIterator.h"
#include "TexRow.h"
#include "Text.h"
+#include "Encoding.h"
#include "frontends/Application.h"
#include "frontends/alert.h"
#include "support/lstrings.h"
#include "support/regex.h"
+#include "support/textutils.h"
#include <map>
using namespace std;
size_t new_pos = s.find("\\regexp{", pos);
if (new_pos == string::npos)
new_pos = s.size();
- LYXERR(Debug::FIND, "new_pos: " << new_pos);
- string t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes());
- LYXERR(Debug::FIND, "t [lyx]: " << t);
- t = apply_escapes(t, get_regexp_escapes());
- LYXERR(Debug::FIND, "t [rxp]: " << t);
- s.replace(pos, new_pos - pos, t);
- new_pos = pos + t.size();
- LYXERR(Debug::FIND, "Regexp after escaping: " << s);
- LYXERR(Debug::FIND, "new_pos: " << new_pos);
- if (new_pos == s.size())
- break;
+ string t;
+ if (new_pos > pos) {
+ LYXERR(Debug::FIND, "new_pos: " << new_pos);
+ t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes());
+ LYXERR(Debug::FIND, "t [lyx]: " << t);
+ t = apply_escapes(t, get_regexp_escapes());
+ LYXERR(Debug::FIND, "t [rxp]: " << t);
+ s.replace(pos, new_pos - pos, t);
+ new_pos = pos + t.size();
+ LYXERR(Debug::FIND, "Regexp after escaping: " << s);
+ LYXERR(Debug::FIND, "new_pos: " << new_pos);
+ if (new_pos == s.size())
+ break;
+ }
// Might fail if \\endregexp{} is preceded by unexpected stuff (weird escapes)
size_t end_pos = s.find("\\endregexp{}}", new_pos + 8);
LYXERR(Debug::FIND, "end_pos: " << end_pos);
}
+/// Outcome of a single match attempt; every field starts at zero.
+class MatchResult {
+public:
+	/// Length of the match (0: nothing found; findAux stores -1 when
+	/// the text to search in is empty)
+	int match_len = 0;
+	/// Distance from the start of the match to the end of the searched string
+	int match2end = 0;
+	/// Offset of the match inside the searched string
+	int pos = 0;
+	/// All members are set by their in-class initializers
+	MatchResult() {}
+};
+
/** The class performing a match between a position in the document and the FindAdvOptions.
**/
class MatchStringAdv {
** @return
** The length of the matching text, or zero if no match was found.
**/
- int operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+ MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
public:
/// buffer
private:
/// Auxiliary find method (does not account for opt.matchword)
- int findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+ MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
/** Normalize a stringified or latexified LyX paragraph.
**
static docstring buffer_to_latex(Buffer & buffer)
{
- OutputParams runparams(&buffer.params().encoding());
+ //OutputParams runparams(&buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
odocstringstream ods;
otexstream os(ods);
runparams.nice = true;
- runparams.flavor = OutputParams::LATEX;
- runparams.linelen = 100000; //lyxrc.plaintext_linelen;
+ runparams.flavor = OutputParams::XETEX;
+ runparams.linelen = 10000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
runparams.for_search = true;
if (!opt.ignoreformat) {
str = buffer_to_latex(buffer);
} else {
- OutputParams runparams(&buffer.params().encoding());
+ // OutputParams runparams(&buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
- runparams.flavor = OutputParams::LATEX;
- runparams.linelen = 100000; //lyxrc.plaintext_linelen;
+ runparams.flavor = OutputParams::XETEX;
+ runparams.linelen = 10000; //lyxrc.plaintext_linelen;
runparams.dryrun = true;
runparams.for_search = true;
for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
* Ignore all of them */
isSize,
invalid,
- /* inputencoding, shortcut, ...
+ /* inputencoding, ...
* Discard also content, because they do not help in search */
doRemove,
/* twocolumns, ...
void resetOpenedP(int openPos);
void addIntervall(int upper);
void addIntervall(int low, int upper); /* if explicit */
- void setForDefaultLang(int upTo);
+ void removeAccents();
+ void setForDefaultLang(KeyInfo &defLang);
int findclosing(int start, int end, char up, char down, int repeat);
void handleParentheses(int lastpos, bool closingAllowed);
bool hasTitle;
+ int isOpeningPar(int pos);
string titleValue;
void output(ostringstream &os, int lastpos);
// string show(int lastpos);
};
-void Intervall::setForDefaultLang(int upTo)
+/**
+ * Classify the character at position 'pos' of 'par'.
+ * @return 0 if 'pos' is out of range or par[pos] is not '{',
+ *         3 if the three characters starting at 'pos' are "{[}" or "{]}",
+ *         1 for every other '{'.
+ */
+int Intervall::isOpeningPar(int pos)
+{
+	if (pos < 0)
+		return 0;
+	size_t const idx = size_t(pos);
+	if (idx >= par.size() || par[idx] != '{')
+		return 0;
+	// A lone bracket wrapped in braces is reported with its full length
+	bool const wrapped_bracket = idx + 2 < par.size()
+		&& par[idx + 2] == '}'
+		&& (par[idx + 1] == '[' || par[idx + 1] == ']');
+	return wrapped_bracket ? 3 : 1;
+}
+
+/**
+ * Adjust the first border entry for the default-language token.
+ *
+ * Does nothing unless ignoreidx >= 0. Otherwise the end of the token
+ * described by 'defLang' (_tokenstart + _tokensize) is computed and, if it
+ * is positive, borders[0].low and borders[0].upper are each raised to at
+ * least that value (they are never lowered).
+ */
+void Intervall::setForDefaultLang(KeyInfo &defLang)
{
// Enable the use of first token again
if (ignoreidx >= 0) {
- if (borders[0].low < upTo)
- borders[0].low = upTo;
- if (borders[0].upper < upTo)
- borders[0].upper = upTo;
+ int value = defLang._tokenstart + defLang._tokensize;
+ if (value > 0) {
+ if (borders[0].low < value)
+ borders[0].low = value;
+ if (borders[0].upper < value)
+ borders[0].upper = value;
+ }
}
}
}
}
+/// Maps an accent macro (without the leading backslash, e.g. "ddot{a}")
+/// to the UTF-8 encoded character it stands for.
+typedef map<string, string> AccentsMap;
+static AccentsMap accents;
+
+/**
+ * Register a family of accent macros in 'accents'.
+ *
+ * @param n      macro names separated by '|' (e.g. "check|v")
+ * @param param  the base letters, one byte each
+ * @param values the accented characters as UTF-8, in the same order
+ *               as the letters of 'param'
+ *
+ * For every name and every letter the key "name{letter}" is mapped to the
+ * corresponding character of 'values'. If 'values' holds fewer characters
+ * than 'param' has letters, the surplus letters are left unmapped instead
+ * of reading past the end of 'values'.
+ */
+static void buildaccent(string const & n, string const & param, string const & values)
+{
+	stringstream s(n);
+	string name;
+	const char delim = '|';
+	while (getline(s, name, delim)) {
+		size_t start = 0;
+		for (size_t i = 0; i < param.size(); i++) {
+			if (start >= values.size()) {
+				// No value left for the remaining letters
+				break;
+			}
+			string key = name + "{" + param[i] + "}";
+			// get the corresponding utf8-value
+			if ((values[start] & 0xc0) != 0xc0) {
+				// Not a utf8 lead byte (those start with 11xxxxxx).
+				// A plain ascii value is accepted as is; a stray
+				// continuation byte is skipped without a mapping.
+				if ((values[start] & 0x80) == 0) {
+					// is ascii
+					accents[key] = values.substr(start, 1);
+				}
+				start++;
+				continue;
+			}
+			// Lead byte: collect the continuation bytes (10xxxxxx)
+			size_t len = 1;
+			while (start + len < values.size()
+			       && (values[start + len] & 0xc0) == 0x80)
+				++len;
+			accents[key] = values.substr(start, len);
+			start += len;
+		}
+	}
+}
+
+/**
+ * Fill the file-level 'accents' table.
+ *
+ * Keys are macro texts without the leading backslash (for example "imath"
+ * or "ddot{a}"), values are UTF-8 string literals. The direct assignments
+ * cover the dotless i/j macros, the two wrappers around 'ß' and accents
+ * applied to \imath or \jmath. Each buildaccent() call registers, for every
+ * '|'-separated macro name in its first argument, one entry per letter of
+ * its second argument, taking the characters of the third argument in order.
+ * A later assignment to the same key replaces an earlier one, so the order
+ * of the statements matters.
+ */
+static void buildAccentsMap()
+{
+ accents["imath"] = "ı";
+ accents["i"] = "ı";
+ accents["jmath"] = "ȷ";
+ accents["lyxmathsym{ß}"] = "ß";
+ accents["text{ß}"] = "ß";
+ accents["ddot{\\imath}"] = "ï";
+ buildaccent("ddot", "aAeEiIioOuUyY",
+ "äÄëËïÏïöÖüÜÿŸ"); // umlaut
+ buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+ "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
+ accents["acute{\\imath}"] = "í";
+ buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
+ "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ");
+ buildaccent("dacute|H|h", "oOuU", "őŐűŰ"); // double acute
+ buildaccent("mathring|r", "aAuUwy",
+ "åÅůŮẘẙ"); // ring
+ accents["check{\\imath}"] = "ǐ";
+ accents["check{\\jmath}"] = "ǰ";
+ buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+ "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ"); // caron
+ accents["hat{\\imath}"] = "î";
+ accents["hat{\\jmath}"] = "ĵ";
+ buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
+ "âÂêÊîÎĉĈĝĜĥĤĵĴŝŜŵŴŷŶẑẐôÔûÛ"); // circ
+ accents["bar{\\imath}"] = "ī";
+ buildaccent("bar|=", "aAeEiIoOuUyY",
+ "āĀēĒīĪōŌūŪȳȲ"); // macron
+ accents["tilde{\\imath}"] = "ĩ";
+ buildaccent("tilde", "aAnNoOiIuU",
+ "ãÃñÑõÕĩĨũŨ"); // tilde
+ accents["breve{\\imath}"] = "ĭ";
+ buildaccent("breve|u", "aAeEgGiIoOuU",
+ "ăĂĕĔğĞĭĬŏŎŭŬ"); // breve
+ accents["grave{\\imath}"] = "ì";
+ buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
+ "àÀèÈìÌòÒùÙǹǸẁẀỳỲ"); // grave
+ buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
+ "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ"); // dot below
+}
+
+/*
+ * Accents created in a math or regexp environment are written as macros,
+ * but the search needs their utf8 equivalent. Every macro found in 'par'
+ * that has an entry in 'accents' is overwritten in place: the utf8 bytes
+ * go to the start of the macro, the rest of the macro is blanked and
+ * handed to addIntervall(). Macros without an entry are only reported.
+ */
+void Intervall::removeAccents()
+{
+	if (accents.empty())
+		buildAccentsMap();
+	static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
+	sregex_iterator const last;
+	for (sregex_iterator cur(par.begin(), par.end(), accre); cur != last; ++cur) {
+		smatch const found = *cur;
+		// Group 1 is the macro without its leading backslash
+		string const macro = found.str(1);
+		AccentsMap::const_iterator const hit = accents.find(macro);
+		if (hit == accents.end()) {
+			LYXERR0("Not added accent for \"" << macro << "\"");
+			continue;
+		}
+		string const & utf8 = hit->second;
+		size_t const from = found.position(0);
+		size_t const kept = from + utf8.size();
+		size_t const upto = from + found.str(0).size();
+		// The replacement goes to the front of the matched macro
+		for (size_t k = 0; k < utf8.size(); ++k)
+			par[from + k] = utf8[k];
+		addIntervall(kept, upto);
+		// remove traces of any remaining chars
+		for (size_t k = kept; k < upto; ++k)
+			par[k] = ' ';
+	}
+}
+
void Intervall::handleOpenP(int i)
{
actualdeptindex++;
else
return entries[keyinfo];
};
- void setForDefaultLang(int upTo) {interval.setForDefaultLang(upTo);};
+ void setForDefaultLang(KeyInfo &defLang) {interval.setForDefaultLang(defLang);};
void addIntervall(int low, int up) { interval.addIntervall(low, up); };
};
size_t math_pos = 10000;
string math_end;
+ interval.removeAccents();
+
for (sregex_iterator itmath(interval.par.begin(), interval.par.end(), rmath), end; itmath != end; ++itmath) {
submath = *itmath;
if (math_end_waiting) {
found._dataStart = found._dataEnd;
found._tokensize = found._dataEnd - found._tokenstart;
found.parenthesiscount = 0;
+ found.head = interval.par.substr(found._tokenstart, found._tokensize);
+ }
+ else {
+ continue;
}
}
else {
found._dataEnd = found._tokenstart + found._tokensize;
found._dataStart = found._dataEnd;
found.parenthesiscount = 0;
+ found.head = interval.par.substr(found._tokenstart, found._tokensize);
evaluatingMath = true;
}
else {
found.head = interval.par.substr(found._tokenstart, found._tokensize);
}
else {
+ // Swallow possible optional params
while (interval.par[pos1] == '[') {
pos1 = interval.findclosing(pos1+1, interval.par.length(), '[', ']')+1;
}
+ // Swallow also the eventual parameter
if (interval.par[pos1] == '{') {
found._dataEnd = interval.findclosing(pos1+1, interval.par.length()) + 1;
}
found._dataStart = found._dataEnd;
found._tokensize = count + found._dataEnd - pos;
found.parenthesiscount = 0;
+ found.head = interval.par.substr(found._tokenstart, found._tokensize);
found.disabled = true;
}
}
found._dataEnd = found._dataStart;
found._tokensize = count + found._dataEnd - pos;
found.parenthesiscount = 0;
+ found.head = interval.par.substr(found._tokenstart, found._tokensize);
found.disabled = true;
}
}
found.head = "\\" + key + "{";
}
}
+ found._tokensize = found.head.length();
found._dataStart = found._tokenstart + found.head.length();
if (interval.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) {
found._dataStart += 15;
// Skip
// makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Custom space/skip, remove the content (== length value)
- makeKey("vspace|hspace|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString);
+ makeKey("vspace|vspace*|hspace|hspace*|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString);
// Found in fr/UserGuide.lyx
makeKey("og|fg", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// quotes
makeKey("textquotedblleft|textquotedblright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
// Known macros to remove (including their parameter)
// No split
- makeKey("inputencoding|shortcut|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString);
+ makeKey("input|inputencoding|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString);
makeKey("addtocounter|setlength", KeyInfo(KeyInfo::noContent, 2, true), isPatternString);
// handle like standard keys with 1 parameter.
makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString);
// Remove table decorations
makeKey("hline|tabularnewline|toprule|bottomrule|midrule", KeyInfo(KeyInfo::doRemove, 0, true), isPatternString);
// Discard shape-header.
- // For footnote too, because of possible lang settings
+ // For footnote or shortcut too, because of lang settings
// and wrong handling if used 'KeyInfo::noMain'
makeKey("circlepar|diamondpar|heartpar|nutpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
makeKey("trianglerightpar|hexagonpar|starpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
makeKey("triangleleftpar|shapepar|dropuppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
- makeKey("hphantom|footnote|includegraphics", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+ makeKey("hphantom|vphantom|footnote|shortcut|include|includegraphics", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+ makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString);
// like ('tiny{}' or '\tiny ' ... )
makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString);
+/**
+ * Walk the positions from 'start' up to (not including) 'region_end',
+ * stepping with interval.nextNotIgnored().
+ *
+ * For every position that interval.isOpeningPar() reports as 1, the
+ * matching closing position is looked up with interval.findclosing()
+ * (searched up to the end of interval.par, so it may lie beyond
+ * 'region_end') and one-character intervals are added for both the opening
+ * and the closing position. A result of 3 (the "{[}" / "{]}" case) adds
+ * nothing; the scan just skips two extra characters.
+ */
void LatexInfo::processRegion(int start, int region_end)
{
while (start < region_end) { /* Let {[} and {]} survive */
- if ((interval.par[start] == '{') &&
- (interval.par[start+1] != ']') &&
- (interval.par[start+1] != '[')) {
+ int cnt = interval.isOpeningPar(start);
+ if (cnt == 1) {
// Closing is allowed past the region
int closing = interval.findclosing(start+1, interval.par.length());
interval.addIntervall(start, start+1);
interval.addIntervall(closing, closing+1);
}
+ else if (cnt == 3)
+ start += 2;
start = interval.nextNotIgnored(start+1);
}
}
}
case KeyInfo::endArguments:
// Remove trailing '{}' too
- actual._dataStart += 2;
- actual._dataEnd += 2;
- interval.addIntervall(actual._tokenstart, actual._dataEnd);
+ actual._dataStart += 1;
+ actual._dataEnd += 1;
+ interval.addIntervall(actual._tokenstart, actual._dataEnd+1);
nextKeyIdx = getNextKey();
break;
case KeyInfo::noMain:
case KeyInfo::isMain: {
if (interval.par.substr(actual._dataStart, 2) == "% ")
interval.addIntervall(actual._dataStart, actual._dataStart+2);
+ if (actual._tokenstart > 0) {
+ int prev = interval.previousNotIgnored(actual._tokenstart - 1);
+ if ((prev >= 0) && interval.par[prev] == '%')
+ interval.addIntervall(prev, prev+1);
+ }
if (actual.disabled) {
removeHead(actual);
if ((interval.par.substr(actual._dataStart, 3) == " \\[") ||
// interval.resetOpenedP(actual._dataStart-1);
}
else {
- if (actual._tokenstart == 0) {
+ if (actual._tokenstart < 26) {
// for the first (and maybe dummy) language
- interval.setForDefaultLang(actual._tokenstart + actual._tokensize);
+ interval.setForDefaultLang(actual);
}
interval.resetOpenedP(actual._dataStart-1);
}
}
// Remove possible empty data
int dstart = interval.nextNotIgnored(actual._dataStart);
- while ((dstart < output_end) && (interval.par[dstart] == '{')) {
+ while (interval.isOpeningPar(dstart) == 1) {
interval.addIntervall(dstart, dstart+1);
int dend = interval.findclosing(dstart+1, output_end);
interval.addIntervall(dend, dend+1);
firstKey._dataStart = datastart;
firstKey._dataEnd = par.length();
(void) li.setNextKey(nextkeyIdx);
- if (firstKey._tokensize > 0) {
- // Fake the last opened parenthesis
- li.setForDefaultLang(firstKey._tokensize);
- }
+ // Fake the last opened parenthesis
+ li.setForDefaultLang(firstKey);
nextkeyIdx = li.process(os, firstKey);
}
else {
firstKey._dataStart = datastart;
firstKey._dataEnd = nextKey._dataEnd+1;
(void) li.setNextKey(nextkeyIdx);
- if (firstKey._tokensize > 0)
- li.setForDefaultLang(firstKey._tokensize);
+ li.setForDefaultLang(firstKey);
nextkeyIdx = li.process(os, firstKey);
}
else {
// Check if ! empty
if ((firstKey._dataStart < firstKey._dataEnd) &&
(par[firstKey._dataStart] != '}')) {
- if (firstKey._tokensize > 0)
- li.setForDefaultLang(firstKey._tokensize);
+ li.setForDefaultLang(firstKey);
(void) li.process(os, firstKey);
}
s = os.str();
missed = 0;
if (withformat) {
regex_f = identifyFeatures(result);
- string features = "";
+ string features = "";
for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
string a = it->first;
regex_with_format = true;
- features += " " + a;
+ features += " " + a;
// LYXERR0("Identified regex format:" << a);
}
- LYXERR(Debug::FIND, "Identified Features" << features);
+ LYXERR(Debug::FIND, "Identified Features" << features);
}
} else if (regex_with_format) {
++close_wildcards;
}
if (!opt.ignoreformat) {
- // Remove extra '\}' at end
- while ( regex_replace(par_as_string, par_as_string, "(.*)\\\\}$", "$1")) {
- open_braces++;
+ // Remove extra '\}' at end if not part of \{\.\}
+ size_t lng = par_as_string.size();
+ while(lng > 2) {
+ if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) {
+ if (lng >= 6) {
+ if (par_as_string.substr(lng-6,3).compare("\\{\\") == 0)
+ break;
+ }
+ lng -= 2;
+ open_braces++;
+ }
+ else
+ break;
}
+ if (lng < par_as_string.size())
+ par_as_string = par_as_string.substr(0,lng);
/*
// save '\.'
regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_");
return count;
}
-int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
+MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
{
+ MatchResult mres;
+
if (at_begin &&
(opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
- return 0;
+ return mres;
docstring docstr = stringifyFromForSearch(opt, cur, len);
string str = normalize(docstr, true);
if (!opt.ignoreformat) {
str = correctlanguagesetting(str, false, !opt.ignoreformat);
}
- if (str.empty()) return(-1);
+ if (str.empty()) {
+ mres.match_len = -1;
+ return mres;
+ }
LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'");
LYXERR(Debug::FIND, "After normalization: '" << str << "'");
}
sregex_iterator re_it(str.begin(), str.end(), *p_regexp, flags);
if (re_it == sregex_iterator())
- return 0;
+ return mres;
match_results<string::const_iterator> const & m = *re_it;
if (0) { // Kornel Benko: DO NOT CHECKK
// Check braces on the segment that matched the entire regexp expression,
// plus the last subexpression, if a (.*?) was inserted in the constructor.
if (!braces_match(m[0].first, m[0].second, open_braces))
- return 0;
+ return mres;
}
// Check braces on segments that matched all (.*?) subexpressions,
// except the last "padding" one inserted by lyx.
for (size_t i = 1; i < m.size() - 1; ++i)
if (!braces_match(m[i].first, m[i].second, open_braces))
- return 0;
+ return mres;
// Exclude from the returned match length any length
// due to close wildcards added at end of regexp
result -= leadingsize;
else
result = 0;
- return computeSize(str.substr(pos+leadingsize,result), result);
+ mres.match_len = computeSize(str.substr(pos+leadingsize,result), result);
+ mres.match2end = str.size() - pos - leadingsize;
+ mres.pos = pos+leadingsize;
+ return mres;
}
// else !use_regexp: but all code paths above return
if (at_begin) {
LYXERR(Debug::FIND, "size=" << par_as_string.size()
<< ", substr='" << str.substr(0, par_as_string.size()) << "'");
- if (str.substr(0, par_as_string.size()) == par_as_string)
- return par_as_string.size();
+ if (str.substr(0, par_as_string.size()) == par_as_string) {
+ mres.match_len = par_as_string.size();
+ mres.match2end = str.size();
+ mres.pos = 0;
+ return mres;
+ }
} else {
size_t pos = str.find(par_as_string_nolead);
- if (pos != string::npos)
- return par_as_string.size();
+ if (pos != string::npos) {
+ mres.match_len = par_as_string.size();
+ mres.match2end = str.size() - pos;
+ mres.pos = pos;
+ return mres;
+ }
}
- return 0;
+ return mres;
}
-int MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
+MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
{
- int res = findAux(cur, len, at_begin);
+ MatchResult mres = findAux(cur, len, at_begin);
+ int res = mres.match_len;
LYXERR(Debug::FIND,
"res=" << res << ", at_begin=" << at_begin
<< ", matchword=" << opt.matchword
<< ", inTexted=" << cur.inTexted());
if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted())
- return res;
- if ((len > 0) && (res < len))
- return 0;
+ return mres;
+ if ((len > 0) && (res < len)) {
+ mres.match_len = 0;
+ return mres;
+ }
Paragraph const & par = cur.paragraph();
bool ws_left = (cur.pos() > 0)
? par.isWordSeparator(cur.pos() - 1)
if (ws_left && ws_right) {
// Check for word separators inside the found 'word'
for (int i = 0; i < len; i++) {
- if (par.isWordSeparator(cur.pos() + i))
- return 0;
+ if (par.isWordSeparator(cur.pos() + i)) {
+ mres.match_len = 0;
+ return mres;
+ }
}
- return res;
+ return mres;
}
- return 0;
+ mres.match_len = 0;
+ return mres;
}
while ((pos = t.find("\n")) != string::npos) {
if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
// Handle '\\\n'
- if (std::isalnum(t[pos+1])) {
+ if (isAlnumASCII(t[pos+1])) {
t.replace(pos-2, 3, " ");
}
else {
t.replace(pos-2, 3, "");
}
}
- else if (!std::isalnum(t[pos+1]) || !std::isalnum(t[pos-1])) {
+ else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
// '\n' adjacent to non-alpha-numerics, discard
t.replace(pos, 1, "");
}
// TODO Try adding a AS_STR_INSERTS as last arg
pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ?
int(par.size()) : cur.pos() + len;
- OutputParams runparams(&cur.buffer()->params().encoding());
+ // OutputParams runparams(&cur.buffer()->params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
- runparams.flavor = OutputParams::LATEX;
- runparams.linelen = 100000; //lyxrc.plaintext_linelen;
+ runparams.flavor = OutputParams::XETEX;
+ runparams.linelen = 10000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
+ runparams.for_search = true;
LYXERR(Debug::FIND, "Stringifying with cur: "
<< cur << ", from pos: " << cur.pos() << ", end: " << end);
return par.asString(cur.pos(), end,
odocstringstream ods;
otexstream os(ods);
- OutputParams runparams(&buf.params().encoding());
+ //OutputParams runparams(&buf.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
- runparams.flavor = OutputParams::LATEX;
+ runparams.flavor = OutputParams::XETEX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;
// No side effect of file copying and image conversion
runparams.dryrun = true;
d = cur.depth();
old_cur = cur;
cur.forwardPos();
- } while (cur && cur.depth() > d && match(cur) > 0);
+ } while (cur && cur.depth() > d && match(cur).match_len > 0);
cur = old_cur;
- int max_match = match(cur); /* match valid only if not searching whole words */
+ int max_match = match(cur).match_len; /* match valid only if not searching whole words */
if (max_match <= 0) return 0;
LYXERR(Debug::FIND, "Ok");
return 0;
if (match.opt.matchword) {
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
- while (cur.pos() + len <= cur.lastpos() && match(cur, len) <= 0) {
+ while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
++len;
LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
}
// Length of matched text (different from len param)
- int old_match = match(cur, len);
+ int old_match = match(cur, len).match_len;
if (old_match < 0)
old_match = 0;
int new_match;
// Greedy behaviour while matching regexps
- while ((new_match = match(cur, len + 1)) > old_match) {
+ while ((new_match = match(cur, len + 1).match_len) > old_match) {
++len;
old_match = new_match;
LYXERR(Debug::FIND, "verifying match with len = " << len);
int maxl = cur.lastpos() - cur.pos();
// Greedy behaviour while matching regexps
while (maxl > minl) {
- int actual_match = match(cur, len);
+ int actual_match = match(cur, len).match_len;
if (actual_match >= max_match) {
// actual_match > max_match _can_ happen,
// if the search area splits
}
if (cur.pos() != old_cur.pos()) {
// OK, forwarded 1 pos in actual inset
- actual_match = match(cur, len-1);
+ actual_match = match(cur, len-1).match_len;
if (actual_match == max_match) {
// Ha, got it! The shorter selection has the same match length
len--;
}
else {
LYXERR0("cur.pos() == old_cur.pos(), this should never happen");
- actual_match = match(cur, len);
+ actual_match = match(cur, len).match_len;
if (actual_match == max_match)
old_cur = cur;
}
return 0;
while (!theApp()->longOperationCancelled() && cur) {
LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
- int match_len = match(cur, -1, false);
+ MatchResult mres = match(cur, -1, false);
+ int match_len = mres.match_len;
LYXERR(Debug::FIND, "match_len: " << match_len);
+ if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
+ LYXERR0("BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
+ match_len = 0;
+ }
if (match_len > 0) {
+ // Try to find the begin of searched string
+ int increment = mres.pos/2;
+ while (mres.pos > 5 && (increment > 5)) {
+ DocIterator old_cur = cur;
+ for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
+ }
+ if (! cur || (cur.pit() > old_cur.pit())) {
+ // Are we outside of the paragraph?
+ // This can happen if moving past some UTF8-encoded chars
+ cur = old_cur;
+ increment /= 2;
+ }
+ else {
+ MatchResult mres2 = match(cur, -1, false);
+ if ((mres2.match2end < mres.match2end) ||
+ (mres2.match_len < mres.match_len)) {
+ cur = old_cur;
+ increment /= 2;
+ }
+ else {
+ mres = mres2;
+ increment -= 2;
+ if (increment > mres.pos/2)
+ increment = mres.pos/2;
+ }
+ }
+ }
int match_len_zero_count = 0;
- for (; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
+ for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
+ if (i++ > 10) {
+ int remaining_len = match(cur, -1, false).match_len;
+ if (remaining_len <= 0) {
+ // Apparently the searched string is not in the remaining part
+ break;
+ }
+ else {
+ i = 0;
+ }
+ }
LYXERR(Debug::FIND, "Advancing cur: " << cur);
- int match_len3 = match(cur, 1);
+ int match_len3 = match(cur, 1).match_len;
if (match_len3 < 0)
continue;
- int match_len2 = match(cur);
+ int match_len2 = match(cur).match_len;
LYXERR(Debug::FIND, "match_len2: " << match_len2);
if (match_len2 > 0) {
// Sometimes in finalize we understand it wasn't a match
LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
DocIterator new_cur = cur;
new_cur.backwardPos();
- if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur))
+ if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
break;
int new_len = findAdvFinalize(new_cur, match);
if (new_len == len)
bool pit_changed = false;
do {
cur.pos() = 0;
- bool found_match = match(cur, -1, false);
+ bool found_match = (match(cur, -1, false).match_len > 0);
if (found_match) {
if (pit_changed)
LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur);
DocIterator cur_prev_iter;
do {
- found_match = match(cur);
+ found_match = (match(cur).match_len > 0);
LYXERR(Debug::FIND, "findBackAdv3: found_match="
<< found_match << ", cur: " << cur);
if (found_match)
return;
LASSERT(sel_len > 0, return);
- if (!matchAdv(sel_beg, sel_len))
+ if (!matchAdv(sel_beg, sel_len).match_len)
return;
// Build a copy of the replace buffer, adapted to the KeepCase option
} else if (cur.inMathed()) {
odocstringstream ods;
otexstream os(ods);
- OutputParams runparams(&repl_buffer.params().encoding());
+ // OutputParams runparams(&repl_buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
- runparams.flavor = OutputParams::LATEX;
+ runparams.flavor = OutputParams::XETEX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;
runparams.dryrun = true;
TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams);