buildAccentsMap();
//LYXERR0("correctRegex input '" << t << "'");
+ int skip = 0;
for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
sub = *it;
string replace;
replace = "";
backslashed = true;
}
+ else if (withformat && next[0] == '$') {
+ replace = accents["lyxdollar"];
+ skip = 1; // Skip following '$'
+ }
}
}
else if (sub.str(4) == "mathcircumflex")
if (lastpos < (size_t) sub.position(2))
s += std::regex_replace(t.substr(lastpos, sub.position(2) - lastpos), protectedSpace, R"( )");
s += replace;
- lastpos = sub.position(2) + sub.length(2);
+ lastpos = sub.position(2) + sub.length(2) + skip;
+ skip = 0;
}
if (lastpos == 0)
s = std::regex_replace(t, protectedSpace, R"( )");
** constructor as opt.search, under the opt.* options settings.
**
** @param at_begin
- ** If set, then match is searched only against beginning of text starting at cur.
- ** If unset, then match is searched anywhere in text starting at cur.
+ ** If set to MatchStringAdv::MatchFromStart,
+ ** then match is searched only against beginning of text starting at cur.
+ ** Otherwise the match is searched anywhere in text starting at cur.
**
** @return
** The length of the matching text, or zero if no match was found.
**/
- MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+ enum matchType { // Selects where a match is allowed to begin
+ MatchAnyPlace, // match is searched anywhere in the text starting at cur
+ MatchFromStart // match is searched only against the beginning of the text starting at cur
+ };
+ string matchTypeAsString(matchType const x) const { return (x == MatchFromStart ? "MatchFromStart" : "MatchAnyPlace"); } // enumerator name of x as text (used in the debug log messages)
+ MatchResult operator()(DocIterator const & cur, int len, matchType at_begin) const;
#if QTSEARCH
bool regexIsValid;
string regexError;
private:
/// Auxiliary find method (does not account for opt.matchword)
- MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+ MatchResult findAux(DocIterator const & cur, int len, matchType at_begin) const;
void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = "");
/** Normalize a stringified or latexified LyX paragraph.
string par;
int ignoreidx;
static vector<Border> borders;
- int depts[MAXOPENED];
- int closes[MAXOPENED];
+ static vector<int> depts;
+ static vector<int> closes;
int actualdeptindex;
int previousNotIgnored(int) const;
int nextNotIgnored(int) const;
void removeAccents();
void setForDefaultLang(KeyInfo const & defLang) const;
int findclosing(int start, int end, char up, char down, int repeat);
+ void removeInvalidClosings(void);
void handleParentheses(int lastpos, bool closingAllowed);
bool hasTitle;
// Number of disabled language specs up
};
vector<Border> Intervall::borders = vector<Border>(30);
+vector<int> Intervall::depts = vector<int>(30); // starts with 30 entries; handleOpenP() enlarges it on demand
+vector<int> Intervall::closes = vector<int>(30); // starts with 30 entries; handleOpenP() resizes it together with depts
int Intervall::isOpeningPar(int pos) const
{
}
}
+#if 0
+// Not needed, because depts and closes are now dynamically expanded
static void checkDepthIndex(int val)
{
static int maxdepthidx = MAXOPENED-2;
LYXERR(Debug::INFO, "maxdepthidx now " << val);
}
}
+#endif
#if 0
// Not needed, because borders are now dynamically expanded
accents["braceright"] = getutf8(0xf0031);
accents["lyxtilde"] = getutf8(0xf0032);
accents["sim"] = getutf8(0xf0032);
+ accents["lyxdollar"] = getutf8(0xf0033);
accents["backslash lyx"] = getutf8(0xf0010); // Used logos inserted with starting \backslash
accents["backslash LyX"] = getutf8(0xf0010);
accents["backslash tex"] = getutf8(0xf0011);
if (accents.empty())
buildAccentsMap();
static regex const accre("\\\\("
- "([\\S]|[A-Za-z]+)\\{[^\\{\\}]+\\}"
+ "([\\S]|[A-Za-z]+)\\{[^\\\\\\{\\}]+\\}"
+ "|([\\S]|[A-Za-z]+)\\{\\\\[ij](math)?\\}"
"|("
"(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))"
"|[A-Za-z]+"
void Intervall::handleOpenP(int i)
{
actualdeptindex++;
+ if ((size_t) actualdeptindex >= depts.size()) { // the new index would lie outside the vectors
+ depts.resize(actualdeptindex + 30); // enlarge so that the new index plus some spare entries fit
+ closes.resize(actualdeptindex + 30); // closes always gets the same new size as depts
+ }
depts[actualdeptindex] = i+1;
closes[actualdeptindex] = -1;
- checkDepthIndex(actualdeptindex);
+ // checkDepthIndex(actualdeptindex); -- call disabled: depts and closes are resized on demand above
}
void Intervall::handleCloseP(int i, bool closingAllowed)
return end;
}
+void Intervall::removeInvalidClosings(void)
+{
+ // Pass every '}' in par that has no still-open '{' before it to addIntervall(); this can happen, if there are deleted parts
+ int skip = 0; // extra characters to step over after the current one
+ int depth = 0; // number of '{' seen so far that are not yet closed
+ for (unsigned i = 0; i < par.size(); i += 1 + skip) {
+ char c = par[i];
+ skip = 0;
+ if (c == '\\') skip = 1; // the character following a backslash is stepped over without being inspected
+ else if (c == '{')
+ depth++;
+ else if (c == '}') {
+ if (depth == 0) { // closing brace with no open '{' counted in par
+ addIntervall(i, i+1); // NOTE(review): presumably marks [i, i+1) as ignored -- confirm against addIntervall()
+ LYXERR(Debug::FINDVERBOSE, "removed invalid closing '}' at " << i);
+ }
+ else
+ --depth;
+ }
+ }
+}
class MathInfo {
class MathEntry {
public:
static bool removeMathHull = false;
interval_.removeAccents();
+ interval_.removeInvalidClosings();
for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) {
submath = *itmath;
if (keysBuilt && !isPatternString) return;
// Keys to ignore in any case
- makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+ makeKey("text|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+ makeKey("nonumber|notag", KeyInfo(KeyInfo::headRemove, 0, true), true);
// Known standard keys with 1 parameter.
// Split is done, if not at start of region
makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
CreateRegexp(opt, "", "", "");
return;
}
- use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
+ use_regexp = ds.find(from_utf8("\\regexp{")) != std::string::npos;
if (opt.replace_all && previous_single_replace) {
previous_single_replace = false;
num_replaced = 0;
break;
}
if (lng < par_as_string.size())
- par_as_string = par_as_string.substr(0,lng);
+ par_as_string.resize(lng);
}
LYXERR(Debug::FINDVERBOSE, "par_as_string after correctRegex is '" << par_as_string << "'");
if ((lng > 0) && (par_as_string[0] == '^')) {
}
}
-MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
+MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const
{
MatchResult mres;
mres.searched_size = len;
- if (at_begin &&
- (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
- return mres;
docstring docstr = stringifyFromForSearch(opt, cur, len);
string str;
LASSERT(use_regexp, /**/);
{
// use_regexp always true
- LYXERR(Debug::FINDVERBOSE, "Searching in regexp mode: at_begin=" << at_begin);
+ LYXERR(Debug::FINDVERBOSE, "Searching in regexp mode: at_begin=" << matchTypeAsString(at_begin));
#if QTSEARCH
QString qstr = QString::fromStdString(str);
QRegularExpression const *p_regexp;
QRegularExpression::MatchType flags = QRegularExpression::NormalMatch;
- if (at_begin) {
+ if (at_begin == MatchStringAdv::MatchFromStart) {
p_regexp = ®exp;
} else {
p_regexp = ®exp2;
#else
regex const *p_regexp;
regex_constants::match_flag_type flags;
- if (at_begin) {
+ if (at_begin == MatchStringAdv::MatchFromStart) {
flags = regex_constants::match_continuous;
p_regexp = ®exp;
} else {
}
-MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
+MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const
{
MatchResult mres = findAux(cur, len, at_begin);
int res = mres.match_len;
LYXERR(Debug::FINDVERBOSE,
- "res=" << res << ", at_begin=" << at_begin
+ "res=" << res << ", at_begin=" << matchTypeAsString(at_begin)
<< ", matchAtStart=" << opt.matchAtStart
<< ", inTexted=" << cur.inTexted());
if (opt.matchAtStart) {
}
#endif
-string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
+#if 1
+static string convertLF2Space(docstring const &s, bool ignore_format)
+{
+ // Return s as UTF-8 with each '\n' turned into a space or dropped (rules below). Using original docstring to handle '\n'
+
+ if (s.size() == 0) return "";
+ stringstream t;
+ size_t pos;
+ size_t start = 0; // index of the first character of s still to be copied
+ size_t end = s.size() - 1; // index of the last character of s to be copied (inclusive)
+ if (!ignore_format) { // only when format is not ignored: trim leading and trailing '\n'
+ while (s[start] == '\n' && start <= end)
+ start++;
+ while (end >= start && s[end] == '\n')
+ end--;
+ if (start >= end + 1) // every character of s was '\n'
+ return "";
+ }
+ do {
+ bool dospace = true; // whether a ' ' is written in place of this line break
+ int skip = -1; // number of characters before the '\n' to drop; NOTE(review): while still -1, pos-skip-start below evaluates to pos+1-start, so the '\n' itself is copied -- confirm this is intended
+ pos = s.find('\n', start);
+ if (pos >= end) { // no '\n' strictly before index end (find() may also have returned npos): copy the rest unchanged and stop
+ t << lyx::to_utf8(s.substr(start, end + 1 - start));
+ break;
+ }
+ if (!ignore_format) {
+ if ((pos > start + 1) && // line break directly preceded by two backslashes
+ s[pos-1] == '\\' &&
+ s[pos-2] == '\\') {
+ skip = 2; // drop the two backslashes together with the '\n'
+ if ((pos > start + 2) &&
+ (s[pos+1] == '~' || isSpace(s[pos+1]) ||
+ s[pos-3] == '~' || isSpace(s[pos-3]))) {
+ // discard "\\\\\n", do not replace with space
+ dospace = false;
+ }
+ }
+ else if (pos > start) {
+ if (s[pos-1] == '%') { // line break directly preceded by one or more '%'
+ skip = 1;
+ while ((pos > start+skip) && (s[pos-1-skip] == '%')) // extend skip over the whole run of '%'
+ skip++;
+ if ((pos > start+skip) &&
+ (s[pos+1] == '~' || isSpace(s[pos+1]) ||
+ s[pos-1-skip] == '~' || isSpace(s[pos-1-skip]))) {
+ // discard '%%%%%\n'
+ dospace = false;
+ }
+ }
+ else if (!isAlnumASCII(s[pos+1]) || !isAlnumASCII(s[pos-1])) { // at least one neighbour of the '\n' fails isAlnumASCII()
+ dospace = false;
+ skip = 0; // remove the '\n' only
+ }
+ }
+ }
+ else { // ignore_format: every '\n' found before index end becomes a single space
+ dospace = true;
+ skip = 0;
+ }
+ t << lyx::to_utf8(s.substr(start, pos-skip-start)); // copy the text in front of the line break, minus the skip characters
+ if (dospace)
+ t << ' ';
+ start = pos+1; // continue with the character after the '\n'
+ } while (start <= end);
+ return(t.str());
+}
+
+#else
+static string convertLF2Space(docstring const & s, bool ignore_format)
{
+ // Using utf8-converted string to handle '\n'
+
string t;
t = lyx::to_utf8(s);
// Remove \n at begin
while ((pos = t.find("\n")) != string::npos) {
if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
// Handle '\\\n'
- if (isAlnumASCII(t[pos+1])) {
+ if (isPrintableNonspace(t[pos+1]) && ((pos < 3) || isPrintableNonspace(t[pos-3]))) {
t.replace(pos-2, 3, " ");
}
else {
+ // Already a space there
t.replace(pos-2, 3, "");
}
}
}
}
}
- // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
- // Kornel: Added textsl, textsf, textit, texttt and noun
- // + allow to seach for colored text too
- LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t);
- while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
- LYXERR(Debug::FINDVERBOSE, " further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
- while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
- LYXERR(Debug::FINDVERBOSE, " further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << t);
- while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
+ return(t);
- return t;
}
+#endif
+string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
+{
+ string t = convertLF2Space(s, ignore_format); // UTF-8 copy of s with its line breaks already handled
+
+ // The following replaces are not appropriate in non-format-search mode
+ if (!ignore_format) {
+ // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
+ // Kornel: Added textsl, textsf, textit, texttt and noun
+ // + allow to search for colored text too
+ LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t);
+ while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", "")) // repeated for as long as regex_replace() returns true
+ LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
+ while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", "")) // same loop for empty sectioning macros
+ LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << t);
+ while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", "")); // empty loop body: just repeat until regex_replace() returns false
+ }
+ return t;
+}
docstring stringifyFromCursor(DocIterator const & cur, int len)
{
// either one sees "http://www.bla.bla" or nothing
// so the search for "www" gives prefix_len = 7 (== sizeof("http://")
// and although we search for only 3 chars, we find the whole hyperlink inset
- bool at_begin = (expected.match_prefix == 0);
+ MatchStringAdv::matchType at_begin = (expected.match_prefix == 0) ? MatchStringAdv::MatchFromStart : MatchStringAdv::MatchAnyPlace;
if (!match.opt.forward && match.opt.ignoreformat) {
if (expected.pos > 0)
return fail;
}
- LASSERT(at_begin, /**/);
- if (expected.match_len > 0 && at_begin) {
+ LASSERT(at_begin == MatchStringAdv::MatchFromStart, /**/);
+ if (expected.match_len > 0 && at_begin == MatchStringAdv::MatchFromStart) {
// Search for deepest match
old_cur = cur;
max_match = expected;
}
else {
// (expected.match_len <= 0)
- mres = match(cur); /* match valid only if not searching whole words */
+ mres = match(cur, -1, MatchStringAdv::MatchFromStart); /* match valid only if not searching whole words */
displayMres(mres, "Start with negative match", cur);
max_match = mres;
}
- if (max_match.match_len <= 0) return fail;
+ // Only now we are really at_begin
+ if ((max_match.match_len <= 0) ||
+ (match.opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()))
+ return fail;
LYXERR(Debug::FINDVERBOSE, "Ok");
// Compute the match length
while (!theApp()->longOperationCancelled() && cur) {
//(void) findAdvForwardInnermost(cur);
LYXERR(Debug::FINDVERBOSE, "findForwardAdv() cur: " << cur);
- MatchResult mres = match(cur, -1, false);
+ MatchResult mres = match(cur, -1, MatchStringAdv::MatchAnyPlace);
string msg = "Starting";
if (repeat)
msg = "Repeated";
continue;
}
cur.pos() = cur.pos() + increment;
- MatchResult mres2 = match(cur, -1, false);
+ MatchResult mres2 = match(cur, -1, MatchStringAdv::MatchAnyPlace);
displayMres(mres2, "findForwardAdv loop", cur)
switch (interpretMatch(mres, mres2)) {
case MatchResult::newIsTooFar:
LYXERR(Debug::FINDVERBOSE, "findMostBackwards(): cur=" << cur);
DocIterator new_cur = cur;
new_cur.backwardPos();
- if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
+ if (new_cur == cur || &new_cur.inset() != &inset
+ || match(new_cur, -1, MatchStringAdv::MatchFromStart).match_len <= 0)
break;
MatchResult new_mr = findAdvFinalize(new_cur, match, expected);
if (new_mr.match_len == mr.match_len)
bool pit_changed = false;
do {
cur.pos() = 0;
- MatchResult found_match = match(cur, -1, false);
+ MatchResult found_match = match(cur, -1, MatchStringAdv::MatchAnyPlace);
if (found_match.match_len > 0) {
if (pit_changed)
LYXERR(Debug::FINDVERBOSE, "findBackAdv2: cur: " << cur);
DocIterator cur_prev_iter;
do {
- found_match = match(cur);
+ found_match = match(cur, -1, MatchStringAdv::MatchFromStart);
LYXERR(Debug::FINDVERBOSE, "findBackAdv3: found_match="
<< (found_match.match_len > 0) << ", cur: " << cur);
if (found_match.match_len > 0) {
return 0;
LASSERT(sel_len > 0, return 0);
- if (!matchAdv(sel_beg, sel_len).match_len)
+ if (matchAdv(sel_beg, sel_len, MatchStringAdv::MatchFromStart).match_len <= 0)
return 0;
// Build a copy of the replace buffer, adapted to the KeepCase option