X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxfind.cpp;h=c8fc40947e75643cb281589fc41b62b30508858c;hb=8f43f83ddfbe6adbb04a2ba86e62797c0a313324;hp=a4dd810e35d95ee48d9875ea28c219a803e4cea3;hpb=769df229dc2a95c0511f3e5510c772c02376c7f3;p=lyx.git

diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index a4dd810e35..c8fc40947e 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -14,6 +14,7 @@
  */
 
 #include <config.h>
+#include <iterator>
 
 #include "lyxfind.h"
 
@@ -88,6 +89,8 @@ class IgnoreFormats {
 	///
 	bool getShape() const { return ignoreShape_; }
 	///
+	bool getSize() const { return ignoreSize_; }
+	///
 	bool getUnderline() const { return ignoreUnderline_; }
 	///
 	bool getMarkUp() const { return ignoreMarkUp_; }
@@ -118,6 +121,8 @@ private:
 	///
 	bool ignoreShape_ = false;
 	///
+	bool ignoreSize_ = true;
+	///
 	bool ignoreUnderline_ = false;
 	///
 	bool ignoreMarkUp_ = false;
@@ -166,6 +171,9 @@ void IgnoreFormats::setIgnoreFormat(string const & type, bool value, bool fromUs
 	else if (type == "shape") {
 		ignoreShape_ = value;
 	}
+	else if (type == "size") {
+		ignoreSize_ = value;
+	}
 	else if (type == "family") {
 		ignoreFamily_ = value;
 	}
@@ -805,10 +813,48 @@ namespace {
 
 typedef vector<pair<string, string> > Escapes;
 
+static string getRegexSpaceCount(int count)
+{
+	// Regex fragment matching exactly 'count' whitespace characters.
+	if (count <= 0)
+		return "";
+	if (count == 1)
+		return "\\s";
+	// Two or more blanks become a single counted repetition.
+	return "\\s{" + std::to_string(count) + "}";
+}
+
 string string2regex(string in)
 {
-	static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" };
-	string temp = std::regex_replace(in, specialChars,  R"(\$&)" );
+	static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\$\\])" };
+	string tempx = std::regex_replace(in, specialChars,  R"(\$&)" );
+	// Special handling for ' '
+	string temp("");
+	int blanks = 0;
+	for (unsigned i = 0; i < tempx.size(); i++) {
+		if (tempx[i] == ' ' || tempx[i] == '~' ) {
+			// normal blanks
+			blanks++;
+		}
+		else if ((tempx[i] == '\302' && tempx[i+1] == '\240')
+			|| (tempx[i] == '\342' && tempx[i+1] == '\200')) {
+			// protected space
+			// thin space
+			blanks++;
+			i++;
+		}
+		else {
+			if (blanks > 0) {
+				temp += getRegexSpaceCount(blanks);
+			}
+			temp += tempx[i];
+			blanks = 0;
+		}
+	}
+	if (blanks > 0) {
+		temp += getRegexSpaceCount(blanks);
+	}
+
 	string temp2("");
 	size_t lastpos = 0;
 	size_t fl_pos = 0;
@@ -838,7 +884,8 @@ string correctRegex(string t, bool withformat)
 	 * and \{, \}, \[, \] => {, }, [, ]
 	 */
 	string s("");
-	regex wordre("(\\\\)*(\\\\(([A-Za-z]+|[\\{\\}%])( |\\{\\})?|[\\[\\]\\{\\}]))");
+	static std::regex wordre("(\\\\)*(\\\\(( |[A-Za-z]+|[\\{\\}%])( |\\{\\})?|[\\[\\]\\{\\}]))");
+	static std::regex protectedSpace { R"(~)" };
 	size_t lastpos = 0;
 	smatch sub;
 	bool backslashed = false;
@@ -846,6 +893,7 @@ string correctRegex(string t, bool withformat)
 		buildAccentsMap();
 
 	//LYXERR0("correctRegex input '" << t << "'");
+	int skip = 0;
 	for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
 		sub = *it;
 		string replace;
@@ -858,10 +906,14 @@ string correctRegex(string t, bool withformat)
 				{
 					// transforms '\backslash \{' into '\{'
 					string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
-					if ((next == "\\{") || (next == "\\}")) {
+					if ((next == "\\{") || (next == "\\}") || (next == "\\ ")) {
 						replace = "";
 						backslashed = true;
 					}
+					else if (withformat && next[0] == '$') {
+						replace = accents["lyxdollar"];
+						skip = 1;	// Skip following '$'
+					}
 				}
 			}
 			else if (sub.str(4) == "mathcircumflex")
@@ -873,6 +925,8 @@ string correctRegex(string t, bool withformat)
 						replace = accents["braceleft"];
 					else if (sub.str(3) == "}")
 						replace = accents["braceright"];
+					else if (sub.str(3) == " ")
+						replace = "\\ ";
 					else {
 						// else part should not exist
 						LASSERT(0, /**/);
@@ -883,6 +937,8 @@ string correctRegex(string t, bool withformat)
 						replace = "\\{";
 					else if (sub.str(3) == "}")
 						replace = "\\}";
+					else if (sub.str(3) == " ")
+						replace = "\\ ";
 					else {
 						// else part should not exist
 						LASSERT(0, /**/);
@@ -895,6 +951,8 @@ string correctRegex(string t, bool withformat)
 				replace = "}";
 			else if (sub.str(4) == "%")
 				replace = "%";
+			else if (sub.str(4) == " ")
+				replace = " ";
 			else {
 				AccentsIterator it_ac = accents.find(sub.str(4));
 				if (it_ac == accents.end()) {
@@ -906,14 +964,15 @@ string correctRegex(string t, bool withformat)
 			}
 		}
 		if (lastpos < (size_t) sub.position(2))
-			s += t.substr(lastpos, sub.position(2) - lastpos);
+			s += std::regex_replace(t.substr(lastpos, sub.position(2) - lastpos), protectedSpace, R"( )");
 		s += replace;
-		lastpos = sub.position(2) + sub.length(2);
+		lastpos = sub.position(2) + sub.length(2) + skip;
+		skip = 0;
 	}
 	if (lastpos == 0)
-		s = t;
+		s = std::regex_replace(t, protectedSpace, R"( )");
 	else if (lastpos < t.length())
-		s += t.substr(lastpos, t.length() - lastpos);
+		s += std::regex_replace(t.substr(lastpos, t.length() - lastpos), protectedSpace, R"( )");
 	// Handle quotes in regex
 	// substitute all '„', '“', '»', '«' with '"'
 	// and all '‚', '‘', '›', '‹' with "\'"
@@ -1008,13 +1067,19 @@ public:
 	 ** constructor as opt.search, under the opt.* options settings.
 	 **
 	 ** @param at_begin
-	 ** 	If set, then match is searched only against beginning of text starting at cur.
-	 ** 	If unset, then match is searched anywhere in text starting at cur.
+	 ** 	If set to MatchStringAdv::MatchFromStart,
+	 ** 	  then match is searched only against beginning of text starting at cur.
+	 ** 	Otherwise the match is searched anywhere in text starting at cur.
 	 **
 	 ** @return
 	 ** The length of the matching text, or zero if no match was found.
 	 **/
-	MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+	enum matchType {
+		MatchAnyPlace,
+		MatchFromStart
+	};
+	string matchTypeAsString(matchType const x) const { return (x == MatchFromStart ? "MatchFromStart" : "MatchAnyPlace"); }
+	MatchResult operator()(DocIterator const & cur, int len, matchType at_begin) const;
 #if QTSEARCH
 	bool regexIsValid;
 	string regexError;
@@ -1030,7 +1095,7 @@ public:
 
 private:
 	/// Auxiliary find method (does not account for opt.matchword)
-	MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+	MatchResult findAux(DocIterator const & cur, int len, matchType at_begin) const;
 	void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = "");
 
 	/** Normalize a stringified or latexified LyX paragraph.
@@ -1046,7 +1111,7 @@ private:
 	 ** @todo Normalization should also expand macros, if the corresponding
 	 ** search option was checked.
 	 **/
-	string normalize(docstring const & s, bool ignore_fomat) const;
+	string convertLF2Space(docstring const & s, bool ignore_fomat) const;
 	// normalized string to search
 	string par_as_string;
 	// regular expression to use for searching
@@ -1091,7 +1156,7 @@ void MatchStringAdv::FillResults(MatchResult &found_mr)
 static void setFindParams(OutputParams &runparams)
 {
 	runparams.flavor = Flavor::XeTeX;
-	runparams.use_polyglossia = true;
+	//runparams.use_polyglossia = true;
 	runparams.linelen = 10000; //lyxrc.plaintext_linelen;
 	// No side effect of file copying and image conversion
 	runparams.dryrun = true;
@@ -1106,11 +1171,11 @@ static docstring buffer_to_latex(Buffer & buffer)
 	runparams.nice = true;
 	setFindParams(runparams);
 	if (ignoreFormats.getDeleted())
-		runparams.for_search = OutputParams::SearchWithoutDeleted;
+		runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
 	else
-		runparams.for_search = OutputParams::SearchWithDeleted;
+		runparams.find_set_feature(OutputParams::SearchWithDeleted);
 	if (ignoreFormats.getNonContent()) {
-		runparams.for_search |= OutputParams::SearchNonOutput;
+		runparams.find_add_feature(OutputParams::SearchNonOutput);
 	}
 	pit_type const endpit = buffer.paragraphs().size();
 	for (pit_type pit = 0; pit != endpit; ++pit) {
@@ -1120,7 +1185,7 @@ static docstring buffer_to_latex(Buffer & buffer)
 	return ods.str();
 }
 
-static string latexNamesToUtf8(docstring strIn)
+static string latexNamesToUtf8(docstring strIn, bool withformat)
 {
 	string addtmp = to_utf8(strIn);
 	static regex const rmAcc("(\\\\)*("
@@ -1162,6 +1227,10 @@ static string latexNamesToUtf8(docstring strIn)
 		add = addtmp;
 	else if (addtmp.length() > lastpos)
 		add += addtmp.substr(lastpos, addtmp.length() - lastpos);
+	if (!withformat) {
+		static std::regex repltilde { R"(~)" };
+		add = std::regex_replace(add, repltilde, accents["lyxtilde"]);
+	}
 	LYXERR(Debug::FINDVERBOSE, "Adding to search string: '"
 			<< add << "'");
 	return add;
@@ -1180,20 +1249,20 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
 		int option = AS_STR_INSETS |AS_STR_PLAINTEXT;
 		if (ignoreFormats.getDeleted()) {
 			option |= AS_STR_SKIPDELETE;
-			runparams.for_search = OutputParams::SearchWithoutDeleted;
+			runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
 		}
 		else {
-			runparams.for_search = OutputParams::SearchWithDeleted;
+			runparams.find_set_feature(OutputParams::SearchWithDeleted);
 		}
 		if (ignoreFormats.getNonContent()) {
-			runparams.for_search |= OutputParams::SearchNonOutput;
+			runparams.find_add_feature(OutputParams::SearchNonOutput);
 		}
 		string t("");
 		for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
 			Paragraph const & par = buffer.paragraphs().at(pit);
 			string add = latexNamesToUtf8(par.asString(pos_type(0), par.size(),
 								option,
-								&runparams));
+								&runparams), !opt.ignoreformat);
 			LYXERR(Debug::FINDVERBOSE, "Adding to search string: '"
 				<< add << "'");
 			t += add;
@@ -1226,7 +1295,7 @@ static size_t identifyLeading(string const & s)
 	       || regex_replace(t, t, "^\\\\begin\\{[a-zA-Z_]*\\*?\\}", ""))
 	       ;
 	LYXERR(Debug::FINDVERBOSE, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
-	return s.find(t);
+	return s.size() - t.size();
 }
 
 /*
@@ -1366,8 +1435,8 @@ public:
 	string par;
 	int ignoreidx;
 	static vector<Border> borders;
-	int depts[MAXOPENED];
-	int closes[MAXOPENED];
+	static vector<int> depts;
+	static vector<int> closes;
 	int actualdeptindex;
 	int previousNotIgnored(int) const;
 	int nextNotIgnored(int) const;
@@ -1379,6 +1448,7 @@ public:
 	void removeAccents();
 	void setForDefaultLang(KeyInfo const & defLang) const;
 	int findclosing(int start, int end, char up, char down, int repeat);
+	void removeInvalidClosings(void);
 	void handleParentheses(int lastpos, bool closingAllowed);
 	bool hasTitle;
 	// Number of disabled language specs up
@@ -1391,6 +1461,8 @@ public:
 };
 
 vector<Border> Intervall::borders = vector<Border>(30);
+vector<int> Intervall::depts = vector<int>(30);
+vector<int> Intervall::closes = vector<int>(30);
 
 int Intervall::isOpeningPar(int pos) const
 {
@@ -1424,6 +1496,8 @@ void Intervall::setForDefaultLang(KeyInfo const & defLang) const
 	}
 }
 
+#if 0
+// Not needed, because depts and closes are now dynamically expanded
 static void checkDepthIndex(int val)
 {
 	static int maxdepthidx = MAXOPENED-2;
@@ -1437,6 +1511,7 @@ static void checkDepthIndex(int val)
 		LYXERR(Debug::INFO, "maxdepthidx now " << val);
 	}
 }
+#endif
 
 #if 0
 // Not needed, because borders are now dynamically expanded
@@ -1888,7 +1963,6 @@ static void buildAccentsMap()
 	accents["cdot"] = "Â·";
 	accents["textasciicircum"] = "^";
 	accents["mathcircumflex"] = "^";
-	accents["sim"] = "~";
 	accents["guillemotright"] = "Â»";
 	accents["guillemotleft"] = "Â«";
 	accents["hairspace"]     = getutf8(0xf0000);	// select from free unicode plane 15
@@ -1909,6 +1983,9 @@ static void buildAccentsMap()
 	accents["lyxarrow"]      = getutf8(0xf0020);
 	accents["braceleft"]     = getutf8(0xf0030);
 	accents["braceright"]    = getutf8(0xf0031);
+	accents["lyxtilde"]      = getutf8(0xf0032);
+	accents["sim"]           = getutf8(0xf0032);
+	accents["lyxdollar"]     = getutf8(0xf0033);
 	accents["backslash lyx"]           = getutf8(0xf0010);	// Used logos inserted with starting \backslash
 	accents["backslash LyX"]           = getutf8(0xf0010);
 	accents["backslash tex"]           = getutf8(0xf0011);
@@ -1984,7 +2061,8 @@ void Intervall::removeAccents()
 	if (accents.empty())
 		buildAccentsMap();
 	static regex const accre("\\\\("
-				 "([\\S]|[A-Za-z]+)\\{[^\\{\\}]+\\}"
+				 "([\\S]|[A-Za-z]+)\\{[^\\\\\\{\\}]+\\}"
+				 "|([\\S]|[A-Za-z]+)\\{\\\\[ij](math)?\\}"
 				 "|("
 				 "(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))"
 				 "|[A-Za-z]+"
@@ -2020,9 +2098,13 @@ void Intervall::removeAccents()
 void Intervall::handleOpenP(int i)
 {
 	actualdeptindex++;
+	if ((size_t) actualdeptindex >= depts.size()) {	// new index is past the end: enlarge both vectors together
+		depts.resize(actualdeptindex + 30);
+		closes.resize(actualdeptindex + 30);
+	}
 	depts[actualdeptindex] = i+1;
 	closes[actualdeptindex] = -1;
-	checkDepthIndex(actualdeptindex);
+	// checkDepthIndex(actualdeptindex) is not called any more; the resize above replaces it
 }
 
 void Intervall::handleCloseP(int i, bool closingAllowed)
@@ -2184,6 +2266,27 @@ int Intervall::findclosing(int start, int end, char up = '{', char down = '}', i
 	return end;
 }
 
+void Intervall::removeInvalidClosings(void)
+{
+	// Pass each '}' that has no opening '{' before it to addIntervall().
+	// This can happen, if there are deleted parts.
+	// 'open_braces' counts the '{' still waiting for their '}'.
+	int open_braces = 0;
+	for (unsigned idx = 0; idx < par.size(); ++idx) {
+		char const ch = par[idx];
+		if (ch == '\\')
+			++idx;	// the character after a backslash is never examined
+		else if (ch == '{')
+			++open_braces;
+		else if (ch == '}' && open_braces > 0)
+			--open_braces;	// properly paired closing
+		else if (ch == '}') {
+			// unbalanced closing: register the one-character range
+			addIntervall(idx, idx+1);
+			LYXERR(Debug::FINDVERBOSE, "removed invalid closing '}' at " << idx);
+		}
+	}
+}
 class MathInfo {
 	class MathEntry {
 	public:
@@ -2271,6 +2374,7 @@ void LatexInfo::buildEntries(bool isPatternString)
 	static bool removeMathHull = false;
 
 	interval_.removeAccents();
+	interval_.removeInvalidClosings();
 
 	for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) {
 		submath = *itmath;
@@ -2320,7 +2424,7 @@ void LatexInfo::buildEntries(bool isPatternString)
 	}
 	// Ignore language if there is math somewhere in pattern-string
 	if (isPatternString) {
-		for (auto s: usedText) {
+		for (auto const & s: usedText) {
 			// Remove entries created in previous search runs
 			keys.erase(s);
 		}
@@ -2676,7 +2780,8 @@ void LatexInfo::buildKeys(bool isPatternString)
 	if (keysBuilt && !isPatternString) return;
 
 	// Keys to ignore in any case
-	makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+	makeKey("text|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+	makeKey("nonumber|notag", KeyInfo(KeyInfo::headRemove, 0, true), true);
 	// Known standard keys with 1 parameter.
 	// Split is done, if not at start of region
 	makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
@@ -2729,9 +2834,14 @@ void LatexInfo::buildKeys(bool isPatternString)
 	// handle like standard keys with 1 parameter.
 	makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString);
 
-	// Ignore deleted text
-	makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString);
-	// but preserve added text
+	if (ignoreFormats.getDeleted()) {
+		// Ignore deleted text
+		makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString);
+	}
+	else {
+		// but preserve added text
+		makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString);
+	}
 	makeKey("lyxadded", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString);
 
 	// Macros to remove, but let the parameter survive
@@ -2757,7 +2867,7 @@ void LatexInfo::buildKeys(bool isPatternString)
 	makeKey("textgreek|textcyrillic", KeyInfo(KeyInfo::isStandard, 1, true), false);
 	makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString);
 	// like ('tiny{}' or '\tiny ' ... )
-	makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString);
+	makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, ignoreFormats.getSize()), isPatternString);
 
 	// Survives, like known character
 	// makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
@@ -2814,6 +2924,8 @@ string Intervall::show(int lastpos)
 	int idx = 0;                          /* int intervalls */
 	string s;
 	int i = 0;
+	if ((unsigned) lastpos > par.size())
+		lastpos = par.size();
 	for (idx = 0; idx <= ignoreidx; idx++) {
 		while (i < lastpos) {
 			int printsize;
@@ -3360,10 +3472,12 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with
 	while ((parlen > 0) && (par[parlen-1] == '\n')) {
 		parlen--;
 	}
+#if 0
 	if (isPatternString && (parlen > 0) && (par[parlen-1] == '~')) {
 		// Happens to be there in case of description or labeling environment
 		parlen--;
 	}
+#endif
 	string result;
 	if (withformat) {
 		// Split the latex input into pieces which
@@ -3545,7 +3659,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
 		CreateRegexp(opt, "", "", "");
 		return;
 	}
-	use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
+	use_regexp = ds.find(from_utf8("\\regexp{")) != std::string::npos;
 	if (opt.replace_all && previous_single_replace) {
 		previous_single_replace = false;
 		num_replaced = 0;
@@ -3555,7 +3669,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
 		previous_single_replace = true;
 	}
 	// When using regexp, braces are hacked already by escape_for_regex()
-	par_as_string = normalize(ds, opt.ignoreformat);
+	par_as_string = convertLF2Space(ds, opt.ignoreformat);
 	open_braces = 0;
 	close_wildcards = 0;
 
@@ -3634,7 +3748,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
 					break;
 			}
 			if (lng < par_as_string.size())
-				par_as_string = par_as_string.substr(0,lng);
+				par_as_string.resize(lng);
 		}
 		LYXERR(Debug::FINDVERBOSE, "par_as_string after correctRegex is '" << par_as_string << "'");
 		if ((lng > 0) && (par_as_string[0] == '^')) {
@@ -3661,7 +3775,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
 			}
 			if (opt.matchword) {
 				modifyRegexForMatchWord(par_as_string);
-				opt.matchword = false;
+				// opt.matchword = false;
 			}
 			regexp_str = "(" + lead_as_regexp + ")()" + par_as_string;
 			regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string;
@@ -3672,18 +3786,15 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
 	}
 }
 
-MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
+MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const
 {
 	MatchResult mres;
 
 	mres.searched_size = len;
-	if (at_begin &&
-			(opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
-		return mres;
 
 	docstring docstr = stringifyFromForSearch(opt, cur, len);
 	string str;
-	str = normalize(docstr, opt.ignoreformat);
+	str = convertLF2Space(docstr, opt.ignoreformat);
 	if (!opt.ignoreformat) {
 		str = correctlanguagesetting(str, false, !opt.ignoreformat);
 		// remove closing '}' and '\n' to allow for use of '$' in regex
@@ -3693,22 +3804,25 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 		if (lng != str.size()) {
 			str = str.substr(0, lng);
 		}
+		// Replace occurrences of '~' with ' '
+		static std::regex specialChars { R"(~)" };
+		str = std::regex_replace(str, specialChars,  R"( )" );
 	}
 	if (str.empty()) {
 		mres.match_len = -1;
 		return mres;
 	}
-	LYXERR(Debug::FINDVERBOSE, "After normalization: Matching against:\n'" << str << "'");
+	LYXERR(Debug::FINDVERBOSE|Debug::FIND, "After normalization: Matching against:\n'" << str << "'");
 
 	LASSERT(use_regexp, /**/);
 	{
 		// use_regexp always true
-		LYXERR(Debug::FINDVERBOSE, "Searching in regexp mode: at_begin=" << at_begin);
+		LYXERR(Debug::FINDVERBOSE, "Searching in regexp mode: at_begin=" << matchTypeAsString(at_begin));
 #if QTSEARCH
 		QString qstr = QString::fromStdString(str);
 		QRegularExpression const *p_regexp;
 		QRegularExpression::MatchType flags = QRegularExpression::NormalMatch;
-		if (at_begin) {
+		if (at_begin == MatchStringAdv::MatchFromStart) {
 			p_regexp = &regexp;
 		} else {
 			p_regexp = &regexp2;
@@ -3719,7 +3833,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 #else
 		regex const *p_regexp;
 		regex_constants::match_flag_type flags;
-		if (at_begin) {
+		if (at_begin == MatchStringAdv::MatchFromStart) {
 			flags = regex_constants::match_continuous;
 			p_regexp = &regexp;
 		} else {
@@ -3758,7 +3872,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 		int matchend = match.capturedEnd(0);
 		size_t strsize = qstr.size();
 		if (!opt.ignoreformat) {
-			while (mres.match_len > 0) {
+			while (mres.match_len > 1) {
 				QChar c = qstr.at(matchend - 1);
 				if ((c == '\n') || (c == '}') || (c == '{')) {
 					mres.match_len--;
@@ -3787,7 +3901,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 		int matchend = strend;
 		size_t strsize = str.size();
 		if (!opt.ignoreformat) {
-			while (mres.match_len > 0) {
+			while (mres.match_len > 1) {
 				char c = str.at(matchend - 1);
 				if ((c == '\n') || (c == '}') || (c == '{')) {
 					mres.match_len--;
@@ -3833,23 +3947,20 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 }
 
 
-MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
+MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const
 {
 	MatchResult mres = findAux(cur, len, at_begin);
-	int res = mres.match_len;
 	LYXERR(Debug::FINDVERBOSE,
-	       "res=" << res << ", at_begin=" << at_begin
+	       "res=" << mres.match_len << ", at_begin=" << matchTypeAsString(at_begin)
 	       << ", matchAtStart=" << opt.matchAtStart
 	       << ", inTexted=" << cur.inTexted());
-	if (opt.matchAtStart) {
-		if (cur.pos() != 0)
-			mres.match_len = 0;
-		else if (mres.match_prefix > 0)
-			mres.match_len = 0;
-		return mres;
+	if (mres.match_len > 0) {	// results with match_len <= 0 are returned unchanged
+		if (opt.matchAtStart) {
+			if (cur.pos() > 0 || mres.match_prefix > 0)	// cur is not at pos 0, or the match has a prefix
+				mres.match_len = 0;	// report "no match"
+		}
 	}
-	else
-		return mres;
+	return mres;	// single exit for all cases
 }
 
 #if 0
@@ -3877,63 +3988,73 @@ static bool simple_replace(string &t, string from, string to)
 }
 #endif
 
-string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
+string MatchStringAdv::convertLF2Space(docstring const &s, bool ignore_format) const
 {
-	string t;
-	t = lyx::to_utf8(s);
-	// Remove \n at begin
-	while (!t.empty() && t[0] == '\n')
-		t = t.substr(1);
-	// Remove \n* at end
-	while (!t.empty() && t[t.size() - 1] == '\n') {
-		t = t.substr(0, t.size() - 1);
-	}
+	// Using original docstring to handle '\n'; the pieces are converted to UTF-8 while being appended to t.
+	// With ignore_format set, each '\n' found before the last character becomes ' '; otherwise the rules below apply.
+	if (s.size() == 0) return "";
+	stringstream t;
 	size_t pos;
-	// Handle all other '\n'
-	while ((pos = t.find("\n")) != string::npos) {
-		if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
-			// Handle '\\\n'
-			if (isAlnumASCII(t[pos+1])) {
-				t.replace(pos-2, 3, " ");
-			}
-			else {
-				t.replace(pos-2, 3, "");
-			}
+	size_t start = 0;
+	size_t end = s.size() - 1;
+	if (!ignore_format) {	// trim leading and trailing '\n' by moving start and end
+		while (s[start] == '\n' && start <= end)
+			start++;
+		while (end >= start && s[end] == '\n')
+			end--;
+		if (start >= end + 1)
+			return "";
+	}
+	do {
+		bool dospace = true;
+		int skip = -1;	// NOTE(review): if no branch below changes it, -1 makes the substr() at the end copy the '\n' too - confirm intended
+		pos = s.find('\n', start);
+		if (pos >= end) {	// no further '\n' before the last character (also true for npos)
+			t << lyx::to_utf8(s.substr(start, end + 1 - start));
+			break;
 		}
-		else {
-			if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
-				// '\n' adjacent to non-alpha-numerics, discard
-				t.replace(pos, 1, "");
-			}
-			else {
-				// Replace all other \n with spaces
-				t.replace(pos, 1, " ");
+		if (!ignore_format) {
+			if ((pos > start + 1) &&
+			     s[pos-1] == '\\' &&
+			     s[pos-2] == '\\') {
+				skip = 2;	// also drop the two backslashes in front of the '\n'
+				if ((pos > start + 2) &&
+				    (s[pos+1] == '~' || isSpace(s[pos+1]) ||
+				     s[pos-3] == '~' || isSpace(s[pos-3]))) {
+					// discard "\\\\\n", do not replace with space
+					dospace = false;
+				}
 			}
-			if (!ignore_format) {
-				size_t count = 0;
-				while ((pos > count + 1) && (t[pos - 1 -count] == '%')) {
-					count++;
+			else if (pos > start) {
+				if (s[pos-1] == '%') {
+					skip = 1;	// skip ends up as the length of the '%' run in front of the '\n'
+					while ((pos > start+skip) && (s[pos-1-skip] == '%'))
+						skip++;
+					if ((pos > start+skip) &&
+					    (s[pos+1] == '~' || isSpace(s[pos+1]) ||
+					     s[pos-1-skip] == '~' || isSpace(s[pos-1-skip]))) {
+						// discard '%%%%%\n'
+						dospace = false;
+					}
 				}
-				if (count > 0) {
-					t.replace(pos - count, count, "");
+				else if (!isAlnumASCII(s[pos+1]) || !isAlnumASCII(s[pos-1])) {
+					dospace = false;
+					skip = 0;	// remove the '\n' only
 				}
 			}
 		}
-	}
-	// Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
-	// Kornel: Added textsl, textsf, textit, texttt and noun
-	// + allow to seach for colored text too
-	LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t);
-	while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
-		LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
-	while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
-		LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
-	while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
-
-	return t;
+		else {
+			dospace = true;
+			skip = 0;	// ignore_format: drop just the '\n'; dospace makes a blank take its place
+		}
+		t << lyx::to_utf8(s.substr(start, pos-skip-start));	// text from start up to the '\n', shortened by skip characters
+		if (dospace)
+			t << ' ';
+		start = pos+1;
+	} while (start <= end);
+	return(t.str());
 }
 
-
 docstring stringifyFromCursor(DocIterator const & cur, int len)
 {
 	LYXERR(Debug::FINDVERBOSE, "Stringifying with len=" << len << " from cursor at pos: " << cur);
@@ -3950,20 +4071,20 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
 		int option = AS_STR_INSETS | AS_STR_PLAINTEXT;
 		if (ignoreFormats.getDeleted()) {
 			option |= AS_STR_SKIPDELETE;
-			runparams.for_search = OutputParams::SearchWithoutDeleted;
+			runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
 		}
 		else {
-			runparams.for_search = OutputParams::SearchWithDeleted;
+			runparams.find_set_feature(OutputParams::SearchWithDeleted);
 		}
 		if (ignoreFormats.getNonContent()) {
-			runparams.for_search |= OutputParams::SearchNonOutput;
+			runparams.find_add_feature(OutputParams::SearchNonOutput);
 		}
 		LYXERR(Debug::FINDVERBOSE, "Stringifying with cur: "
 		       << cur << ", from pos: " << cur.pos() << ", end: " << end);
 		docstring res = from_utf8(latexNamesToUtf8(par.asString(cur.pos(), end,
 								        option,
-								        &runparams)));
-		LYXERR(Debug::FIND, "Stringified text from pos(" << cur.pos() << ") len(" << len << "): " << res);
+								        &runparams), false));
+		LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Stringified text from pos(" << cur.pos() << ") len(" << len << "): " << res);
 		return res;
 	} else if (cur.inMathed()) {
 		CursorSlice cs = cur.top();
@@ -3975,7 +4096,7 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
 		MathData md2;
 		for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it)
 			md2.push_back(*it);
-		docstring res = from_utf8(latexNamesToUtf8(asString(md2)));
+		docstring res = from_utf8(latexNamesToUtf8(asString(md2), false));
 		LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Stringified math from pos(" << cur.pos() << ") len(" << len << "): " << res);
 		return res;
 	}
@@ -4003,13 +4124,13 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
 	runparams.nice = false;
 	setFindParams(runparams);
 	if (ignoreFormats.getDeleted()) {
-		runparams.for_search = OutputParams::SearchWithoutDeleted;
+		runparams.find_set_feature(OutputParams::SearchWithoutDeleted);
 	}
 	else {
-		runparams.for_search = OutputParams::SearchWithDeleted;
+		runparams.find_set_feature(OutputParams::SearchWithDeleted);
 	}
 	if (ignoreFormats.getNonContent()) {
-		runparams.for_search |= OutputParams::SearchNonOutput;
+		runparams.find_add_feature(OutputParams::SearchNonOutput);
 	}
 
 	if (cur.inTexted()) {
@@ -4019,7 +4140,7 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
 			endpos = cur.pos() + len;
 		TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
 			  string(), cur.pos(), endpos, true);
-		LYXERR(Debug::FIND, "Latexified text from pos(" << cur.pos() << ") len(" << len << "): " << ods.str());
+		LYXERR(Debug::FINDVERBOSE|Debug::FIND, "Latexified text from pos(" << cur.pos() << ") len(" << len << "): " << ods.str());
 		return(ods.str());
 	} else if (cur.inMathed()) {
 		// Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly
@@ -4114,13 +4235,13 @@ MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Mat
 	// either one sees "http://www.bla.bla" or nothing
 	// so the search for "www" gives prefix_len = 7 (== sizeof("http://")
 	// and although we search for only 3 chars, we find the whole hyperlink inset
-	bool at_begin = (expected.match_prefix == 0);
+	MatchStringAdv::matchType at_begin = (expected.match_prefix == 0) ? MatchStringAdv::MatchFromStart : MatchStringAdv::MatchAnyPlace;
 	if (!match.opt.forward && match.opt.ignoreformat) {
 		if (expected.pos > 0)
 			return fail;
 	}
-	LASSERT(at_begin, /**/);
-	if (expected.match_len > 0 && at_begin) {
+	LASSERT(at_begin == MatchStringAdv::MatchFromStart, /**/);
+	if (expected.match_len > 0 && at_begin == MatchStringAdv::MatchFromStart) {
 		// Search for deepest match
 		old_cur = cur;
 		max_match = expected;
@@ -4152,11 +4273,14 @@ MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, Mat
 	}
 	else {
 		// (expected.match_len <= 0)
-		mres = match(cur);      /* match valid only if not searching whole words */
+		mres = match(cur, -1, MatchStringAdv::MatchFromStart);      /* match valid only if not searching whole words */
 		displayMres(mres, "Start with negative match", cur);
 		max_match = mres;
 	}
-	if (max_match.match_len <= 0) return fail;
+	// Only now we are really at_begin
+	if ((max_match.match_len <= 0) ||
+	    (match.opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()))
+		return fail;
 	LYXERR(Debug::FINDVERBOSE, "Ok");
 
 	// Compute the match length
@@ -4254,7 +4378,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
 	while (!theApp()->longOperationCancelled() && cur) {
 		//(void) findAdvForwardInnermost(cur);
 		LYXERR(Debug::FINDVERBOSE, "findForwardAdv() cur: " << cur);
-		MatchResult mres = match(cur, -1, false);
+		MatchResult mres = match(cur, -1, MatchStringAdv::MatchAnyPlace);
 		string msg = "Starting";
 		if (repeat)
 			msg = "Repeated";
@@ -4292,7 +4416,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
 					continue;
 				}
 				cur.pos() = cur.pos() + increment;
-				MatchResult mres2 = match(cur, -1, false);
+				MatchResult mres2 = match(cur, -1, MatchStringAdv::MatchAnyPlace);
 				displayMres(mres2, "findForwardAdv loop", cur)
 						switch (interpretMatch(mres, mres2)) {
 					case MatchResult::newIsTooFar:
@@ -4315,7 +4439,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
 					default:
 						// Todo@
 						// Handle not like MatchResult::newIsTooFar
-						LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
+						LYXERR(Debug::FINDVERBOSE, "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
 						firstInvalid--;
 						increment = increment*3/4;
 						cur = old_cur;
@@ -4368,7 +4492,8 @@ MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, M
 		LYXERR(Debug::FINDVERBOSE, "findMostBackwards(): cur=" << cur);
 		DocIterator new_cur = cur;
 		new_cur.backwardPos();
-		if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
+		if (new_cur == cur || &new_cur.inset() != &inset
+		    || match(new_cur, -1, MatchStringAdv::MatchFromStart).match_len <= 0)
 			break;
 		MatchResult new_mr = findAdvFinalize(new_cur, match, expected);
 		if (new_mr.match_len == mr.match_len)
@@ -4394,7 +4519,7 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
 	bool pit_changed = false;
 	do {
 		cur.pos() = 0;
-		MatchResult found_match = match(cur, -1, false);
+		MatchResult found_match = match(cur, -1, MatchStringAdv::MatchAnyPlace);
 
 		if (found_match.match_len > 0) {
 			if (pit_changed)
@@ -4404,7 +4529,7 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
 			LYXERR(Debug::FINDVERBOSE, "findBackAdv2: cur: " << cur);
 			DocIterator cur_prev_iter;
 			do {
-				found_match = match(cur);
+				found_match = match(cur, -1, MatchStringAdv::MatchFromStart);
 				LYXERR(Debug::FINDVERBOSE, "findBackAdv3: found_match="
 				       << (found_match.match_len > 0) << ", cur: " << cur);
 				if (found_match.match_len > 0) {
@@ -4566,7 +4691,7 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma
 		return 0;
 	LASSERT(sel_len > 0, return 0);
 
-	if (!matchAdv(sel_beg, sel_len).match_len)
+	if (matchAdv(sel_beg, sel_len, MatchStringAdv::MatchFromStart).match_len <= 0)
 		return 0;
 
 	// Build a copy of the replace buffer, adapted to the KeepCase option
@@ -4634,6 +4759,10 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma
 	return 1;
 }
 
+static bool isWordChar(char_type c) // Predicate: does c count as part of a word (as opposed to a word separator)?
+{
+	return isLetterChar(c) || isNumberChar(c); // true when either isLetterChar() or isNumberChar() accepts c
+}
 
 /// Perform a FindAdv operation.
 bool findAdv(BufferView * bv, FindAndReplaceOptions & opt)
@@ -4659,8 +4788,52 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions & opt)
 			bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
 		num_replaced += findAdvReplace(bv, opt, matchAdv);
 		cur = bv->cursor();
-		if (opt.forward)
+		if (opt.forward) {
+			if (opt.matchword && cur.pos() > 0) {  // Skip word-characters if we are in the mid of a word
+				if (cur.inTexted()) {
+					Paragraph const & par = cur.paragraph();
+					int len_limit, new_pos;
+					if (cur.lastpos() < par.size())
+						len_limit = cur.lastpos();
+					else
+						len_limit = par.size();
+					for (new_pos = cur.pos() - 1; new_pos < len_limit; new_pos++) {
+						if (!isWordChar(par.getChar(new_pos)))
+							break;
+					}
+					if (new_pos > cur.pos())
+						cur.pos() = new_pos;
+				}
+				else if (cur.inMathed()) {
+					// Check whether 'cur.pos()-1' and 'cur.pos()' both point to a letter.
+					// NOTE: it is unclear whether the selection should be taken into account here.
+					bool sel = bv->cursor().selection();
+					if (!sel && cur.pos() < cur.lastpos()) {
+						CursorSlice const & cs = cur.top();
+						MathData md = cs.cell();
+						int len = -1;
+						MathData::const_iterator it_end = md.end();
+						MathData md2;
+						// Start the check with one character before actual cursor position
+						for (MathData::const_iterator it = md.begin() + cs.pos() - 1;
+						    it != it_end; ++it)
+							md2.push_back(*it);
+						docstring inp = asString(md2);
+						for (len = 0; (unsigned) len < inp.size() && len + cur.pos() <= cur.lastpos(); len++) {
+							if (!isWordChar(inp[len]))
+								break;
+						}
+						// len == 0 means previous char was a word separator
+						// len == 1       search starts with a word separator
+						// len == 2 ...   we have to skip len -1 chars
+						if (len > 1)
+							cur.pos() = cur.pos() + len - 1;
+					}
+				}
+				opt.matchword = false;
+			}
 			pos_len = findForwardAdv(cur, matchAdv);
+		}
 		else
 			pos_len = findBackwardsAdv(cur, matchAdv);
 	} catch (exception & ex) {