From 8b21b2f8fbf71e09f0806f5a84eb7ba4501e14fc Mon Sep 17 00:00:00 2001
From: Kornel Benko <kornel@lyx.org>
Date: Sun, 14 Oct 2018 20:39:13 +0200
Subject: [PATCH] Amend(2) 7a03fa6: Advanced search with format:

Further tweaking.
---
 src/lyxfind.cpp | 235 ++++++++++++++++++++++++++++++------------------
 1 file changed, 147 insertions(+), 88 deletions(-)

diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index e2ca8cdd1e..baec6ab5c3 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -979,6 +979,12 @@ static string removefontinfo(string par)
 	return(par);
 }
 
+class emptyResult {
+ public:
+  bool isEmpty;
+  int lastPosition;
+ emptyResult(bool empty, int pos) : isEmpty(empty), lastPosition(pos) {};
+};
 
 class LangInfo {
   public:
@@ -1034,9 +1040,8 @@ class LangInfo {
     void output(ostringstream &os, int);
     void addIntervall(int upper);
     void addIntervall(int low, int upper); /* if explicit */
-    void handleParentheses(int lastpos);
-    string show(int lastpos);
-    bool discardParethesizedInBlock(int start);
+    void handleParentheses(int lastpos, bool closingAllowed);
+    int discardParethesizedInBlock(int start);
   private:
     string par;
     string _search;
@@ -1054,18 +1059,20 @@ class LangInfo {
     int ignoreidx;
     bool regexPossible;
     void adaptIgnoringParts(bool useOld = false);
-    int nextNotIgored(int start);
+    int nextNotIgnored(int start);
+    int previousNotIgnored(int start);
     bool discarSuperfluousParentheses(int start);
+    emptyResult checkEmpty(int start, bool atStart);
 };
 
 void LangInfo::setDataEnd(int dataend)
 {
   if (dataend < _tokenend) {
     _dataEnd = _tokenend;
-    // cout << "Wrong data start, too low\n";
+    LYXERR(Debug::FIND, "Wrong data start, too low");
   }
   else if (size_t(dataend) > par.length()) {
-    // cout << "Wrong data start, too high\n";
+    LYXERR(Debug::FIND, "Wrong data start, too high");
     _dataEnd = par.length();
   }
   else
@@ -1112,17 +1119,17 @@ void LangInfo::setDataStart(int datastart)
   bool reUse = true;                    /* Reuse previous ignoring intervalls */
   if (datastart < _tokenend) {
     _dataStart = _tokenend;
-    // cout << "Wrong data start, too low\n";
+    LYXERR(Debug::FIND, "Wrong data start, too low");
     reUse = false;
   }
   else if (size_t(datastart) > par.length()) {
-    // cout << "Wrong data start, too high\n";
+    LYXERR(Debug::FIND, "Wrong data start, too high");
     _dataStart = par.length();
     reUse = false;
   }
   else
     _dataStart = datastart;
-  //cout << "found entry at " << _tokenstart << "\n";
+  LYXERR(Debug::FIND, "found entry at " << _tokenstart);
   actualdeptindex = 1;                  /* == Number of open brases */
   depts[0] = _dataStart;
   closes[0] = -1;
@@ -1134,7 +1141,7 @@ void LangInfo::setDataStart(int datastart)
  * Keep the list of actual opened parentheses actual
  * (e.g. depth == 4 means there are 4 '{' not processed yet)
  */
-void LangInfo::handleParentheses(int lastpos)
+void LangInfo::handleParentheses(int lastpos, bool closingAllowed)
 {
   int skip = 0;
   for (int i = depts[actualdeptindex]; i < lastpos; i+= 1 + skip) {
@@ -1149,7 +1156,13 @@ void LangInfo::handleParentheses(int lastpos)
     }
     else if (c == '}') {
       if (actualdeptindex <= 0) {
-        LYXERR(Debug::FIND, "ERROR ERROR ERROR"); /* should never happen! */
+        if (closingAllowed) {
+          // if we are at the very end
+          addIntervall(i, i+1);
+        }
+        else {
+          LYXERR(Debug::FIND, "Bad closing parenthesis in latex");  /* should never happen! */
+        }
       }
       else {
         closes[actualdeptindex] = i+1;
@@ -1196,35 +1209,25 @@ void LangInfo::addIntervall(int upper)
   if (actualdeptindex >= 0)
     low = depts[actualdeptindex];   /*  the position of last unclosed '{' */
   else {
-    LYXERR(Debug::FIND, "ERROR ERROR ERROR2");
+    LYXERR(Debug::FIND, "Error while checking the position of last open parenthesis");
     low = upper;
   }
   addIntervall(low, upper);
 }
 
-string LangInfo::show(int lastpos)
+int LangInfo::previousNotIgnored(int start)
 {
-  ostringstream os;
-
-  os << par.substr(_tokenstart, _tokenend - _tokenstart);
-  int idx = 0;
-  for (int i = _dataStart; i < lastpos;) {
-    if (i <= ignoreIntervalls[idx][0]) {
-      os << par.substr(i, ignoreIntervalls[idx][0] - i);
-      i = ignoreIntervalls[idx][1];
-    }
-    idx++;
-    if (idx > ignoreidx) {
-      os << par.substr(i, lastpos-i);
-      break;
+    int idx = 0;                          /* int intervalls */
+    for (idx = ignoreidx; idx >= 0; --idx) {
+      if (start > ignoreIntervalls[idx][1])
+        return(start);
+      if (start >= ignoreIntervalls[idx][0])
+        start = ignoreIntervalls[idx][0]-1;
     }
-  }
-  for (int i = actualdeptindex; i > 0; --i)
-    os << "}";
-  return os.str();
+    return start;
 }
 
-int LangInfo::nextNotIgored(int start)
+int LangInfo::nextNotIgnored(int start)
 {
     int idx = 0;                          /* int intervalls */
     for (idx = 0; idx <= ignoreidx; idx++) {
@@ -1260,8 +1263,8 @@ void LangInfo::output(ostringstream &os, int lastpos)
     for (int i = _dataStart; i < lastpos;) {
       if (i <= ignoreIntervalls[idx][0]) {
         os << par.substr(i, ignoreIntervalls[idx][0] - i);
-        handleParentheses(ignoreIntervalls[idx][0]);
         i = ignoreIntervalls[idx][1];
+        handleParentheses(ignoreIntervalls[idx][1], false);
       }
       idx++;
       if (idx > ignoreidx) {
@@ -1271,17 +1274,17 @@ void LangInfo::output(ostringstream &os, int lastpos)
         break;
       }
     }
+    handleParentheses(lastpos, false);
     for (int i = actualdeptindex; i > 0; --i)
       os << "}";
   }
-  handleParentheses(lastpos);
+  handleParentheses(lastpos, true);     /* extra closings '}' allowed here */
 }
 
 bool LangInfo::nextInfo()
 {
   int start = _tokenstart;
 
-  // cout << par << "\n";
   if (valid == Invalid)
     _dataEnd = _tokenstart;
   else if (valid == LastValid)
@@ -1337,14 +1340,106 @@ bool LangInfo::firstInfo(string search1, int datastart)
   return nextInfo();
 }
 
-bool LangInfo::discardParethesizedInBlock(int start)
+/*
+ * Return 0 if no macro followed by '{}' was found
+ * >0 size of the found known macro
+ * <0 -size of the found empty unknown macro
+ */
+static int checkMacro(string checked)
+{
+  static regex anymacro("(\\\\([a-z]+)(\\{\\})+).*", regex_constants::ECMAScript);
+  static regex known("(backslash)$", regex_constants::ECMAScript);
+  cmatch cm;
+
+  if (regex_match(checked.c_str(), cm, anymacro)) {
+    string found2 = cm[2];
+    if (regex_match(found2, known)) {
+      return cm[1].second - cm[1].first;
+    }
+    else {
+      return cm[1].first - cm[1].second;
+    }
+  }
+  else
+    return 0;
+}
+
+emptyResult LangInfo::checkEmpty(int start, bool atStartOrigin)
 {
-  int depth = 0;
-  int skip = 0;
-  bool isempty = true;
+  emptyResult Result(true, start);
+
+  bool atStart = atStartOrigin;
+  while (start < _dataEnd) {
+    if (par[start] == '{') {
+      emptyResult inside = checkEmpty(start+1, atStart);
+      if (inside.isEmpty) {
+        if (atStart)
+          addIntervall(start, inside.lastPosition+1);
+        else
+          addIntervall(start+1,inside.lastPosition);
+      }
+      else {
+        // non empty parenthesis
+        if (atStart) {
+          addIntervall(start, start+1);
+          addIntervall(inside.lastPosition, inside.lastPosition+1);
+        }
+      }
+      Result.isEmpty &= inside.isEmpty;
+      start = inside.lastPosition+1;
+    }
+    else if (par[start] == '}') {
+      Result.lastPosition = start;
+      return(Result);
+    }
+    else if (par[start] == '\\') {
+      int check = checkMacro(par.substr(start, 20));
+      if (check > 0) {
+        // Known char,
+        start += check;
+        Result.isEmpty = false;
+        atStart = false;
+      }
+      else if (check == 0) {
+        // skip next escaped
+        // or it is \regexp{.*\endregexp{}} which counts as 1 char!
+        if (regexPossible && (par.compare(start, 8, "\\regexp{") == 0)) {
+          size_t endreg = par.find("\\endregexp{}}");
+          if (endreg > size_t(_dataEnd) - 13)
+            start = _dataEnd;
+          else
+            start = endreg + 12;
+        }
+        else
+          start += 2;
+        Result.isEmpty = false;
+        atStart = false;
+      }
+      else {
+        // Here follows maybe empty macro?
+        // discard e.g. '\noun{}', or '\noun{}{}'
+        addIntervall(start, start - check);
+        start = start - check;
+        atStart = atStartOrigin;
+      }
+    }
+    else {
+      // Normal chars
+      Result.isEmpty = false;
+      if (par[start] != ' ')
+        atStart = false;
+      else
+        atStart = atStartOrigin;
+      start += 1;
+    }
+  }
+  return Result;
+}
 
-  size_t regex_start, regex_end;
+int LangInfo::discardParethesizedInBlock(int start)
+{
   if (regexPossible) {
+    size_t regex_start, regex_end;
     regex_start = par.find("\\regexp{", start);
     if (regex_start == string::npos)
       regexPossible = false;
@@ -1358,52 +1453,18 @@ bool LangInfo::discardParethesizedInBlock(int start)
         regexPossible = false;
     }
   }
-  if (!regexPossible) {
-    regex_start = _dataEnd;
-    regex_end = _dataEnd;
-  }
-  for (int i = start; i < _dataEnd; i += 1+skip) {
-    char c = par[i];
-    skip = 0;
-    if (c == '\\') {
-      if (size_t(i) == regex_start) {
-        // 12 is correct, even if the length of "\\endregexp{}}" is 13
-        skip = regex_end + 12 - i;
-      }
-      else
-        skip = 1;
-      isempty = false;
-    }
-    else if (c == '{') {
-      if (depth == 0) {
-        addIntervall(i, i+1);
-        // cout << "discard '{' at " << i << "\n";
-      }
-      else
-        isempty = false;
-      depth++;
-    }
-    else if (c == '}') {
-      if (depth == 1) {
-        addIntervall(i, i+1);
-        // cout << "discard '}' at " << i << "\n";
-      }
-      else if (depth < 1)
-        break;
-      depth--;
-    }
-    else
-      isempty = false;
-  }
-  return(isempty);
+  int previous = previousNotIgnored(start-1);
+  bool atStart =  (par[previous] == '{');
+  emptyResult inside = checkEmpty(start, atStart);
+  return inside.lastPosition+1;
 }
 
 bool LangInfo::discarSuperfluousParentheses(int start)
 {
-  start = nextNotIgored(start);
+  start = nextNotIgnored(start);
+  start = discardParethesizedInBlock(start);
   while ((par[start] == '{') && (start < _dataEnd)) {
-    discardParethesizedInBlock(start);
-    start = nextNotIgored(start+1);
+    start = discardParethesizedInBlock(start);
   }
   // It is empty if (par[start] == '}')
   return ((start >= _dataEnd) || (par[start] == '}'));
@@ -1439,7 +1500,7 @@ void LangInfo::process(ostringstream &os)
     start = color.getEnd()+1;
   else {
     // Apparently nothing output so far
-    start = _dataStart;
+    start = nextNotIgnored(_dataStart);
   }
   discarSuperfluousParentheses(start);
   output(os, _dataEnd);
@@ -1462,12 +1523,6 @@ string splitForColors(string par) {
       firstLanguage.setDataEnd(par.length());
       // discard old closing
       firstLanguage.addIntervall(oldend, oldend+1);
-      for (int i = 1; i < firstLanguage.getEnd(); i++) {
-        if (par[i] == '{')
-          firstLanguage.discardParethesizedInBlock(i);
-        else
-          break;
-      }
     }
     firstLanguage.process(os);
     // For the case, that the first language ends unexpected
@@ -1728,6 +1783,10 @@ int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) con
 
 	docstring docstr = stringifyFromForSearch(opt, cur, len);
 	string str = normalize(docstr, true);
+	if (!opt.ignoreformat) {
+		str = removefontinfo(str);
+		str = correctlanguagesetting(str, false, false);
+	}
 	if (str.empty()) return(-1);
 	LYXERR(Debug::FIND, "Matching against     '" << lyx::to_utf8(docstr) << "'");
 	LYXERR(Debug::FIND, "After normalization: '" << str << "'");
@@ -1944,7 +2003,7 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
 			endpos = cur.pos() + len;
 		TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
 			  string(), cur.pos(), endpos);
-		string s = correctlanguagesetting(lyx::to_utf8(ods.str()), false, false);
+		string s = lyx::to_utf8(ods.str());
 		LYXERR(Debug::FIND, "Latexified +modified text: '" << s << "'");
 		return(lyx::from_utf8(s));
 	} else if (cur.inMathed()) {
-- 
2.39.5