From d7354a1a093b702836b22a0b828cdc79c25bc7ae Mon Sep 17 00:00:00 2001
From: Kornel Benko <kornel@lyx.org>
Date: Mon, 18 Mar 2019 18:19:44 +0100
Subject: [PATCH] FindAdv: Polishing

1.) Use a vector for borders, because any fixed array size may be
  too small if a paragraph contains many accented characters.
2.) Use '[\S]' instead of '.' in the 'accre' regex. Otherwise the
  regex would also match patterns like '\ {some text}'.
---
 src/lyxfind.cpp | 80 +++++++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 29 deletions(-)
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index 655bbd9900..b347448828 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -162,7 +162,7 @@ IgnoreFormats ignoreFormats;
 
 void setIgnoreFormat(string type, bool value)
 {
-	ignoreFormats.setIgnoreFormat(type, value);
+  ignoreFormats.setIgnoreFormat(type, value);
 }
 
 
@@ -1109,6 +1109,8 @@ class Border {
   int upper;
 };
 
+static vector<Border> borders = vector<Border>(30);
+
 #define MAXOPENED 30
 class Intervall {
   bool isPatternString;
@@ -1122,7 +1124,6 @@ class Intervall {
   int depts[MAXOPENED];
   int closes[MAXOPENED];
   int actualdeptindex;
-  Border borders[2*MAXOPENED];
   int previousNotIgnored(int);
   int nextNotIgnored(int);
   void handleOpenP(int i);
@@ -1173,20 +1174,28 @@ void Intervall::setForDefaultLang(KeyInfo &defLang)
 static void checkDepthIndex(int val)
 {
   static int maxdepthidx = MAXOPENED-2;
+  static int lastmaxdepth = 0;
+  if (val > lastmaxdepth) {
+    LYXERR0("Depth reached " << val);
+    lastmaxdepth = val;
+  }
   if (val > maxdepthidx) {
     maxdepthidx = val;
     LYXERR0("maxdepthidx now " << val);
   }
 }
 
+#if 0
+// Not needed, because borders are now dynamically expanded
 static void checkIgnoreIdx(int val)
 {
-  static int maxignoreidx = 2*MAXOPENED - 4;
-  if (val > maxignoreidx) {
-    maxignoreidx = val;
-    LYXERR0("maxignoreidx now " << val);
+  static int lastmaxignore = -1;
+  if ((lastmaxignore < val) && (size_t(val+1) >= borders.size())) {
+    LYXERR0("IgnoreIdx reached " << val);
+    lastmaxignore = val;
   }
 }
+#endif
 
 /*
  * Expand the region of ignored parts of the input latex string
@@ -1203,9 +1212,14 @@ void Intervall::addIntervall(int low, int upper)
   }
   Border br(low, upper);
   if (idx > ignoreidx) {
-    borders[idx] = br;
+    if (borders.size() <= size_t(idx)) {
+      borders.push_back(br);
+    }
+    else {
+      borders[idx] = br;
+    }
     ignoreidx = idx;
-    checkIgnoreIdx(ignoreidx);
+    // checkIgnoreIdx(ignoreidx);
     return;
   }
   else {
@@ -1213,12 +1227,18 @@ void Intervall::addIntervall(int low, int upper)
     // We know here that br.low > borders[idx-1].upper
     if (br.upper < borders[idx].low) {
       // We have to insert at this pos
-      for (int i = ignoreidx+1; i > idx; --i) {
+      if (size_t(ignoreidx+1) >= borders.size()) {
+        borders.push_back(borders[ignoreidx]);
+      }
+      else {
+        borders[ignoreidx+1] = borders[ignoreidx];
+      }
+      for (int i = ignoreidx; i > idx; --i) {
         borders[i] = borders[i-1];
       }
       borders[idx] = br;
       ignoreidx += 1;
-      checkIgnoreIdx(ignoreidx);
+      // checkIgnoreIdx(ignoreidx);
       return;
     }
     // Here we know, that we are overlapping
@@ -1263,13 +1283,14 @@ static void buildaccent(string n, string param, string values)
       // get the corresponding utf8-value
       if ((values[start] & 0xc0) != 0xc0) {
         // should not happen, utf8 encoding starts at least with 11xxxxxx
-	// but value for '\dot{i}' is 'i', which is ascii
-	if ((values[start] & 0x80) == 0) {
-	  // is ascii
-	  accents[key] = values.substr(start, 1);
-	}
-	start++;
-	continue;
+        // but value for '\dot{i}' is 'i', which is ascii
+        if ((values[start] & 0x80) == 0) {
+          // is ascii
+          accents[key] = values.substr(start, 1);
+          // LYXERR0("" << key << "=" << accents[key]);
+        }
+        start++;
+        continue;
       }
       for (int j = 1; ;j++) {
         if (start + j >= values.size()) {
@@ -1281,6 +1302,7 @@ static void buildaccent(string n, string param, string values)
           // This is the first byte of following utf8 char
           accents[key] = values.substr(start, j);
           start += j;
+          // LYXERR0("" << key << "=" << accents[key]);
           break;
         }
       }
@@ -1299,13 +1321,13 @@ static void buildAccentsMap()
   buildaccent("ddot", "aAeEiIioOuUyY",
                       "Ã¤ÃÃ«ÃÃ¯ÃÃ¯Ã¶ÃÃ¼ÃÃ¿Å¸");	// umlaut
   buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
-                       "ÄÄÄÄÄ Ä¡Ä°Ä°Å¼Å»È§È¦È¯È®á¸á¸á¸á¸á¸á¸áºáº"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'Ä°'
+                       "ÄÄÄÄÄ Ä¡Ä°Ä°Å¼Å»È§È¦È¯È®á¸á¸á¸á¸á¸á¸áºáº");	// dot{i} can only happen if ignoring case, but there is no lowercase of 'Ä°'
   accents["acute{\\imath}"] = "Ã­";
   buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
                        "Ã¡ÃÄÄÃ©ÃÄºÄ¹Ã³ÃÅÅÅÅÅÅÃºÃÃ½ÃÅºÅ¹Ã­Ã");
   buildaccent("dacute|H|h", "oOuU", "ÅÅÅ±Å°");	// double acute
   buildaccent("mathring|r", "aAuUwy",
-                            "Ã¥ÃÅ¯Å®áºáº");  // ring
+                            "Ã¥ÃÅ¯Å®áºáº");	// ring
   accents["check{\\imath}"] = "Ç";
   accents["check{\\jmath}"] = "Ç°";
   buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
@@ -1322,22 +1344,22 @@ static void buildAccentsMap()
                        "Ã£ÃÃ±ÃÃµÃÄ©Ä¨Å©Å¨");	// tilde
   accents["breve{\\imath}"] = "Ä­";
   buildaccent("breve|u", "aAeEgGiIoOuU",
-                         "ÄÄÄÄÄÄÄ­Ä¬ÅÅÅ­Å¬");    // breve
+                         "ÄÄÄÄÄÄÄ­Ä¬ÅÅÅ­Å¬");	// breve
   accents["grave{\\imath}"] = "Ã¬";
   buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
-                         "Ã ÃÃ¨ÃÃ¬ÃÃ²ÃÃ¹ÃÇ¹Ç¸áºáºá»³á»²");   // grave
+                         "Ã ÃÃ¨ÃÃ¬ÃÃ²ÃÃ¹ÃÇ¹Ç¸áºáºá»³á»²");	// grave
   buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
-                          "á¸á¸á¸á¸á¸¤á¸¥á¸²á¸³á¸¶á¸·á¹á¹á¹á¹á¹á¹á¹¢á¹£á¹¬á¹­á¹¾á¹¿áºáºáºáºáº áº¡áº¸áº¹á»á»á»á»á»¤á»¥á»´á»µ");  // dot below
+                          "á¸á¸á¸á¸á¸¤á¸¥á¸²á¸³á¸¶á¸·á¹á¹á¹á¹á¹á¹á¹¢á¹£á¹¬á¹­á¹¾á¹¿áºáºáºáºáº áº¡áº¸áº¹á»á»á»á»á»¤á»¥á»´á»µ");	// dot below
   buildaccent("ogonek|k", "AaEeIiUuOo",
-                          "ÄÄÄÄÄ®Ä¯Å²Å³ÇªÇ«"); // ogonek
-  buildaccent("cedilla|c", "CcGKkLlNnRrSsTtEeDdHh",
-                           "ÃÃ§Ä¢Ä¶Ä·Ä»Ä¼ÅÅÅÅÅÅÅ¢Å£È¨È©á¸á¸á¸¨á¸©"); // cedilla
+                          "ÄÄÄÄÄ®Ä¯Å²Å³ÇªÇ«");	// ogonek
+  buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh",
+                           "ÃÃ§Ä¢Ä¢Ä¶Ä·Ä»Ä¼ÅÅÅÅÅÅÅ¢Å£È¨È©á¸á¸á¸¨á¸©");	// cedilla
   buildaccent("subring|textsubring", "Aa",
-                                     "á¸á¸"); // subring
+                                     "á¸á¸");	// subring
   buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu",
-                                      "á¸á¸á¸á¸á¸¼á¸½á¹á¹á¹°á¹±á¹¶á¹·"); // subcircum
+                                      "á¸á¸á¸á¸á¸¼á¸½á¹á¹á¹°á¹±á¹¶á¹·");	// subcircum
   buildaccent("subtilde|textsubtilde", "EeIiUu",
-                                       "á¸á¸á¸¬á¸­á¹´á¹µ"); // subtilde
+                                       "á¸á¸á¸¬á¸­á¹´á¹µ");	// subtilde
 }
 
 /*
@@ -1348,7 +1370,7 @@ void Intervall::removeAccents()
 {
   if (accents.empty())
     buildAccentsMap();
-  static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
+  static regex const accre("\\\\(([\\S]|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
   smatch sub;
   for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
     sub = *itacc;
-- 
2.39.2