Fix remainder of #9681 (textcyrillic and textgreek handling).

[lyx.git] / src / lyxfind.cpp
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index 8c11bdbe8e84ab804ae0dee421fa4ff017970798..b3474488280f2d79cee379018bef605f5a0f2f93 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -162,7 +162,7 @@ IgnoreFormats ignoreFormats;
  
  void setIgnoreFormat(string type, bool value)
  {
-       ignoreFormats.setIgnoreFormat(type, value);
+  ignoreFormats.setIgnoreFormat(type, value);
  }
  
  
@@ -1109,6 +1109,8 @@ class Border {
    int upper;
  };
  
+static vector<Border> borders = vector<Border>(30);
+
  #define MAXOPENED 30
  class Intervall {
    bool isPatternString;
@@ -1122,7 +1124,6 @@ class Intervall {
    int depts[MAXOPENED];
    int closes[MAXOPENED];
    int actualdeptindex;
-  Border borders[2*MAXOPENED];
    int previousNotIgnored(int);
    int nextNotIgnored(int);
    void handleOpenP(int i);
@@ -1173,20 +1174,28 @@ void Intervall::setForDefaultLang(KeyInfo &defLang)
  static void checkDepthIndex(int val)
  {
    static int maxdepthidx = MAXOPENED-2;
+  static int lastmaxdepth = 0;
+  if (val > lastmaxdepth) {
+    LYXERR0("Depth reached " << val);
+    lastmaxdepth = val;
+  }
    if (val > maxdepthidx) {
      maxdepthidx = val;
      LYXERR0("maxdepthidx now " << val);
    }
  }
  
+#if 0
+// Not needed, because borders are now dynamically expanded
  static void checkIgnoreIdx(int val)
  {
-  static int maxignoreidx = 2*MAXOPENED - 4;
-  if (val > maxignoreidx) {
-    maxignoreidx = val;
-    LYXERR0("maxignoreidx now " << val);
+  static int lastmaxignore = -1;
+  if ((lastmaxignore < val) && (size_t(val+1) >= borders.size())) {
+    LYXERR0("IgnoreIdx reached " << val);
+    lastmaxignore = val;
    }
  }
+#endif
  
  /*
   * Expand the region of ignored parts of the input latex string
@@ -1203,9 +1212,14 @@ void Intervall::addIntervall(int low, int upper)
    }
    Border br(low, upper);
    if (idx > ignoreidx) {
-    borders[idx] = br;
+    if (borders.size() <= size_t(idx)) {
+      borders.push_back(br);
+    }
+    else {
+      borders[idx] = br;
+    }
      ignoreidx = idx;
-    checkIgnoreIdx(ignoreidx);
+    // checkIgnoreIdx(ignoreidx);
      return;
    }
    else {
@@ -1213,12 +1227,18 @@ void Intervall::addIntervall(int low, int upper)
      // We know here that br.low > borders[idx-1].upper
      if (br.upper < borders[idx].low) {
        // We have to insert at this pos
-      for (int i = ignoreidx+1; i > idx; --i) {
+      if (size_t(ignoreidx+1) >= borders.size()) {
+        borders.push_back(borders[ignoreidx]);
+      }
+      else {
+        borders[ignoreidx+1] = borders[ignoreidx];
+      }
+      for (int i = ignoreidx; i > idx; --i) {
          borders[i] = borders[i-1];
        }
        borders[idx] = br;
        ignoreidx += 1;
-      checkIgnoreIdx(ignoreidx);
+      // checkIgnoreIdx(ignoreidx);
        return;
      }
      // Here we know, that we are overlapping
@@ -1263,6 +1283,12 @@ static void buildaccent(string n, string param, string values)
        // get the corresponding utf8-value
        if ((values[start] & 0xc0) != 0xc0) {
          // should not happen, utf8 encoding starts at least with 11xxxxxx
+        // but value for '\dot{i}' is 'i', which is ascii
+        if ((values[start] & 0x80) == 0) {
+          // is ascii
+          accents[key] = values.substr(start, 1);
+          // LYXERR0("" << key << "=" << accents[key]);
+        }
          start++;
          continue;
        }
@@ -1272,10 +1298,11 @@ static void buildaccent(string n, string param, string values)
            start = values.size() - 1;
            break;
          }
-        else if ((values[start+j] & 0xc0) == 0xc0) {
+        else if ((values[start+j] & 0xc0) != 0x80) {
            // This is the first byte of following utf8 char
            accents[key] = values.substr(start, j);
            start += j;
+          // LYXERR0("" << key << "=" << accents[key]);
            break;
          }
        }
@@ -1289,21 +1316,22 @@ static void buildAccentsMap()
    accents["i"] = "ı";
    accents["jmath"] = "ȷ";
    accents["lyxmathsym{ß}"] = "ß";
+  accents["text{ß}"] = "ß";
    accents["ddot{\\imath}"] = "ï";
-  buildaccent("ddot", "aAeEiIoOuUyY",
-                      "äÄëËïÏöÖüÜÿŸ");     // umlaut
-  buildaccent("dot|.", "cCeEgGiIzZaAoObBdDfFyY",
-                       "ċĊėĖġĠiİżŻȧȦȯȮḃḂḋḊḟḞẏẎ");
+  buildaccent("ddot", "aAeEiIioOuUyY",
+                      "äÄëËïÏïöÖüÜÿŸ");   // umlaut
+  buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+                       "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ");        // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
    accents["acute{\\imath}"] = "í";
    buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
                         "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ");
    buildaccent("dacute|H|h", "oOuU", "őŐűŰ");       // double acute
    buildaccent("mathring|r", "aAuUwy",
-                            "åÅůŮẘẙ");  // ring
+                            "åÅůŮẘẙ"); // ring
    accents["check{\\imath}"] = "ǐ";
    accents["check{\\jmath}"] = "ǰ";
-  buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTzZ",
-                         "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤžŽ");    // caron
+  buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+                         "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ");  // caron
    accents["hat{\\imath}"] = "î";
    accents["hat{\\jmath}"] = "ĵ";
    buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
@@ -1316,12 +1344,22 @@ static void buildAccentsMap()
                         "ãÃñÑõÕĩĨũŨ");        // tilde
    accents["breve{\\imath}"] = "ĭ";
    buildaccent("breve|u", "aAeEgGiIoOuU",
-                         "ăĂĕĔğĞĭĬŏŎŭŬ");    // breve
+                         "ăĂĕĔğĞĭĬŏŎŭŬ");  // breve
    accents["grave{\\imath}"] = "ì";
    buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
-                         "àÀèÈìÌòÒùÙǹǸẁẀỳỲ");   // grave
+                         "àÀèÈìÌòÒùÙǹǸẁẀỳỲ");      // grave
    buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
-                          "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ");  // dot below
+                          "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ");       // dot below
+  buildaccent("ogonek|k", "AaEeIiUuOo",
+                          "ĄąĘęĮįŲųǪǫ");     // ogonek
+  buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh",
+                           "ÇçĢĢĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ");        // cedilla
+  buildaccent("subring|textsubring", "Aa",
+                                     "Ḁḁ");        // subring
+  buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu",
+                                      "ḒḓḘḙḼḽṊṋṰṱṶṷ"); // subcircum
+  buildaccent("subtilde|textsubtilde", "EeIiUu",
+                                       "ḚḛḬḭṴṵ");  // subtilde
  }
  
  /*
@@ -1332,14 +1370,14 @@ void Intervall::removeAccents()
  {
    if (accents.empty())
      buildAccentsMap();
-  static regex const accre("\\\\((.|grave|breve|lyxmathsym|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
+  static regex const accre("\\\\(([\\S]|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
    smatch sub;
    for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
      sub = *itacc;
      string key = sub.str(1);
      if (accents.find(key) != accents.end()) {
        string val = accents[key];
-      size_t pos = sub.position(0);
+      size_t pos = sub.position(size_t(0));
        for (size_t i = 0; i < val.size(); i++) {
          par[pos+i] = val[i];
        }
@@ -2632,6 +2670,8 @@ static int identifyClosing(string & t)
         return open_braces;
  }
  
+static int num_replaced = 0;
+static bool previous_single_replace = true;
  
  MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt)
         : p_buf(&buf), p_first_buf(&buf), opt(opt)
@@ -2639,6 +2679,14 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
         Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true);
         docstring const & ds = stringifySearchBuffer(find_buf, opt);
         use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
+       if (opt.replace_all && previous_single_replace) {
+               previous_single_replace = false;
+               num_replaced = 0;
+       }
+       else if (!opt.replace_all) {
+               num_replaced = 0;       // count number of replaced strings
+               previous_single_replace = true;
+       }
         // When using regexp, braces are hacked already by escape_for_regex()
         par_as_string = normalize(ds, !use_regexp);
         open_braces = 0;
@@ -2855,7 +2903,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
  
                 // Exclude from the returned match length any length
                 // due to close wildcards added at end of regexp
-               // and also the length of the leading (e.g. '\emph{')
+               // and also the length of the leading (e.g. '\emph{}')
                 //
                 // Whole found string, including the leading: m[0].second - m[0].first
                 // Size of the leading string: m[1].second - m[1].first
@@ -2907,7 +2955,8 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
                         return mres;
                 }
         } else {
-               size_t pos = str.find(par_as_string_nolead);
+               // Start the search _after_ the leading part
+               size_t pos = str.find(par_as_string_nolead, lead_as_string.size());
                 if (pos != string::npos) {
                         mres.match_len = par_as_string.size();
                         mres.match2end = str.size() - pos;
@@ -3431,10 +3480,10 @@ FindAndReplaceOptions::FindAndReplaceOptions(
         docstring const & find_buf_name, bool casesensitive,
         bool matchword, bool forward, bool expandmacros, bool ignoreformat,
         docstring const & repl_buf_name, bool keep_case,
-       SearchScope scope, SearchRestriction restr)
+       SearchScope scope, SearchRestriction restr, bool replace_all)
         : find_buf_name(find_buf_name), casesensitive(casesensitive), matchword(matchword),
           forward(forward), expandmacros(expandmacros), ignoreformat(ignoreformat),
-         repl_buf_name(repl_buf_name), keep_case(keep_case), scope(scope), restr(restr)
+         repl_buf_name(repl_buf_name), keep_case(keep_case), scope(scope), restr(restr), replace_all(replace_all)
  {
  }
  
@@ -3496,29 +3545,29 @@ static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase other
  } // namespace
  
  ///
-static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
+static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
  {
         Cursor & cur = bv->cursor();
         if (opt.repl_buf_name == docstring()
             || theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0
             || theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0)
-               return;
+               return 0;
  
         DocIterator sel_beg = cur.selectionBegin();
         DocIterator sel_end = cur.selectionEnd();
         if (&sel_beg.inset() != &sel_end.inset()
             || sel_beg.pit() != sel_end.pit()
             || sel_beg.idx() != sel_end.idx())
-               return;
+               return 0;
         int sel_len = sel_end.pos() - sel_beg.pos();
         LYXERR(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end
                << ", sel_len: " << sel_len << endl);
         if (sel_len == 0)
-               return;
-       LASSERT(sel_len > 0, return);
+               return 0;
+       LASSERT(sel_len > 0, return 0);
  
         if (!matchAdv(sel_beg, sel_len).match_len)
-               return;
+               return 0;
  
         // Build a copy of the replace buffer, adapted to the KeepCase option
         Buffer & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true);
@@ -3527,7 +3576,7 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M
         string lyx = oss.str();
         Buffer repl_buffer("", false);
         repl_buffer.setUnnamed(true);
-       LASSERT(repl_buffer.readString(lyx), return);
+       LASSERT(repl_buffer.readString(lyx), return 0);
         if (opt.keep_case && sel_len >= 2) {
                 LYXERR(Debug::FIND, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len);
                 if (cur.inTexted()) {
@@ -3580,6 +3629,7 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M
         LYXERR(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
         bv->putSelectionAt(DocIterator(cur), sel_len, !opt.forward);
         bv->processUpdateFlags(Update::Force);
+       return 1;
  }
  
  
@@ -3599,7 +3649,7 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
                 int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos();
                 if (length > 0)
                         bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
-               findAdvReplace(bv, opt, matchAdv);
+               num_replaced += findAdvReplace(bv, opt, matchAdv);
                 cur = bv->cursor();
                 if (opt.forward)
                         match_len = findForwardAdv(cur, matchAdv);
@@ -3612,11 +3662,31 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
         }
  
         if (match_len == 0) {
-               bv->message(_("Match not found!"));
+               if (num_replaced > 0) {
+                       switch (num_replaced)
+                       {
+                               case 1:
+                                       bv->message(_("One match has been replaced."));
+                                       break;
+                               case 2:
+                                       bv->message(_("Two matches have been replaced."));
+                                       break;
+                               default:
+                                       bv->message(bformat(_("%1$d matches have been replaced."), num_replaced));
+                                       break;
+                       }
+                       num_replaced = 0;
+               }
+               else {
+                       bv->message(_("Match not found."));
+               }
                 return false;
         }
  
-       bv->message(_("Match found!"));
+       if (num_replaced > 0)
+               bv->message(_("Match has been replaced."));
+       else
+               bv->message(_("Match found."));
  
         LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len);
         bv->putSelectionAt(cur, match_len, !opt.forward);
@@ -3633,6 +3703,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt
            << opt.forward << ' '
            << opt.expandmacros << ' '
            << opt.ignoreformat << ' '
+          << opt.replace_all << ' '
            << to_utf8(opt.repl_buf_name) << "\nEOSS\n"
            << opt.keep_case << ' '
            << int(opt.scope) << ' '
@@ -3660,7 +3731,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
         }
         LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
         opt.find_buf_name = from_utf8(s);
-       is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat;
+       is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all;
         is.get();       // Waste space before replace string
         s = "";
         getline(is, line);