FindAdv: Expand the list of handled chars for ogonek

[lyx.git] / src / lyxfind.cpp
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index 29873aea7386b5d5aeff8341928200ffa1b1fe23..4ba16b95d95fdc0f06589e81db8d7c63148d1cf5 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -32,6 +32,7 @@
  #include "ParIterator.h"
  #include "TexRow.h"
  #include "Text.h"
+#include "Encoding.h"
  
  #include "frontends/Application.h"
  #include "frontends/alert.h"
@@ -52,6 +53,7 @@
  #include "support/lstrings.h"
  
  #include "support/regex.h"
+#include "support/textutils.h"
  #include <map>
  
  using namespace std;
@@ -909,11 +911,12 @@ private:
  
  static docstring buffer_to_latex(Buffer & buffer)
  {
-       OutputParams runparams(&buffer.params().encoding());
+       //OutputParams runparams(&buffer.params().encoding());
+       OutputParams runparams(encodings.fromLyXName("utf8"));
         odocstringstream ods;
         otexstream os(ods);
         runparams.nice = true;
-       runparams.flavor = OutputParams::LATEX;
+       runparams.flavor = OutputParams::XETEX;
         runparams.linelen = 10000; //lyxrc.plaintext_linelen;
         // No side effect of file copying and image conversion
         runparams.dryrun = true;
@@ -933,9 +936,10 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
         if (!opt.ignoreformat) {
                 str = buffer_to_latex(buffer);
         } else {
-               OutputParams runparams(&buffer.params().encoding());
+               // OutputParams runparams(&buffer.params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = true;
-               runparams.flavor = OutputParams::LATEX;
+               runparams.flavor = OutputParams::XETEX;
                 runparams.linelen = 10000; //lyxrc.plaintext_linelen;
                 runparams.dryrun = true;
                 runparams.for_search = true;
@@ -1126,6 +1130,7 @@ class Intervall {
    void resetOpenedP(int openPos);
    void addIntervall(int upper);
    void addIntervall(int low, int upper); /* if explicit */
+  void removeAccents();
    void setForDefaultLang(KeyInfo &defLang);
    int findclosing(int start, int end, char up, char down, int repeat);
    void handleParentheses(int lastpos, bool closingAllowed);
@@ -1243,6 +1248,121 @@ void Intervall::addIntervall(int low, int upper)
    }
  }
  
+typedef map<string, string> AccentsMap;
+// Maps an accent macro without its leading backslash (e.g. "ddot{a}")
+// to the utf8 encoding of the accented character.
+// Filled lazily by buildAccentsMap().
+static AccentsMap accents;
+
+/*
+ * Register one family of accented characters in 'accents'.
+ *
+ * n:      macro name(s) of the accent, alternatives separated by '|'
+ *         (e.g. "dot|.")
+ * param:  the ascii base letters, one byte each
+ * values: the utf8 encoded accented characters, in the same order as
+ *         the letters in 'param'
+ *
+ * For every name and every letter the key "name{letter}" is mapped to the
+ * corresponding utf8 sequence. Letters without a corresponding value
+ * (i.e. 'values' holds fewer characters than 'param') are ignored.
+ */
+static void buildaccent(string const & n, string const & param, string const & values)
+{
+  stringstream s(n);
+  string name;
+  const char delim = '|';
+  while (getline(s, name, delim)) {
+    size_t start = 0;
+    // Stop as soon as 'values' is exhausted, never read behind its end
+    for (size_t i = 0; i < param.size() && start < values.size(); i++) {
+      string const key = name + "{" + param[i] + "}";
+      unsigned char const lead = static_cast<unsigned char>(values[start]);
+      if ((lead & 0xc0) != 0xc0) {
+        // Not the first byte of a multibyte utf8 sequence (11xxxxxx).
+        // Plain ascii is accepted nevertheless (a value may simply be
+        // the unaccented letter); a stray continuation byte is skipped
+        // together with the letter it was meant for.
+        if ((lead & 0x80) == 0)
+          accents[key] = values.substr(start, 1);
+        start++;
+        continue;
+      }
+      // Collect the continuation bytes (10xxxxxx) of this character
+      size_t len = 1;
+      while (start + len < values.size()
+             && (static_cast<unsigned char>(values[start + len]) & 0xc0) == 0x80)
+        len++;
+      accents[key] = values.substr(start, len);
+      start += len;
+    }
+  }
+}
+
+/*
+ * Fill 'accents' with all known accent macros and their utf8 equivalents.
+ * Entries which cannot be expressed as "name{letter}" are listed
+ * explicitly, the regular ones are generated by buildaccent().
+ */
+static void buildAccentsMap()
+{
+  // { key, utf8 value }
+  static char const * const singles[][2] = {
+    {"imath", "ı"},
+    {"i", "ı"},
+    {"jmath", "ȷ"},
+    {"lyxmathsym{ß}", "ß"},
+    {"text{ß}", "ß"},
+    {"ddot{\\imath}", "ï"},
+    {"acute{\\imath}", "í"},
+    {"check{\\imath}", "ǐ"},
+    {"check{\\jmath}", "ǰ"},
+    {"hat{\\imath}", "î"},
+    {"hat{\\jmath}", "ĵ"},
+    {"bar{\\imath}", "ī"},
+    {"tilde{\\imath}", "ĩ"},
+    {"breve{\\imath}", "ĭ"},
+    {"grave{\\imath}", "ì"},
+  };
+  for (auto const & single : singles)
+    accents[single[0]] = single[1];
+
+  // { macro names separated by '|', base letters, utf8 values in the same order }
+  static char const * const families[][3] = {
+    // umlaut
+    {"ddot", "aAeEiIioOuUyY",
+             "äÄëËïÏïöÖüÜÿŸ"},
+    // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
+    {"dot|.", "cCeEGgIizZaAoObBdDfFyY",
+              "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"},
+    {"acute", "aAcCeElLoOnNrRsSuUyYzZiI",
+              "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ"},
+    // double acute
+    {"dacute|H|h", "oOuU", "őŐűŰ"},
+    // ring
+    {"mathring|r", "aAuUwy",
+                   "åÅůŮẘẙ"},
+    // caron
+    {"check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+                "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ"},
+    // circ
+    {"hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
+              "âÂêÊîÎĉĈĝĜĥĤĵĴŝŜŵŴŷŶẑẐôÔûÛ"},
+    // macron
+    {"bar|=", "aAeEiIoOuUyY",
+              "āĀēĒīĪōŌūŪȳȲ"},
+    // tilde
+    {"tilde", "aAnNoOiIuU",
+              "ãÃñÑõÕĩĨũŨ"},
+    // breve
+    {"breve|u", "aAeEgGiIoOuU",
+                "ăĂĕĔğĞĭĬŏŎŭŬ"},
+    // grave
+    {"grave|`", "aAeEiIoOuUnNwWyY",
+                "àÀèÈìÌòÒùÙǹǸẁẀỳỲ"},
+    // dot below
+    {"subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
+                 "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ"},
+    // ogonek
+    {"ogonek|k", "AaEeIiUuOo",
+                 "ĄąĘęĮįŲųǪǫ"},
+  };
+  for (auto const & family : families)
+    buildaccent(family[0], family[1], family[2]);
+}
+
+/*
+ * Accents created in math or regexp environment
+ * are macros, but we need the utf8 equivalent.
+ *
+ * Every known accent macro found in 'par' is overwritten in place with
+ * its utf8 value. 'par' keeps its size: the unused rest of the macro is
+ * registered with addIntervall() and blanked out.
+ * Macros without an entry in 'accents' are logged and left untouched.
+ */
+void Intervall::removeAccents()
+{
+  if (accents.empty())
+    buildAccentsMap();
+  static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
+  for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
+    smatch const & sub = *itacc;
+    // Group 1 is the macro without the leading backslash, e.g. "ddot{a}"
+    string const key = sub.str(1);
+    AccentsMap::const_iterator const found = accents.find(key);
+    if (found == accents.end()) {
+      LYXERR0("Not added accent for \"" << key << "\"");
+      continue;
+    }
+    string const & val = found->second;
+    size_t const pos = sub.position(0);
+    size_t const matchend = pos + sub.length(0);
+    size_t const valend = pos + val.size();
+    if (valend > matchend) {
+      // The replacement has to fit into the space of the macro,
+      // otherwise we would overwrite the following text
+      LYXERR0("Accent value too long for \"" << key << "\"");
+      continue;
+    }
+    // Only single chars are written, so that 'par' is never reallocated
+    // while the regex iterator still refers to it
+    for (size_t i = 0; i < val.size(); i++) {
+      par[pos+i] = val[i];
+    }
+    addIntervall(valend, matchend);
+    for (size_t i = valend; i < matchend; i++) {
+      // remove traces of any remaining chars
+      par[i] = ' ';
+    }
+  }
+}
+
  void Intervall::handleOpenP(int i)
  {
    actualdeptindex++;
@@ -1478,6 +1598,8 @@ void LatexInfo::buildEntries(bool isPatternString)
    size_t math_pos = 10000;
    string math_end;
  
+  interval.removeAccents();
+
    for (sregex_iterator itmath(interval.par.begin(), interval.par.end(), rmath), end; itmath != end; ++itmath) {
      submath = *itmath;
      if (math_end_waiting) {
@@ -1565,8 +1687,9 @@ void LatexInfo::buildEntries(bool isPatternString)
          found.parenthesiscount = 0;
          found.head = interval.par.substr(found._tokenstart, found._tokensize);
        }
-      else
+      else {
          continue;
+      }
      }
      else {
        if (evaluatingMath) {
@@ -2118,9 +2241,9 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
      }
      case KeyInfo::endArguments:
        // Remove trailing '{}' too
-      actual._dataStart += 2;
-      actual._dataEnd += 2;
-      interval.addIntervall(actual._tokenstart, actual._dataEnd);
+      actual._dataStart += 1;
+      actual._dataEnd += 1;
+      interval.addIntervall(actual._tokenstart, actual._dataEnd+1);
        nextKeyIdx = getNextKey();
        break;
      case KeyInfo::noMain:
@@ -2467,14 +2590,14 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with
                 missed = 0;
                 if (withformat) {
                         regex_f = identifyFeatures(result);
-                        string features = "";
+                       string features = "";
                         for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
                                 string a = it->first;
                                 regex_with_format = true;
-                                features += " " + a;
+                               features += " " + a;
                                 // LYXERR0("Identified regex format:" << a);
                         }
-                        LYXERR(Debug::FIND, "Identified Features" << features);
+                       LYXERR(Debug::FIND, "Identified Features" << features);
  
                 }
         } else if (regex_with_format) {
@@ -2517,6 +2640,8 @@ static int identifyClosing(string & t)
         return open_braces;
  }
  
+static int num_replaced = 0;
+static bool previous_single_replace = true;
  
  MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt)
         : p_buf(&buf), p_first_buf(&buf), opt(opt)
@@ -2524,6 +2649,14 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
         Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true);
         docstring const & ds = stringifySearchBuffer(find_buf, opt);
         use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
+       if (opt.replace_all && previous_single_replace) {
+               previous_single_replace = false;
+               num_replaced = 0;
+       }
+       else if (!opt.replace_all) {
+               num_replaced = 0;       // count number of replaced strings
+               previous_single_replace = true;
+       }
         // When using regexp, braces are hacked already by escape_for_regex()
         par_as_string = normalize(ds, !use_regexp);
         open_braces = 0;
@@ -2740,7 +2873,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
  
                 // Exclude from the returned match length any length
                 // due to close wildcards added at end of regexp
-               // and also the length of the leading (e.g. '\emph{')
+               // and also the length of the leading (e.g. '\emph{}')
                 //
                 // Whole found string, including the leading: m[0].second - m[0].first
                 // Size of the leading string: m[1].second - m[1].first
@@ -2792,7 +2925,8 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
                         return mres;
                 }
         } else {
-               size_t pos = str.find(par_as_string_nolead);
+               // Start the search _after_ the leading part
+               size_t pos = str.find(par_as_string_nolead, lead_as_string.size());
                 if (pos != string::npos) {
                         mres.match_len = par_as_string.size();
                         mres.match2end = str.size() - pos;
@@ -2863,14 +2997,14 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
         while ((pos = t.find("\n")) != string::npos) {
                 if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
                         // Handle '\\\n'
-                       if (std::isalnum(t[pos+1])) {
+                       if (isAlnumASCII(t[pos+1])) {
                                 t.replace(pos-2, 3, " ");
                         }
                         else {
                                 t.replace(pos-2, 3, "");
                         }
                 }
-               else if (!std::isalnum(t[pos+1]) || !std::isalnum(t[pos-1])) {
+               else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
                         // '\n' adjacent to non-alpha-numerics, discard
                         t.replace(pos, 1, "");
                 }
@@ -2914,9 +3048,10 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                 // TODO Try adding a AS_STR_INSERTS as last arg
                 pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ?
                         int(par.size()) : cur.pos() + len;
-               OutputParams runparams(&cur.buffer()->params().encoding());
+               // OutputParams runparams(&cur.buffer()->params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = true;
-               runparams.flavor = OutputParams::LATEX;
+               runparams.flavor = OutputParams::XETEX;
                 runparams.linelen = 10000; //lyxrc.plaintext_linelen;
                 // No side effect of file copying and image conversion
                 runparams.dryrun = true;
@@ -2959,9 +3094,10 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
  
         odocstringstream ods;
         otexstream os(ods);
-       OutputParams runparams(&buf.params().encoding());
+       //OutputParams runparams(&buf.params().encoding());
+       OutputParams runparams(encodings.fromLyXName("utf8"));
         runparams.nice = false;
-       runparams.flavor = OutputParams::LATEX;
+       runparams.flavor = OutputParams::XETEX;
         runparams.linelen = 8000; //lyxrc.plaintext_linelen;
         // No side effect of file copying and image conversion
         runparams.dryrun = true;
@@ -3145,7 +3281,9 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                 DocIterator old_cur = cur;
                                 for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
                                 }
-                               if (! cur) {
+                               if (! cur || (cur.pit() > old_cur.pit())) {
+                                       // Are we outside of the paragraph?
+                                       // This can happen if moving past some UTF8-encoded chars
                                         cur = old_cur;
                                         increment /= 2;
                                 }
@@ -3303,14 +3441,8 @@ docstring stringifyFromForSearch(FindAndReplaceOptions const & opt,
                 return docstring();
         if (!opt.ignoreformat)
                 return latexifyFromCursor(cur, len);
-       else {
-               if (len < 0) {
-                       return stringifyFromCursor(cur, len);
-               }
-               else {
-                       return stringifyFromCursor(cur, len);
-               }
-       }
+       else
+               return stringifyFromCursor(cur, len);
  }
  
  
@@ -3318,10 +3450,10 @@ FindAndReplaceOptions::FindAndReplaceOptions(
         docstring const & find_buf_name, bool casesensitive,
         bool matchword, bool forward, bool expandmacros, bool ignoreformat,
         docstring const & repl_buf_name, bool keep_case,
-       SearchScope scope, SearchRestriction restr)
+       SearchScope scope, SearchRestriction restr, bool replace_all)
         : find_buf_name(find_buf_name), casesensitive(casesensitive), matchword(matchword),
           forward(forward), expandmacros(expandmacros), ignoreformat(ignoreformat),
-         repl_buf_name(repl_buf_name), keep_case(keep_case), scope(scope), restr(restr)
+         repl_buf_name(repl_buf_name), keep_case(keep_case), scope(scope), restr(restr), replace_all(replace_all)
  {
  }
  
@@ -3383,29 +3515,29 @@ static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase other
  } // namespace
  
  ///
-static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
+static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
  {
         Cursor & cur = bv->cursor();
         if (opt.repl_buf_name == docstring()
             || theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0
             || theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0)
-               return;
+               return 0;
  
         DocIterator sel_beg = cur.selectionBegin();
         DocIterator sel_end = cur.selectionEnd();
         if (&sel_beg.inset() != &sel_end.inset()
             || sel_beg.pit() != sel_end.pit()
             || sel_beg.idx() != sel_end.idx())
-               return;
+               return 0;
         int sel_len = sel_end.pos() - sel_beg.pos();
         LYXERR(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end
                << ", sel_len: " << sel_len << endl);
         if (sel_len == 0)
-               return;
-       LASSERT(sel_len > 0, return);
+               return 0;
+       LASSERT(sel_len > 0, return 0);
  
         if (!matchAdv(sel_beg, sel_len).match_len)
-               return;
+               return 0;
  
         // Build a copy of the replace buffer, adapted to the KeepCase option
         Buffer & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true);
@@ -3414,7 +3546,7 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M
         string lyx = oss.str();
         Buffer repl_buffer("", false);
         repl_buffer.setUnnamed(true);
-       LASSERT(repl_buffer.readString(lyx), return);
+       LASSERT(repl_buffer.readString(lyx), return 0);
         if (opt.keep_case && sel_len >= 2) {
                 LYXERR(Debug::FIND, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len);
                 if (cur.inTexted()) {
@@ -3439,9 +3571,10 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M
         } else if (cur.inMathed()) {
                 odocstringstream ods;
                 otexstream os(ods);
-               OutputParams runparams(&repl_buffer.params().encoding());
+               // OutputParams runparams(&repl_buffer.params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = false;
-               runparams.flavor = OutputParams::LATEX;
+               runparams.flavor = OutputParams::XETEX;
                 runparams.linelen = 8000; //lyxrc.plaintext_linelen;
                 runparams.dryrun = true;
                 TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams);
@@ -3466,6 +3599,7 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M
         LYXERR(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
         bv->putSelectionAt(DocIterator(cur), sel_len, !opt.forward);
         bv->processUpdateFlags(Update::Force);
+       return 1;
  }
  
  
@@ -3485,7 +3619,7 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
                 int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos();
                 if (length > 0)
                         bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
-               findAdvReplace(bv, opt, matchAdv);
+               num_replaced += findAdvReplace(bv, opt, matchAdv);
                 cur = bv->cursor();
                 if (opt.forward)
                         match_len = findForwardAdv(cur, matchAdv);
@@ -3498,11 +3632,31 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions const & opt)
         }
  
         if (match_len == 0) {
-               bv->message(_("Match not found!"));
+               if (num_replaced > 0) {
+                       switch (num_replaced)
+                       {
+                               case 1:
+                                       bv->message(_("One match has been replaced."));
+                                       break;
+                               case 2:
+                                       bv->message(_("Two matches have been replaced."));
+                                       break;
+                               default:
+                                       bv->message(bformat(_("%1$d matches have been replaced."), num_replaced));
+                                       break;
+                       }
+                       num_replaced = 0;
+               }
+               else {
+                       bv->message(_("Match not found."));
+               }
                 return false;
         }
  
-       bv->message(_("Match found!"));
+       if (num_replaced > 0)
+               bv->message(_("Match has been replaced."));
+       else
+               bv->message(_("Match found."));
  
         LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << match_len);
         bv->putSelectionAt(cur, match_len, !opt.forward);
@@ -3519,6 +3673,7 @@ ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt
            << opt.forward << ' '
            << opt.expandmacros << ' '
            << opt.ignoreformat << ' '
+          << opt.replace_all << ' '
            << to_utf8(opt.repl_buf_name) << "\nEOSS\n"
            << opt.keep_case << ' '
            << int(opt.scope) << ' '
@@ -3546,7 +3701,7 @@ istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
         }
         LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
         opt.find_buf_name = from_utf8(s);
-       is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat;
+       is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all;
         is.get();       // Waste space before replace string
         s = "";
         getline(is, line);