Use CJKutf8 package if input encoding is "utf8" and a used language requires CJK.

[lyx.git] / src / lyxfind.cpp
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index 20bef8dce05e263f6015e29c4ac224448ec76a82..a2c498e9b901bfcb1e35ac6f9a99cee466ec4cc4 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -32,6 +32,7 @@
  #include "ParIterator.h"
  #include "TexRow.h"
  #include "Text.h"
+#include "Encoding.h"
  
  #include "frontends/Application.h"
  #include "frontends/alert.h"
@@ -52,6 +53,7 @@
  #include "support/lstrings.h"
  
  #include "support/regex.h"
+#include "support/textutils.h"
  #include <map>
  
  using namespace std;
@@ -739,17 +741,20 @@ string escape_for_regex(string s, bool match_latex)
                 size_t new_pos = s.find("\\regexp{", pos);
                 if (new_pos == string::npos)
                         new_pos = s.size();
-               LYXERR(Debug::FIND, "new_pos: " << new_pos);
-               string t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes());
-               LYXERR(Debug::FIND, "t [lyx]: " << t);
-               t = apply_escapes(t, get_regexp_escapes());
-               LYXERR(Debug::FIND, "t [rxp]: " << t);
-               s.replace(pos, new_pos - pos, t);
-               new_pos = pos + t.size();
-               LYXERR(Debug::FIND, "Regexp after escaping: " << s);
-               LYXERR(Debug::FIND, "new_pos: " << new_pos);
-               if (new_pos == s.size())
-                       break;
+               string t;
+               if (new_pos > pos) {
+                       LYXERR(Debug::FIND, "new_pos: " << new_pos);
+                       t = apply_escapes(s.substr(pos, new_pos - pos), get_lyx_unescapes());
+                       LYXERR(Debug::FIND, "t [lyx]: " << t);
+                       t = apply_escapes(t, get_regexp_escapes());
+                       LYXERR(Debug::FIND, "t [rxp]: " << t);
+                       s.replace(pos, new_pos - pos, t);
+                       new_pos = pos + t.size();
+                       LYXERR(Debug::FIND, "Regexp after escaping: " << s);
+                       LYXERR(Debug::FIND, "new_pos: " << new_pos);
+                       if (new_pos == s.size())
+                               break;
+               }
                 // Might fail if \\endregexp{} is preceeded by unexpected stuff (weird escapes)
                 size_t end_pos = s.find("\\endregexp{}}", new_pos + 8);
                 LYXERR(Debug::FIND, "end_pos: " << end_pos);
@@ -832,6 +837,14 @@ bool braces_match(string::const_iterator const & beg,
  }
  
  
+class MatchResult {  // Outcome of one MatchStringAdv match attempt
+public:
+       int match_len;  // length of the matched text; 0 = no match, -1 = the text to search was empty
+       int match2end;  // distance from the match start to the end of the searched string
+       int pos;  // offset of the match start within the searched string
+       MatchResult(): match_len(0),match2end(0), pos(0) {};  // the defaults describe "no match"
+};
+
  /** The class performing a match between a position in the document and the FindAdvOptions.
   **/
  class MatchStringAdv {
@@ -848,7 +861,7 @@ public:
          ** @return
          ** The length of the matching text, or zero if no match was found.
          **/
-       int operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+       MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
  
  public:
         /// buffer
@@ -860,7 +873,7 @@ public:
  
  private:
         /// Auxiliary find method (does not account for opt.matchword)
-       int findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
+       MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
  
         /** Normalize a stringified or latexified LyX paragraph.
          **
@@ -898,12 +911,13 @@ private:
  
  static docstring buffer_to_latex(Buffer & buffer)
  {
-       OutputParams runparams(&buffer.params().encoding());
+       //OutputParams runparams(&buffer.params().encoding());
+       OutputParams runparams(encodings.fromLyXName("utf8"));
         odocstringstream ods;
         otexstream os(ods);
         runparams.nice = true;
-       runparams.flavor = OutputParams::LATEX;
-       runparams.linelen = 100000; //lyxrc.plaintext_linelen;
+       runparams.flavor = OutputParams::XETEX;
+       runparams.linelen = 10000; //lyxrc.plaintext_linelen;
         // No side effect of file copying and image conversion
         runparams.dryrun = true;
         runparams.for_search = true;
@@ -922,10 +936,11 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
         if (!opt.ignoreformat) {
                 str = buffer_to_latex(buffer);
         } else {
-               OutputParams runparams(&buffer.params().encoding());
+               // OutputParams runparams(&buffer.params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = true;
-               runparams.flavor = OutputParams::LATEX;
-               runparams.linelen = 100000; //lyxrc.plaintext_linelen;
+               runparams.flavor = OutputParams::XETEX;
+               runparams.linelen = 10000; //lyxrc.plaintext_linelen;
                 runparams.dryrun = true;
                 runparams.for_search = true;
                 for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
@@ -1040,7 +1055,7 @@ class KeyInfo {
       * Ignore all of them */
      isSize,
      invalid,
-    /* inputencoding, shortcut, ...
+    /* inputencoding, ...
       * Discard also content, because they do not help in search */
      doRemove,
      /* twocolumns, ...
@@ -1115,23 +1130,43 @@ class Intervall {
    void resetOpenedP(int openPos);
    void addIntervall(int upper);
    void addIntervall(int low, int upper); /* if explicit */
-  void setForDefaultLang(int upTo);
+  void removeAccents();
+  void setForDefaultLang(KeyInfo &defLang);
    int findclosing(int start, int end, char up, char down, int repeat);
    void handleParentheses(int lastpos, bool closingAllowed);
    bool hasTitle;
+  int isOpeningPar(int pos);
    string titleValue;
    void output(ostringstream &os, int lastpos);
    // string show(int lastpos);
  };
  
-void Intervall::setForDefaultLang(int upTo)
+int Intervall::isOpeningPar(int pos)  // 0: par[pos] is not '{' (or pos is out of range); 3: "{[}" or "{]}" starts at pos; 1: any other '{'
+{
+  if ((pos < 0) || (size_t(pos) >= par.size()))
+    return 0;
+  if (par[pos] != '{')
+    return 0;
+  if (size_t(pos) + 2 >= par.size())  // too close to the end of par for a 3-char "{x}" group
+    return 1;
+  if (par[pos+2] != '}')
+    return 1;
+  if (par[pos+1] == '[' || par[pos+1] == ']')
+    return 3;  // a protected bracket group; processRegion() steps over all of it
+  return 1;
+}
+
+void Intervall::setForDefaultLang(KeyInfo &defLang)
  {
    // Enable the use of first token again
    if (ignoreidx >= 0) {
-    if (borders[0].low < upTo)
-      borders[0].low = upTo;
-    if (borders[0].upper < upTo)
-      borders[0].upper = upTo;
+    int value = defLang._tokenstart + defLang._tokensize;
+    if (value > 0) {
+      if (borders[0].low < value)
+        borders[0].low = value;
+      if (borders[0].upper < value)
+        borders[0].upper = value;
+    }
    }
  }
  
@@ -1213,6 +1248,119 @@ void Intervall::addIntervall(int low, int upper)
    }
  }
  
+typedef map<string, string> AccentsMap;
+static AccentsMap accents = map<string, string>();  // macro text without the leading backslash (e.g. "ddot{a}") -> UTF-8 character
+
+static void buildaccent(string n, string param, string values)  // for every '|'-separated name in n: accents["name{c}"] = k-th UTF-8 char of values, c being the k-th char of param
+{
+  stringstream s(n);
+  string name;
+  const char delim = '|';
+  while (getline(s, name, delim)) {
+    size_t start = 0;  // byte offset of the next unread character in values (restarts for each name)
+    for (size_t i = 0; i < param.size(); i++) {
+      string key = name + "{" + param[i] + "}";
+      // get the corresponding utf8-value
+      if ((values[start] & 0xc0) != 0xc0) {  // not the lead byte of a multi-byte sequence
+        // should not happen, a multi-byte utf8 sequence starts with 11xxxxxx,
+       // but the value for '\dot{i}' is 'i', which is ascii
+       if ((values[start] & 0x80) == 0) {
+         // is ascii
+         accents[key] = values.substr(start, 1);
+       }
+       start++;
+       continue;
+      }
+      for (int j = 1; ;j++) {  // j = number of bytes of the current character seen so far
+        if (start + j >= values.size()) {
+          accents[key] = values.substr(start, j);
+          start = values.size() - 1;  // NOTE(review): leaves start on the last byte; presumably param never has more chars than values - confirm
+          break;
+        }
+        else if ((values[start+j] & 0xc0) != 0x80) {  // not a continuation byte (10xxxxxx)
+          // This is the first byte of the following utf8 char
+          accents[key] = values.substr(start, j);
+          start += j;
+          break;
+        }
+      }
+    }
+  }
+}
+
+static void buildAccentsMap()  // fills accents; removeAccents() calls this once, while the table is still empty
+{
+  accents["imath"] = "ı";  // keys that are not of the form "name{single char}" are entered explicitly
+  accents["i"] = "ı";
+  accents["jmath"] = "ȷ";
+  accents["lyxmathsym{ß}"] = "ß";
+  accents["text{ß}"] = "ß";
+  accents["ddot{\\imath}"] = "ï";
+  buildaccent("ddot", "aAeEiIioOuUyY",
+                      "äÄëËïÏïöÖüÜÿŸ");   // umlaut
+  buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+                       "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
+  accents["acute{\\imath}"] = "í";
+  buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
+                       "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ");
+  buildaccent("dacute|H|h", "oOuU", "őŐűŰ");       // double acute
+  buildaccent("mathring|r", "aAuUwy",
+                            "åÅůŮẘẙ");  // ring
+  accents["check{\\imath}"] = "ǐ";
+  accents["check{\\jmath}"] = "ǰ";
+  buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+                         "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ");  // caron
+  accents["hat{\\imath}"] = "î";
+  accents["hat{\\jmath}"] = "ĵ";
+  buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
+                       "âÂêÊîÎĉĈĝĜĥĤĵĴŝŜŵŴŷŶẑẐôÔûÛ");      // circ
+  accents["bar{\\imath}"] = "ī";
+  buildaccent("bar|=", "aAeEiIoOuUyY",
+                       "āĀēĒīĪōŌūŪȳȲ");    // macron
+  accents["tilde{\\imath}"] = "ĩ";
+  buildaccent("tilde", "aAnNoOiIuU",
+                       "ãÃñÑõÕĩĨũŨ");        // tilde
+  accents["breve{\\imath}"] = "ĭ";
+  buildaccent("breve|u", "aAeEgGiIoOuU",
+                         "ăĂĕĔğĞĭĬŏŎŭŬ");    // breve
+  accents["grave{\\imath}"] = "ì";
+  buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
+                         "àÀèÈìÌòÒùÙǹǸẁẀỳỲ");   // grave
+  buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
+                          "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ");  // dot below
+}
+
+/*
+ * Accents written in a math or regexp environment appear as macros in par;
+ * overwrite each known one in place with its utf8 equivalent.
+ */
+void Intervall::removeAccents()
+{
+  if (accents.empty())  // build the lookup table on first use
+    buildAccentsMap();
+  static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");  // group 1 = the macro without its backslash, i.e. the accents key
+  smatch sub;
+  for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {  // par is only overwritten below, never resized
+    sub = *itacc;
+    string key = sub.str(1);
+    if (accents.find(key) != accents.end()) {
+      string val = accents[key];
+      size_t pos = sub.position(0);
+      for (size_t i = 0; i < val.size(); i++) {  // put the utf8 bytes at the start of the matched macro
+        par[pos+i] = val[i];
+      }
+      addIntervall(pos+val.size(), pos + sub.str(0).size());  // presumably marks the leftover bytes of the macro as ignored - confirm against addIntervall()
+      for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
+        // remove traces of any remaining chars
+        par[i] = ' ';
+      }
+    }
+    else {
+      LYXERR0("Not added accent for \"" << key << "\"");  // matched the pattern, but there is no table entry for it
+    }
+  }
+}
+
  void Intervall::handleOpenP(int i)
  {
    actualdeptindex++;
@@ -1352,7 +1500,7 @@ class LatexInfo {
      else
        return entries[keyinfo];
    };
-  void setForDefaultLang(int upTo) {interval.setForDefaultLang(upTo);};
+  void setForDefaultLang(KeyInfo &defLang) {interval.setForDefaultLang(defLang);};
    void addIntervall(int low, int up) { interval.addIntervall(low, up); };
  };
  
@@ -1448,6 +1596,8 @@ void LatexInfo::buildEntries(bool isPatternString)
    size_t math_pos = 10000;
    string math_end;
  
+  interval.removeAccents();
+
    for (sregex_iterator itmath(interval.par.begin(), interval.par.end(), rmath), end; itmath != end; ++itmath) {
      submath = *itmath;
      if (math_end_waiting) {
@@ -1533,6 +1683,10 @@ void LatexInfo::buildEntries(bool isPatternString)
          found._dataStart = found._dataEnd;
          found._tokensize = found._dataEnd - found._tokenstart;
          found.parenthesiscount = 0;
+        found.head = interval.par.substr(found._tokenstart, found._tokensize);
+      }
+      else {
+        continue;
        }
      }
      else {
@@ -1572,6 +1726,7 @@ void LatexInfo::buildEntries(bool isPatternString)
          found._dataEnd = found._tokenstart + found._tokensize;
          found._dataStart = found._dataEnd;
          found.parenthesiscount = 0;
+        found.head = interval.par.substr(found._tokenstart, found._tokensize);
          evaluatingMath = true;
        }
        else {
@@ -1622,9 +1777,11 @@ void LatexInfo::buildEntries(bool isPatternString)
              found.head = interval.par.substr(found._tokenstart, found._tokensize);
            }
            else {
+            // Swallow possible optional params
              while (interval.par[pos1] == '[') {
                pos1 = interval.findclosing(pos1+1, interval.par.length(), '[', ']')+1;
              }
+            // Also swallow the parameter, if there is one
              if (interval.par[pos1] == '{') {
                found._dataEnd = interval.findclosing(pos1+1, interval.par.length()) + 1;
              }
@@ -1634,6 +1791,7 @@ void LatexInfo::buildEntries(bool isPatternString)
              found._dataStart = found._dataEnd;
              found._tokensize = count + found._dataEnd - pos;
              found.parenthesiscount = 0;
+            found.head = interval.par.substr(found._tokenstart, found._tokensize);
              found.disabled = true;
            }
          }
@@ -1643,6 +1801,7 @@ void LatexInfo::buildEntries(bool isPatternString)
            found._dataEnd = found._dataStart;
            found._tokensize = count + found._dataEnd - pos;
            found.parenthesiscount = 0;
+          found.head = interval.par.substr(found._tokenstart, found._tokensize);
            found.disabled = true;
          }
        }
@@ -1699,6 +1858,7 @@ void LatexInfo::buildEntries(bool isPatternString)
              found.head = "\\" + key + "{";
            }
          }
+        found._tokensize = found.head.length();
          found._dataStart = found._tokenstart + found.head.length();
          if (interval.par.substr(found._dataStart-1, 15).compare("\\endarguments{}") == 0) {
            found._dataStart += 15;
@@ -1814,7 +1974,7 @@ void LatexInfo::buildKeys(bool isPatternString)
    // Skip
    // makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
    // Custom space/skip, remove the content (== length value)
-  makeKey("vspace|hspace|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString);
+  makeKey("vspace|vspace*|hspace|hspace*|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString);
    // Found in fr/UserGuide.lyx
    makeKey("og|fg", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
    // quotes
@@ -1822,7 +1982,7 @@ void LatexInfo::buildKeys(bool isPatternString)
    makeKey("textquotedblleft|textquotedblright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
    // Known macros to remove (including their parameter)
    // No split
-  makeKey("inputencoding|shortcut|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString);
+  makeKey("input|inputencoding|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString);
    makeKey("addtocounter|setlength",                 KeyInfo(KeyInfo::noContent, 2, true), isPatternString);
    // handle like standard keys with 1 parameter.
    makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString);
@@ -1840,13 +2000,14 @@ void LatexInfo::buildKeys(bool isPatternString)
    // Remove table decorations
    makeKey("hline|tabularnewline|toprule|bottomrule|midrule", KeyInfo(KeyInfo::doRemove, 0, true), isPatternString);
    // Discard shape-header.
-  // For footnote too, because of possible lang settings
+  // For footnote or shortcut too, because of lang settings
    // and wrong handling if used 'KeyInfo::noMain'
    makeKey("circlepar|diamondpar|heartpar|nutpar",  KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
    makeKey("trianglerightpar|hexagonpar|starpar",   KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
    makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
    makeKey("triangleleftpar|shapepar|dropuppar",    KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
-  makeKey("hphantom|footnote|includegraphics",     KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+  makeKey("hphantom|vphantom|footnote|shortcut|include|includegraphics",     KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+  makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString);
    // like ('tiny{}' or '\tiny ' ... )
    makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString);
  
@@ -1980,14 +2141,15 @@ void Intervall::output(ostringstream &os, int lastpos)
  void LatexInfo::processRegion(int start, int region_end)
  {
    while (start < region_end) {          /* Let {[} and {]} survive */
-    if ((interval.par[start] == '{') &&
-        (interval.par[start+1] != ']') &&
-        (interval.par[start+1] != '[')) {
+    int cnt = interval.isOpeningPar(start);
+    if (cnt == 1) {
        // Closing is allowed past the region
        int closing = interval.findclosing(start+1, interval.par.length());
        interval.addIntervall(start, start+1);
        interval.addIntervall(closing, closing+1);
      }
+    else if (cnt == 3)
+      start += 2;
      start = interval.nextNotIgnored(start+1);
    }
  }
@@ -2077,9 +2239,9 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
      }
      case KeyInfo::endArguments:
        // Remove trailing '{}' too
-      actual._dataStart += 2;
-      actual._dataEnd += 2;
-      interval.addIntervall(actual._tokenstart, actual._dataEnd);
+      actual._dataStart += 1;
+      actual._dataEnd += 1;
+      interval.addIntervall(actual._tokenstart, actual._dataEnd+1);
        nextKeyIdx = getNextKey();
        break;
      case KeyInfo::noMain:
@@ -2221,6 +2383,11 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
      case KeyInfo::isMain: {
        if (interval.par.substr(actual._dataStart, 2) == "% ")
          interval.addIntervall(actual._dataStart, actual._dataStart+2);
+      if (actual._tokenstart > 0) {
+        int prev = interval.previousNotIgnored(actual._tokenstart - 1);
+        if ((prev >= 0) && interval.par[prev] == '%')
+          interval.addIntervall(prev, prev+1);
+      }
        if (actual.disabled) {
          removeHead(actual);
          if ((interval.par.substr(actual._dataStart, 3) == " \\[") ||
@@ -2232,9 +2399,9 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
          // interval.resetOpenedP(actual._dataStart-1);
        }
        else {
-        if (actual._tokenstart == 0) {
+        if (actual._tokenstart < 26) {
            // for the first (and maybe dummy) language
-          interval.setForDefaultLang(actual._tokenstart + actual._tokensize);
+          interval.setForDefaultLang(actual);
          }
          interval.resetOpenedP(actual._dataStart-1);
        }
@@ -2300,7 +2467,7 @@ int LatexInfo::process(ostringstream &os, KeyInfo &actual )
    }
    // Remove possible empty data
    int dstart = interval.nextNotIgnored(actual._dataStart);
-  while ((dstart < output_end) && (interval.par[dstart] == '{')) {
+  while (interval.isOpeningPar(dstart) == 1) {
      interval.addIntervall(dstart, dstart+1);
      int dend = interval.findclosing(dstart+1, output_end);
      interval.addIntervall(dend, dend+1);
@@ -2349,10 +2516,8 @@ string splitOnKnownMacros(string par, bool isPatternString)
          firstKey._dataStart = datastart;
          firstKey._dataEnd = par.length();
          (void) li.setNextKey(nextkeyIdx);
-        if (firstKey._tokensize > 0) {
-          // Fake the last opened parenthesis
-          li.setForDefaultLang(firstKey._tokensize);
-        }
+        // Fake the last opened parenthesis
+        li.setForDefaultLang(firstKey);
          nextkeyIdx = li.process(os, firstKey);
        }
        else {
@@ -2360,8 +2525,7 @@ string splitOnKnownMacros(string par, bool isPatternString)
            firstKey._dataStart = datastart;
            firstKey._dataEnd = nextKey._dataEnd+1;
            (void) li.setNextKey(nextkeyIdx);
-          if (firstKey._tokensize > 0)
-            li.setForDefaultLang(firstKey._tokensize);
+          li.setForDefaultLang(firstKey);
            nextkeyIdx = li.process(os, firstKey);
          }
          else {
@@ -2375,8 +2539,7 @@ string splitOnKnownMacros(string par, bool isPatternString)
      // Check if ! empty
      if ((firstKey._dataStart < firstKey._dataEnd) &&
          (par[firstKey._dataStart] != '}')) {
-      if (firstKey._tokensize > 0)
-        li.setForDefaultLang(firstKey._tokensize);
+      li.setForDefaultLang(firstKey);
        (void) li.process(os, firstKey);
      }
      s = os.str();
@@ -2425,14 +2588,14 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with
                 missed = 0;
                 if (withformat) {
                         regex_f = identifyFeatures(result);
-                        string features = "";
+                       string features = "";
                         for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
                                 string a = it->first;
                                 regex_with_format = true;
-                                features += " " + a;
+                               features += " " + a;
                                 // LYXERR0("Identified regex format:" << a);
                         }
-                        LYXERR(Debug::FIND, "Identified Features" << features);
+                       LYXERR(Debug::FIND, "Identified Features" << features);
  
                 }
         } else if (regex_with_format) {
@@ -2536,10 +2699,22 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
                         ++close_wildcards;
                 }
                 if (!opt.ignoreformat) {
-                       // Remove extra '\}' at end
-                       while ( regex_replace(par_as_string, par_as_string, "(.*)\\\\}$", "$1")) {
-                               open_braces++;
+                       // Remove extra '\}' at end if not part of \{\.\}
+                       size_t lng = par_as_string.size();
+                       while(lng > 2) {
+                               if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) {
+                                       if (lng >= 6) {
+                                               if (par_as_string.substr(lng-6,3).compare("\\{\\") == 0)
+                                                       break;
+                                       }
+                                       lng -= 2;
+                                       open_braces++;
+                               }
+                               else
+                                       break;
                         }
+                       if (lng < par_as_string.size())
+                               par_as_string = par_as_string.substr(0,lng);
                         /*
                         // save '\.'
                         regex_replace(par_as_string, par_as_string, "\\\\\\.", "_xxbdotxx_");
@@ -2635,18 +2810,23 @@ static int computeSize(string s, int len)
         return count;
  }
  
-int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
+MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
  {
+       MatchResult mres;
+
         if (at_begin &&
                 (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
-               return 0;
+               return mres;
  
         docstring docstr = stringifyFromForSearch(opt, cur, len);
         string str = normalize(docstr, true);
         if (!opt.ignoreformat) {
                 str = correctlanguagesetting(str, false, !opt.ignoreformat);
         }
-       if (str.empty()) return(-1);
+       if (str.empty()) {
+               mres.match_len = -1;
+               return mres;
+       }
         LYXERR(Debug::FIND, "Matching against     '" << lyx::to_utf8(docstr) << "'");
         LYXERR(Debug::FIND, "After normalization: '" << str << "'");
  
@@ -2663,21 +2843,21 @@ int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) con
                 }
                 sregex_iterator re_it(str.begin(), str.end(), *p_regexp, flags);
                 if (re_it == sregex_iterator())
-                       return 0;
+                       return mres;
                 match_results<string::const_iterator> const & m = *re_it;
  
                 if (0) { // Kornel Benko: DO NOT CHECKK
                         // Check braces on the segment that matched the entire regexp expression,
                         // plus the last subexpression, if a (.*?) was inserted in the constructor.
                         if (!braces_match(m[0].first, m[0].second, open_braces))
-                               return 0;
+                               return mres;
                 }
  
                 // Check braces on segments that matched all (.*?) subexpressions,
                 // except the last "padding" one inserted by lyx.
                 for (size_t i = 1; i < m.size() - 1; ++i)
                         if (!braces_match(m[i].first, m[i].second, open_braces))
-                               return 0;
+                               return mres;
  
                 // Exclude from the returned match length any length
                 // due to close wildcards added at end of regexp
@@ -2710,7 +2890,10 @@ int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) con
                         result -= leadingsize;
                 else
                         result = 0;
-               return computeSize(str.substr(pos+leadingsize,result), result);
+               mres.match_len = computeSize(str.substr(pos+leadingsize,result), result);
+               mres.match2end = str.size() - pos - leadingsize;
+               mres.pos = pos+leadingsize;
+               return mres;
         }
  
         // else !use_regexp: but all code paths above return
@@ -2723,28 +2906,39 @@ int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) con
         if (at_begin) {
                 LYXERR(Debug::FIND, "size=" << par_as_string.size()
                                          << ", substr='" << str.substr(0, par_as_string.size()) << "'");
-               if (str.substr(0, par_as_string.size()) == par_as_string)
-                       return par_as_string.size();
+               if (str.substr(0, par_as_string.size()) == par_as_string) {
+                       mres.match_len = par_as_string.size();
+                       mres.match2end = str.size();
+                       mres.pos = 0;
+                       return mres;
+               }
         } else {
                 size_t pos = str.find(par_as_string_nolead);
-               if (pos != string::npos)
-                       return par_as_string.size();
+               if (pos != string::npos) {
+                       mres.match_len = par_as_string.size();
+                       mres.match2end = str.size() - pos;
+                       mres.pos = pos;
+                       return mres;
+               }
         }
-       return 0;
+       return mres;
  }
  
  
-int MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
+MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
  {
-       int res = findAux(cur, len, at_begin);
+       MatchResult mres = findAux(cur, len, at_begin);
+       int res = mres.match_len;
         LYXERR(Debug::FIND,
                "res=" << res << ", at_begin=" << at_begin
                << ", matchword=" << opt.matchword
                << ", inTexted=" << cur.inTexted());
         if (res == 0 || !at_begin || !opt.matchword || !cur.inTexted())
-               return res;
-        if ((len > 0) && (res < len))
-          return 0;
+               return mres;
+        if ((len > 0) && (res < len)) {
+         mres.match_len = 0;
+          return mres;
+       }
         Paragraph const & par = cur.paragraph();
         bool ws_left = (cur.pos() > 0)
                 ? par.isWordSeparator(cur.pos() - 1)
@@ -2760,12 +2954,15 @@ int MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin)
         if (ws_left && ws_right) {
            // Check for word separators inside the found 'word'
            for (int i = 0; i < len; i++) {
-            if (par.isWordSeparator(cur.pos() + i))
-              return 0;
+            if (par.isWordSeparator(cur.pos() + i)) {
+             mres.match_len = 0;
+              return mres;
+           }
            }
-          return res;
+          return mres;
          }
-       return 0;
+       mres.match_len = 0;
+       return mres;
  }
  
  
@@ -2787,14 +2984,14 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
         while ((pos = t.find("\n")) != string::npos) {
                 if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
                         // Handle '\\\n'
-                       if (std::isalnum(t[pos+1])) {
+                       if (isAlnumASCII(t[pos+1])) {
                                 t.replace(pos-2, 3, " ");
                         }
                         else {
                                 t.replace(pos-2, 3, "");
                         }
                 }
-               else if (!std::isalnum(t[pos+1]) || !std::isalnum(t[pos-1])) {
+               else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
                         // '\n' adjacent to non-alpha-numerics, discard
                         t.replace(pos, 1, "");
                 }
@@ -2838,12 +3035,14 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                 // TODO Try adding a AS_STR_INSERTS as last arg
                 pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ?
                         int(par.size()) : cur.pos() + len;
-               OutputParams runparams(&cur.buffer()->params().encoding());
+               // OutputParams runparams(&cur.buffer()->params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = true;
-               runparams.flavor = OutputParams::LATEX;
-               runparams.linelen = 100000; //lyxrc.plaintext_linelen;
+               runparams.flavor = OutputParams::XETEX;
+               runparams.linelen = 10000; //lyxrc.plaintext_linelen;
                 // No side effect of file copying and image conversion
                 runparams.dryrun = true;
+               runparams.for_search = true;
                 LYXERR(Debug::FIND, "Stringifying with cur: "
                        << cur << ", from pos: " << cur.pos() << ", end: " << end);
                 return par.asString(cur.pos(), end,
@@ -2882,9 +3081,10 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
  
         odocstringstream ods;
         otexstream os(ods);
-       OutputParams runparams(&buf.params().encoding());
+       //OutputParams runparams(&buf.params().encoding());
+       OutputParams runparams(encodings.fromLyXName("utf8"));
         runparams.nice = false;
-       runparams.flavor = OutputParams::LATEX;
+       runparams.flavor = OutputParams::XETEX;
         runparams.linelen = 8000; //lyxrc.plaintext_linelen;
         // No side effect of file copying and image conversion
         runparams.dryrun = true;
@@ -2957,9 +3157,9 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
                 d = cur.depth();
                 old_cur = cur;
                 cur.forwardPos();
-       } while (cur && cur.depth() > d && match(cur) > 0);
+       } while (cur && cur.depth() > d && match(cur).match_len > 0);
         cur = old_cur;
-       int max_match = match(cur);     /* match valid only if not searching whole words */
+       int max_match = match(cur).match_len;     /* match valid only if not searching whole words */
         if (max_match <= 0) return 0;
         LYXERR(Debug::FIND, "Ok");
  
@@ -2969,17 +3169,17 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
           return 0;
         if (match.opt.matchword) {
            LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
-          while (cur.pos() + len <= cur.lastpos() && match(cur, len) <= 0) {
+          while (cur.pos() + len <= cur.lastpos() && match(cur, len).match_len <= 0) {
              ++len;
              LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
            }
            // Length of matched text (different from len param)
-          int old_match = match(cur, len);
+          int old_match = match(cur, len).match_len;
            if (old_match < 0)
              old_match = 0;
            int new_match;
            // Greedy behaviour while matching regexps
-          while ((new_match = match(cur, len + 1)) > old_match) {
+          while ((new_match = match(cur, len + 1).match_len) > old_match) {
              ++len;
              old_match = new_match;
              LYXERR(Debug::FIND, "verifying   match with len = " << len);
@@ -2992,7 +3192,7 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
           int maxl = cur.lastpos() - cur.pos();
           // Greedy behaviour while matching regexps
           while (maxl > minl) {
-           int actual_match = match(cur, len);
+           int actual_match = match(cur, len).match_len;
             if (actual_match >= max_match) {
               // actual_match > max_match _can_ happen,
               // if the search area splits
@@ -3023,7 +3223,7 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
              }
              if (cur.pos() != old_cur.pos()) {
                // OK, forwarded 1 pos in actual inset
-              actual_match = match(cur, len-1);
+              actual_match = match(cur, len-1).match_len;
                if (actual_match == max_match) {
                  // Ha, got it! The shorter selection has the same match length
                  len--;
@@ -3037,7 +3237,7 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
              }
              else {
                LYXERR0("cur.pos() == old_cur.pos(), this should never happen");
-              actual_match = match(cur, len);
+              actual_match = match(cur, len).match_len;
                if (actual_match == max_match)
                  old_cur = cur;
              }
@@ -3054,16 +3254,58 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                 return 0;
         while (!theApp()->longOperationCancelled() && cur) {
                 LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
-               int match_len = match(cur, -1, false);
+               MatchResult mres = match(cur, -1, false);
+               int match_len = mres.match_len;
                 LYXERR(Debug::FIND, "match_len: " << match_len);
+               if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
+                       LYXERR0("BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
+                       match_len = 0;
+               }
                 if (match_len > 0) {
+                       // Try to find the beginning of the searched string
+                       int increment = mres.pos/2;
+                       while (mres.pos > 5 && (increment > 5)) {
+                               DocIterator old_cur = cur;
+                               for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
+                               }
+                               if (! cur || (cur.pit() > old_cur.pit())) {
+                                       // Are we outside of the paragraph?
+                                       // This can happen if moving past some UTF8-encoded chars
+                                       cur = old_cur;
+                                       increment /= 2;
+                               }
+                               else {
+                                       MatchResult mres2 = match(cur, -1, false);
+                                       if ((mres2.match2end < mres.match2end) ||
+                                         (mres2.match_len < mres.match_len)) {
+                                               cur = old_cur;
+                                               increment /= 2;
+                                       }
+                                       else {
+                                               mres = mres2;
+                                               increment -= 2;
+                                               if (increment > mres.pos/2)
+                                                       increment = mres.pos/2;
+                                       }
+                               }
+                       }
                         int match_len_zero_count = 0;
-                       for (; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
+                       for (int i = 0; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
+                               if (i++ > 10) {
+                                       int remaining_len = match(cur, -1, false).match_len;
+                                       if (remaining_len <= 0) {
+                                               // Apparently the searched string is not in the remaining part
+                                               break;
+                                       }
+                                       else {
+                                               i = 0;
+                                       }
+                               }
                                 LYXERR(Debug::FIND, "Advancing cur: " << cur);
-                               int match_len3 = match(cur, 1);
+                               int match_len3 = match(cur, 1).match_len;
                                 if (match_len3 < 0)
                                         continue;
-                               int match_len2 = match(cur);
+                               int match_len2 = match(cur).match_len;
                                 LYXERR(Debug::FIND, "match_len2: " << match_len2);
                                 if (match_len2 > 0) {
                                         // Sometimes in finalize we understand it wasn't a match
@@ -3115,7 +3357,7 @@ int findMostBackwards(DocIterator & cur, MatchStringAdv const & match)
                 LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
                 DocIterator new_cur = cur;
                 new_cur.backwardPos();
-               if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur))
+               if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
                         break;
                 int new_len = findAdvFinalize(new_cur, match);
                 if (new_len == len)
@@ -3141,7 +3383,7 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
         bool pit_changed = false;
         do {
                 cur.pos() = 0;
-               bool found_match = match(cur, -1, false);
+               bool found_match = (match(cur, -1, false).match_len > 0);
  
                 if (found_match) {
                         if (pit_changed)
@@ -3151,7 +3393,7 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
                         LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur);
                         DocIterator cur_prev_iter;
                         do {
-                               found_match = match(cur);
+                               found_match = (match(cur).match_len > 0);
                                 LYXERR(Debug::FIND, "findBackAdv3: found_match="
                                        << found_match << ", cur: " << cur);
                                 if (found_match)
@@ -3281,7 +3523,7 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M
                 return;
         LASSERT(sel_len > 0, return);
  
-       if (!matchAdv(sel_beg, sel_len))
+       if (!matchAdv(sel_beg, sel_len).match_len)
                 return;
  
         // Build a copy of the replace buffer, adapted to the KeepCase option
@@ -3316,9 +3558,10 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M
         } else if (cur.inMathed()) {
                 odocstringstream ods;
                 otexstream os(ods);
-               OutputParams runparams(&repl_buffer.params().encoding());
+               // OutputParams runparams(&repl_buffer.params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = false;
-               runparams.flavor = OutputParams::LATEX;
+               runparams.flavor = OutputParams::XETEX;
                 runparams.linelen = 8000; //lyxrc.plaintext_linelen;
                 runparams.dryrun = true;
                 TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams);