Use CJKutf8 package if input encoding is "utf8" and a used language requires CJK.

[lyx.git] / src / lyxfind.cpp
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index d6391d549f606785aeb48f2e25cda8fc392bba93..a2c498e9b901bfcb1e35ac6f9a99cee466ec4cc4 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -32,6 +32,7 @@
  #include "ParIterator.h"
  #include "TexRow.h"
  #include "Text.h"
+#include "Encoding.h"
  
  #include "frontends/Application.h"
  #include "frontends/alert.h"
@@ -910,7 +911,8 @@ private:
  
  static docstring buffer_to_latex(Buffer & buffer)
  {
-       OutputParams runparams(&buffer.params().encoding());
+       //OutputParams runparams(&buffer.params().encoding());
+       OutputParams runparams(encodings.fromLyXName("utf8"));
         odocstringstream ods;
         otexstream os(ods);
         runparams.nice = true;
@@ -934,7 +936,8 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
         if (!opt.ignoreformat) {
                 str = buffer_to_latex(buffer);
         } else {
-               OutputParams runparams(&buffer.params().encoding());
+               // OutputParams runparams(&buffer.params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = true;
                 runparams.flavor = OutputParams::XETEX;
                 runparams.linelen = 10000; //lyxrc.plaintext_linelen;
@@ -1248,25 +1251,38 @@ void Intervall::addIntervall(int low, int upper)
  typedef map<string, string> AccentsMap;
  static AccentsMap accents = map<string, string>();
  
-static void buildaccent(string name, string param, string values)
+static void buildaccent(string n, string param, string values)
  {
-  size_t start = 0;
-  for (size_t i = 0; i < param.size(); i++) {
-    string key = name + "{" + param[i] + "}";
-    // get the corresponding utf8-value
-    if ((values[start] & 0xc0) != 0xc0) {
-      // should not happen, utf8 encoding starts at least with 11xxxxxx
-      start++;
-      continue;
-    }
-    for (int j = 1; ;j++) {
-      if (start + j >= values.size())
-        break;
-      if ((values[start+j] & 0xc0) == 0xc0) {
-        // This is the first byte of following utf8 char
-        accents[key] = values.substr(start, j);
-        start += j;
-        break;
+  stringstream s(n);
+  string name;
+  const char delim = '|';
+  while (getline(s, name, delim)) {
+    size_t start = 0;
+    for (size_t i = 0; i < param.size(); i++) {
+      string key = name + "{" + param[i] + "}";
+      // get the corresponding utf8-value
+      if ((values[start] & 0xc0) != 0xc0) {
+        // should not happen, utf8 encoding starts at least with 11xxxxxx
+       // but value for '\dot{i}' is 'i', which is ascii
+       if ((values[start] & 0x80) == 0) {
+         // is ascii
+         accents[key] = values.substr(start, 1);
+       }
+       start++;
+       continue;
+      }
+      for (int j = 1; ;j++) {
+        if (start + j >= values.size()) {
+          accents[key] = values.substr(start, j);
+          start = values.size() - 1;
+          break;
+        }
+        else if ((values[start+j] & 0xc0) != 0x80) {
+          // This is the first byte of following utf8 char
+          accents[key] = values.substr(start, j);
+          start += j;
+          break;
+        }
        }
      }
    }
@@ -1275,21 +1291,43 @@ static void buildaccent(string name, string param, string values)
  static void buildAccentsMap()
  {
    accents["imath"] = "ı";
+  accents["i"] = "ı";
+  accents["jmath"] = "ȷ";
+  accents["lyxmathsym{ß}"] = "ß";
+  accents["text{ß}"] = "ß";
    accents["ddot{\\imath}"] = "ï";
+  buildaccent("ddot", "aAeEiIioOuUyY",
+                      "äÄëËïÏïöÖüÜÿŸ");   // umlaut
+  buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+                       "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
    accents["acute{\\imath}"] = "í";
-  accents["lyxmathsym{ß}"] = "ß";
-  buildaccent("ddot", "aeouyAEOUY", "äëöüÿÄËÖÜŸ");
-  buildaccent("dot", "aeoyzAEOYZ", "ȧėȯẏżȦĖȮẎŻ");
-  buildaccent("acute", "aeouyAEOUY", "äëöüÿÄËÖÜŸ");
-  /*
-  buildaccent("dacute", "oOuU", "őŐűŰ");
-  buildaccent("H", "oOuU", "őŐűŰ");        // dacute in text
-  */
-  buildaccent("mathring", "uU", "ůŮ");
-  buildaccent("r", "uU", "ůŮ");      //mathring in text
-  buildaccent("check", "cdnrszCDNRSZ", "čďřňšžČĎŘŇŠŽ");
-  buildaccent("hat", "cCoOgGhHsS", "ĉĈôÔĝĜĥĤŝŜ");
-  buildaccent("bar", "aAeE", "āĀēĒ");
+  buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
+                       "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ");
+  buildaccent("dacute|H|h", "oOuU", "őŐűŰ");       // double acute
+  buildaccent("mathring|r", "aAuUwy",
+                            "åÅůŮẘẙ");  // ring
+  accents["check{\\imath}"] = "ǐ";
+  accents["check{\\jmath}"] = "ǰ";
+  buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+                         "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ");  // caron
+  accents["hat{\\imath}"] = "î";
+  accents["hat{\\jmath}"] = "ĵ";
+  buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
+                       "âÂêÊîÎĉĈĝĜĥĤĵĴŝŜŵŴŷŶẑẐôÔûÛ");      // circ
+  accents["bar{\\imath}"] = "ī";
+  buildaccent("bar|=", "aAeEiIoOuUyY",
+                       "āĀēĒīĪōŌūŪȳȲ");    // macron
+  accents["tilde{\\imath}"] = "ĩ";
+  buildaccent("tilde", "aAnNoOiIuU",
+                       "ãÃñÑõÕĩĨũŨ");        // tilde
+  accents["breve{\\imath}"] = "ĭ";
+  buildaccent("breve|u", "aAeEgGiIoOuU",
+                         "ăĂĕĔğĞĭĬŏŎŭŬ");    // breve
+  accents["grave{\\imath}"] = "ì";
+  buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
+                         "àÀèÈìÌòÒùÙǹǸẁẀỳỲ");   // grave
+  buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
+                          "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ");  // dot below
  }
  
  /*
@@ -1300,7 +1338,7 @@ void Intervall::removeAccents()
  {
    if (accents.empty())
      buildAccentsMap();
-  static regex const accre("\\\\((lyxmathsym|ddot|dot|acute|mathring|r|check|check|hat|bar)\\{[^\\{\\}]+\\}|imath)");
+  static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
    smatch sub;
    for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
      sub = *itacc;
@@ -1312,6 +1350,10 @@ void Intervall::removeAccents()
          par[pos+i] = val[i];
        }
        addIntervall(pos+val.size(), pos + sub.str(0).size());
+      for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
+        // remove traces of any remaining chars
+        par[i] = ' ';
+      }
      }
      else {
        LYXERR0("Not added accent for \"" << key << "\"");
@@ -2546,14 +2588,14 @@ static string correctlanguagesetting(string par, bool isPatternString, bool with
                 missed = 0;
                 if (withformat) {
                         regex_f = identifyFeatures(result);
-                        string features = "";
+                       string features = "";
                         for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
                                 string a = it->first;
                                 regex_with_format = true;
-                                features += " " + a;
+                               features += " " + a;
                                 // LYXERR0("Identified regex format:" << a);
                         }
-                        LYXERR(Debug::FIND, "Identified Features" << features);
+                       LYXERR(Debug::FIND, "Identified Features" << features);
  
                 }
         } else if (regex_with_format) {
@@ -2993,7 +3035,8 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                 // TODO Try adding a AS_STR_INSERTS as last arg
                 pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ?
                         int(par.size()) : cur.pos() + len;
-               OutputParams runparams(&cur.buffer()->params().encoding());
+               // OutputParams runparams(&cur.buffer()->params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = true;
                 runparams.flavor = OutputParams::XETEX;
                 runparams.linelen = 10000; //lyxrc.plaintext_linelen;
@@ -3038,7 +3081,8 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
  
         odocstringstream ods;
         otexstream os(ods);
-       OutputParams runparams(&buf.params().encoding());
+       //OutputParams runparams(&buf.params().encoding());
+       OutputParams runparams(encodings.fromLyXName("utf8"));
         runparams.nice = false;
         runparams.flavor = OutputParams::XETEX;
         runparams.linelen = 8000; //lyxrc.plaintext_linelen;
@@ -3224,7 +3268,9 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                 DocIterator old_cur = cur;
                                 for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
                                 }
-                               if (! cur) {
+                               if (! cur || (cur.pit() > old_cur.pit())) {
+                                       // Are we outside of the paragraph?
+                                       // This can happen if moving past some UTF8-encoded chars
                                         cur = old_cur;
                                         increment /= 2;
                                 }
@@ -3512,7 +3558,8 @@ static void findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, M
         } else if (cur.inMathed()) {
                 odocstringstream ods;
                 otexstream os(ods);
-               OutputParams runparams(&repl_buffer.params().encoding());
+               // OutputParams runparams(&repl_buffer.params().encoding());
+               OutputParams runparams(encodings.fromLyXName("utf8"));
                 runparams.nice = false;
                 runparams.flavor = OutputParams::XETEX;
                 runparams.linelen = 8000; //lyxrc.plaintext_linelen;