From c041439c517e501f123eb12fe7af47e0929c5b53 Mon Sep 17 00:00:00 2001
From: Kornel Benko <kornel@lyx.org>
Date: Sun, 10 Mar 2019 00:29:56 +0100
Subject: [PATCH] =?utf8?q?FindAdv:=20Special=20handling=20for=20\dot{i}=20?=
 =?utf8?q?and=20'=C3=9F'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Different behaviour in regexp{..} for 'İ' and 'ß':
1.) The lowercase routine for 'İ' gives 'İ', so that if we are searching
  while ignoring case, the string '\dot{I}' is converted to '\dot{i}'.
  In this case we have to change it to 'İ' (instead of 'i', as one would expect).

2.) If 'ß' is inserted via keyboard in a freshly created regexp box, it appears as \lyxmathsym{ß};
  if pasted from the lyx-screen, it appears as \text{ß}.
---
 src/lyxfind.cpp | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index 8c11bdbe8e..a2c498e9b9 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -1263,8 +1263,13 @@ static void buildaccent(string n, string param, string values)
       // get the corresponding utf8-value
       if ((values[start] & 0xc0) != 0xc0) {
         // should not happen, utf8 encoding starts at least with 11xxxxxx
-        start++;
-        continue;
+	// but value for '\dot{i}' is 'i', which is ascii
+	if ((values[start] & 0x80) == 0) {
+	  // is ascii
+	  accents[key] = values.substr(start, 1);
+	}
+	start++;
+	continue;
       }
       for (int j = 1; ;j++) {
         if (start + j >= values.size()) {
@@ -1272,7 +1277,7 @@ static void buildaccent(string n, string param, string values)
           start = values.size() - 1;
           break;
         }
-        else if ((values[start+j] & 0xc0) == 0xc0) {
+        else if ((values[start+j] & 0xc0) != 0x80) {
           // This is the first byte of following utf8 char
           accents[key] = values.substr(start, j);
           start += j;
@@ -1289,11 +1294,12 @@ static void buildAccentsMap()
   accents["i"] = "Ä±";
   accents["jmath"] = "È·";
   accents["lyxmathsym{Ã}"] = "Ã";
+  accents["text{Ã}"] = "Ã";
   accents["ddot{\\imath}"] = "Ã¯";
-  buildaccent("ddot", "aAeEiIoOuUyY",
-                      "Ã¤ÃÃ«ÃÃ¯ÃÃ¶ÃÃ¼ÃÃ¿Å¸");	// umlaut
-  buildaccent("dot|.", "cCeEgGiIzZaAoObBdDfFyY",
-                       "ÄÄÄÄÄ¡Ä iÄ°Å¼Å»È§È¦È¯È®á¸á¸á¸á¸á¸á¸áºáº");
+  buildaccent("ddot", "aAeEiIioOuUyY",
+                      "Ã¤ÃÃ«ÃÃ¯ÃÃ¯Ã¶ÃÃ¼ÃÃ¿Å¸");	// umlaut
+  buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+                       "ÄÄÄÄÄ Ä¡Ä°Ä°Å¼Å»È§È¦È¯È®á¸á¸á¸á¸á¸á¸áºáº"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'Ä°'
   accents["acute{\\imath}"] = "Ã­";
   buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
                        "Ã¡ÃÄÄÃ©ÃÄºÄ¹Ã³ÃÅÅÅÅÅÅÃºÃÃ½ÃÅºÅ¹Ã­Ã");
@@ -1302,8 +1308,8 @@ static void buildAccentsMap()
                             "Ã¥ÃÅ¯Å®áºáº");  // ring
   accents["check{\\imath}"] = "Ç";
   accents["check{\\jmath}"] = "Ç°";
-  buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTzZ",
-                         "ÄÄÄÄÇÇÄÄÇÇÇÇÇÇÇ§Ç¦Ç©Ç¨ÈÈÄ¾Ä½ÅÅÅÅÅ¡Å Å¤Å¾Å½");	// caron
+  buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+                         "ÄÄÄÄÇÇÄÄÇÇÇÇÇÇÇ§Ç¦Ç©Ç¨ÈÈÄ¾Ä½ÅÅÅÅÅ¡Å Å¤Å¥Å¾Å½");	// caron
   accents["hat{\\imath}"] = "Ã®";
   accents["hat{\\jmath}"] = "Äµ";
   buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
@@ -1332,7 +1338,7 @@ void Intervall::removeAccents()
 {
   if (accents.empty())
     buildAccentsMap();
-  static regex const accre("\\\\((.|grave|breve|lyxmathsym|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
+  static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
   smatch sub;
   for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
     sub = *itacc;
-- 
2.39.2