tex2lyx: fix import of umlauts and ß in math (#12739)

[lyx.git] / src / lyxfind.cpp
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index ac6366c2620e4ad655b03063078a2d32f9247dd8..2ddedcb4818bf21d37ae1c406fd7de9326990cd5 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -14,6 +14,7 @@
   */
  
  #include <config.h>
+#include <iterator>
  
  #include "lyxfind.h"
  
@@ -56,13 +57,8 @@
  //#define ResultsDebug
  #define USE_QT_FOR_SEARCH
  #if defined(USE_QT_FOR_SEARCH)
-       #include <QtCore>       // sets QT_VERSION
-       #if (QT_VERSION >= 0x050000)
-               #include <QRegularExpression>
-               #define QTSEARCH 1
-       #else
-               #define QTSEARCH 0
-       #endif
+       #include <QRegularExpression>
+       #define QTSEARCH 1
  #else
         #define QTSEARCH 0
  #endif
@@ -337,7 +333,7 @@ bool findOne(BufferView * bv, docstring const & searchstr,
  
         if (match_len > 0)
                 bv->putSelectionAt(cur, match_len, !forward);
-       else if (onlysel) {
+       else if (onlysel && bv->cursor().selection()) {
                 docstring q = _("The search string was not found within the selection.\n"
                                 "Continue search outside?");
                 int search_answer = frontend::Alert::prompt(_("Search outside selection?"),
@@ -901,7 +897,7 @@ string correctRegex(string t, bool withformat)
                 }
                 else {
                         if (sub.str(4) == "backslash") {
-                               replace = "\\";
+                               replace = string("\\");
                                 {
                                         // transforms '\backslash \{' into '\{'
                                         string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
@@ -1110,7 +1106,7 @@ private:
          ** @todo Normalization should also expand macros, if the corresponding
          ** search option was checked.
          **/
-       string normalize(docstring const & s, bool ignore_fomat) const;
+       string convertLF2Space(docstring const & s, bool ignore_fomat) const;
         // normalized string to search
         string par_as_string;
         // regular expression to use for searching
@@ -2423,7 +2419,7 @@ void LatexInfo::buildEntries(bool isPatternString)
         }
         // Ignore language if there is math somewhere in pattern-string
         if (isPatternString) {
-               for (auto s: usedText) {
+               for (auto const & s: usedText) {
                         // Remove entries created in previous search runs
                         keys.erase(s);
                 }
@@ -3668,7 +3664,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                 previous_single_replace = true;
         }
         // When using regexp, braces are hacked already by escape_for_regex()
-       par_as_string = normalize(ds, opt.ignoreformat);
+       par_as_string = convertLF2Space(ds, opt.ignoreformat);
         open_braces = 0;
         close_wildcards = 0;
  
@@ -3774,7 +3770,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                         }
                         if (opt.matchword) {
                                 modifyRegexForMatchWord(par_as_string);
-                               opt.matchword = false;
+                               // opt.matchword = false;
                         }
                         regexp_str = "(" + lead_as_regexp + ")()" + par_as_string;
                         regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string;
@@ -3793,7 +3789,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, MatchStrin
  
         docstring docstr = stringifyFromForSearch(opt, cur, len);
         string str;
-       str = normalize(docstr, opt.ignoreformat);
+       str = convertLF2Space(docstr, opt.ignoreformat);
         if (!opt.ignoreformat) {
                 str = correctlanguagesetting(str, false, !opt.ignoreformat);
                 // remove closing '}' and '\n' to allow for use of '$' in regex
@@ -3949,20 +3945,17 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, MatchStrin
  MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, MatchStringAdv::matchType at_begin) const
  {
         MatchResult mres = findAux(cur, len, at_begin);
-       int res = mres.match_len;
         LYXERR(Debug::FINDVERBOSE,
-              "res=" << res << ", at_begin=" << matchTypeAsString(at_begin)
+              "res=" << mres.match_len << ", at_begin=" << matchTypeAsString(at_begin)
                << ", matchAtStart=" << opt.matchAtStart
                << ", inTexted=" << cur.inTexted());
-       if (opt.matchAtStart) {
-               if (cur.pos() != 0)
-                       mres.match_len = 0;
-               else if (mres.match_prefix > 0)
-                       mres.match_len = 0;
-               return mres;
+       if (mres.match_len > 0) { // only a positive match needs the matchAtStart filter
+               if (opt.matchAtStart) {
+                       if (cur.pos() > 0 || mres.match_prefix > 0) // keep the match only at pos 0 with match_prefix == 0
+                               mres.match_len = 0;
+               }
         }
-       else
-               return mres;
+       return mres;
  }
  
  #if 0
@@ -3990,8 +3983,7 @@ static bool simple_replace(string &t, string from, string to)
  }
  #endif
  
-#if 1
-static string convertLF2Space(docstring const &s, bool ignore_format)
+string MatchStringAdv::convertLF2Space(docstring const &s, bool ignore_format) const
  {
         // Using original docstring to handle '\n'
  
@@ -4058,77 +4050,6 @@ static string convertLF2Space(docstring const &s, bool ignore_format)
         return(t.str());
  }
  
-#else
-static string convertLF2Space(docstring const & s, bool ignore_format)
-{
-       // Using utf8-converted string to handle '\n'
-
-       string t;
-       t = lyx::to_utf8(s);
-       // Remove \n at begin
-       while (!t.empty() && t[0] == '\n')
-               t = t.substr(1);
-       // Remove \n* at end
-       while (!t.empty() && t[t.size() - 1] == '\n') {
-               t = t.substr(0, t.size() - 1);
-       }
-       size_t pos;
-       // Handle all other '\n'
-       while ((pos = t.find("\n")) != string::npos) {
-               if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
-                       // Handle '\\\n'
-                       if (isPrintableNonspace(t[pos+1]) && ((pos < 3) || isPrintableNonspace(t[pos-3]))) {
-                               t.replace(pos-2, 3, " ");
-                       }
-                       else {
-                               // Already a space there
-                               t.replace(pos-2, 3, "");
-                       }
-               }
-               else {
-                       if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
-                               // '\n' adjacent to non-alpha-numerics, discard
-                               t.replace(pos, 1, "");
-                       }
-                       else {
-                               // Replace all other \n with spaces
-                               t.replace(pos, 1, " ");
-                       }
-                       if (!ignore_format) {
-                               size_t count = 0;
-                               while ((pos > count + 1) && (t[pos - 1 -count] == '%')) {
-                                       count++;
-                               }
-                               if (count > 0) {
-                                       t.replace(pos - count, count, "");
-                               }
-                       }
-               }
-       }
-       return(t);
-
-}
-#endif
-
-string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
-{
-       string t = convertLF2Space(s, ignore_format);
-
-       // The following replaces are not appropriate in non-format-search mode
-       if (!ignore_format) {
-               // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
-               // Kornel: Added textsl, textsf, textit, texttt and noun
-               // + allow to seach for colored text too
-               LYXERR(Debug::FINDVERBOSE, "Removing stale empty macros from: " << t);
-               while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
-                       LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
-               while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
-                       LYXERR(Debug::FINDVERBOSE, "  further removing stale empty \\section{}, \\part{}, \\paragraph{} macros from: " << t);
-               while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
-       }
-       return t;
-}
-
  docstring stringifyFromCursor(DocIterator const & cur, int len)
  {
         LYXERR(Debug::FINDVERBOSE, "Stringifying with len=" << len << " from cursor at pos: " << cur);
@@ -4446,15 +4367,20 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
  {
         if (!cur)
                 return 0;
-       bool repeat = false;
+       int repeat = 0;
         DocIterator orig_cur;   // to be used if repeat not successful
         MatchResult orig_mres;
+       do {
+               orig_cur = cur;
+               cur.forwardPos();
+       } while (cur.depth() > orig_cur.depth());
+       cur = orig_cur;
         while (!theApp()->longOperationCancelled() && cur) {
                 //(void) findAdvForwardInnermost(cur);
                 LYXERR(Debug::FINDVERBOSE, "findForwardAdv() cur: " << cur);
                 MatchResult mres = match(cur, -1, MatchStringAdv::MatchAnyPlace);
                 string msg = "Starting";
-               if (repeat)
+               if (repeat > 0)
                         msg = "Repeated";
                 displayMres(mres, msg + " findForwardAdv", cur)
                                 int match_len = mres.match_len;
@@ -4463,8 +4389,13 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                         match_len = 0;
                 }
                 if (match_len <= 0) {
-                       // This should exit nested insets, if any, or otherwise undefine the currsor.
-                       cur.pos() = cur.lastpos();
+                       if (repeat > 0) {
+                               repeat--;
+                       }
+                       else {
+                               // This should exit nested insets, if any, or otherwise undefine the cursor.
+                               cur.pos() = cur.lastpos();
+                       }
                         LYXERR(Debug::FINDVERBOSE, "Advancing pos: cur=" << cur);
                         cur.forwardPos();
                 }
@@ -4492,7 +4423,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                 cur.pos() = cur.pos() + increment;
                                 MatchResult mres2 = match(cur, -1, MatchStringAdv::MatchAnyPlace);
                                 displayMres(mres2, "findForwardAdv loop", cur)
-                                               switch (interpretMatch(mres, mres2)) {
+                               switch (interpretMatch(mres, mres2)) {
                                         case MatchResult::newIsTooFar:
                                                 // behind the expected match
                                                 firstInvalid = increment;
@@ -4500,7 +4431,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                                 increment /= 2;
                                                 break;
                                         case MatchResult::newIsBetter:
-                                               // not reached yet, but cur.pos()+increment is bettert
+                                               // not reached yet, but cur.pos()+increment is better
                                                 mres = mres2;
                                                 firstInvalid -= increment;
                                                 if (increment > firstInvalid*3/4)
@@ -4513,7 +4444,7 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                                         default:
                                                 // Todo@
                                                 // Handle not like MatchResult::newIsTooFar
-                                               LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
+                                               LYXERR(Debug::FINDVERBOSE, "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
                                                 firstInvalid--;
                                                 increment = increment*3/4;
                                                 cur = old_cur;
@@ -4523,14 +4454,14 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
                         if (mres.match_len > 0) {
                                 if (mres.match_prefix + mres.pos - mres.leadsize > 0) {
                                         // The match seems to indicate some deeper level
-                                       repeat = true;
+                                       repeat = 2;
                                         orig_cur = cur;
                                         orig_mres = mres;
                                         cur.forwardPos();
                                         continue;
                                 }
                         }
-                       else if (repeat) {
+                       else if (repeat > 0) {
                                 // should never be reached.
                                 cur = orig_cur;
                                 mres = orig_mres;
@@ -4833,6 +4764,10 @@ static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, Ma
         return 1;
  }
  
+static bool isWordChar(char_type c)
+{
+       return isLetterChar(c) || isNumberChar(c); // letters and numbers both count as word characters
+}
  
  /// Perform a FindAdv operation.
  bool findAdv(BufferView * bv, FindAndReplaceOptions & opt)
@@ -4858,8 +4793,52 @@ bool findAdv(BufferView * bv, FindAndReplaceOptions & opt)
                         bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
                 num_replaced += findAdvReplace(bv, opt, matchAdv);
                 cur = bv->cursor();
-               if (opt.forward)
+               if (opt.forward) {
+                       if (opt.matchword && cur.pos() > 0) {  // Skip word-characters if we are in the mid of a word
+                               if (cur.inTexted()) {
+                                       Paragraph const & par = cur.paragraph();
+                                       int len_limit, new_pos;
+                                       if (cur.lastpos() < par.size())
+                                               len_limit = cur.lastpos();
+                                       else
+                                               len_limit = par.size();
+                                       for (new_pos = cur.pos() - 1; new_pos < len_limit; new_pos++) {
+                                               if (!isWordChar(par.getChar(new_pos)))
+                                                       break;
+                                       }
+                                       if (new_pos > cur.pos())
+                                               cur.pos() = new_pos;
+                               }
+                               else if (cur.inMathed()) {
+                                       // Check whether 'cur.pos()-1' and 'cur.pos()' both point to a word character.
+                                       // It is unclear whether an active selection should be handled too; for now it is skipped.
+                                       bool sel = bv->cursor().selection();
+                                       if (!sel && cur.pos() < cur.lastpos()) {
+                                               CursorSlice const & cs = cur.top();
+                                               MathData md = cs.cell();
+                                               int len = -1;
+                                               MathData::const_iterator it_end = md.end();
+                                               MathData md2;
+                                               // Start the check with one character before actual cursor position
+                                               for (MathData::const_iterator it = md.begin() + cs.pos() - 1;
+                                                   it != it_end; ++it)
+                                                       md2.push_back(*it);
+                                               docstring inp = asString(md2);
+                                               for (len = 0; (unsigned) len < inp.size() && len + cur.pos() <= cur.lastpos(); len++) {
+                                                       if (!isWordChar(inp[len]))
+                                                               break;
+                                               }
+                                               // len == 0: the char before the cursor is a word separator
+                                               // len == 1: the char at the cursor is a word separator (or the cell ends)
+                                               // len >= 2: we are inside a word and have to skip len - 1 chars
+                                               if (len > 1)
+                                                       cur.pos() = cur.pos() + len - 1;
+                                       }
+                               }
+                               opt.matchword = false;
+                       }
                         pos_len = findForwardAdv(cur, matchAdv);
+               }
                 else
                         pos_len = findBackwardsAdv(cur, matchAdv);
         } catch (exception & ex) {