Added better handling for languages and colors for advanced F&R

author Kornel Benko <kornel@lyx.org>

Fri, 5 Oct 2018 18:26:44 +0000 (20:26 +0200)

committer Jean-Marc Lasgouttes <lasgouttes@lyx.org>

Thu, 18 Jun 2020 12:39:50 +0000 (14:39 +0200)
author Kornel Benko <kornel@lyx.org>
Fri, 5 Oct 2018 18:26:44 +0000 (20:26 +0200)
committer Jean-Marc Lasgouttes <lasgouttes@lyx.org>
Thu, 18 Jun 2020 12:39:50 +0000 (14:39 +0200)
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index 936ea2410e762ec9d8ad46f2f015044511c474ac..cbb9289c8a5a947ecc8b2f00255e9f728fd21a89 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -52,6 +52,7 @@
  #include "support/lstrings.h"
  
  #include "support/regex.h"
+#include <map>
  
  using namespace std;
  using namespace lyx::support;
@@ -799,6 +800,7 @@ static docstring buffer_to_latex(Buffer & buffer)
         runparams.linelen = 80; //lyxrc.plaintext_linelen;
         // No side effect of file copying and image conversion
         runparams.dryrun = true;
+       runparams.for_search = true;
         pit_type const endpit = buffer.paragraphs().size();
         for (pit_type pit = 0; pit != endpit; ++pit) {
                 TeXOnePar(buffer, buffer.text(), pit, os, runparams);
@@ -843,15 +845,167 @@ static size_t identifyLeading(string const & s)
         // @TODO Support \item[text]
         // Kornel: Added textsl, textsf, textit, texttt and noun
         // + allow to seach for colored text too
-       while (regex_replace(t, t, REGEX_BOS "\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)\\*?\\{", "")
+       while (regex_replace(t, t, REGEX_BOS "\\\\(((emph|noun|text(bf|sl|sf|it|tt))|((textcolor|foreignlanguage)\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)\\{", "")
                || regex_replace(t, t, REGEX_BOS "\\$", "")
                || regex_replace(t, t, REGEX_BOS "\\\\\\[ ", "")
                || regex_replace(t, t, REGEX_BOS "\\\\item ", "")
                || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\} ", ""))
-               LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
+              ;
+       LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
         return s.find(t);
  }
  
+typedef map<string, bool> Features; // maps a macro token, e.g. emph or textcolor{red}, to true once it has been seen
+
+static Features identifyFeatures(string const & s) // returns the recognized formatting macros found in s
+{
+       static regex const feature("\\\\(([a-z]+(\\{([a-z]+)\\}|\\*)?))\\{"); // a backslash, a lowercase name, an optional {lowercase-arg} or '*', then '{'; group 1 is the name plus that optional part
+       static regex const valid("^(((emph|noun|text(bf|sl|sf|it|tt)|(textcolor|foreignlanguage)\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)$"); // tokens that count as features; the anchors require the whole token to match
+       smatch sub;
+       bool displ = true; // true while scanning outside a \regexp{ ... \endregexp{ span
+       Features info;
+
+       for (sregex_iterator it(s.begin(), s.end(), feature), end; it != end; ++it) { // visit every macro-like match in s
+               sub = *it;
+               if (displ) {
+                       if (sub.str(1).compare("regexp") == 0) { // \regexp{ opens a span whose macros are not recorded
+                               displ = false;
+                               continue;
+                       }
+                       string token = sub.str(1);
+                       smatch sub2;
+                       if (regex_match(token, sub2, valid)) { // record the token only when it is on the whitelist
+                               info[token] = true;
+                       }
+                       else {
+                               // not a recognized formatting macro: ignore it
+                       }
+               }
+               else {
+                       if (sub.str(1).compare("endregexp") == 0) { // \endregexp{ closes the span: resume recording
+                               displ = true;
+                               continue;
+                       }
+               }
+       }
+       return(info);
+}
+
+static int findclosing(string p, int start, int end) // index of the first '}' in p[start, end) that is not paired with a '{' inside that range, or -1
+{
+       int skip = 0; // extra characters to step over after the current one
+       int depth = 0; // number of '{' seen in the range that are still open
+       for (int i = start; i < end; i += 1 + skip) {
+               char c;
+               c = p[i];
+               skip = 0;
+               if (c == '\\') skip = 1; // a backslash also consumes the next character, so \{ and \} are not counted
+               else if (c == '{') depth++;
+               else if (c == '}') {
+                       if (depth == 0) return(i); // unpaired '}' at nesting level 0: this is the result
+                       --depth;
+               }
+       }
+       return(-1); // no unpaired '}' found in the range
+}
+
+
+static string correctlanguagesetting(string par, bool from_regex, bool withformat) // adjusts a leading \foreignlanguage{...}{ wrapper in par and strips \inputencoding{...}; may return "" when par lacks a format required by the last pattern
+{ // from_regex: the MatchStringAdv constructor passes true, latexifyFromCursor passes false; withformat: when true (and from_regex), the features of par are recorded
+       static string langstart = "\\foreignlanguage{";
+       static int llen = langstart.length();
+       static bool removefirstlang = false; // static: set by a from_regex call, consulted by later calls
+       static Features regex_f; // features recorded by the last from_regex call that had withformat set
+       static int missed = 0; // reset by each from_regex call, incremented on each rejection; not read in this function
+       static bool regex_with_format = false; // NOTE(review): only ever set to true in this function; nothing visible here resets it for a later pattern without formats - confirm intended
+
+       int parlen = par.length();
+       string result = par;
+
+       while ((parlen > 0) && (par[parlen-1] == '\n')) { // parlen becomes the length of par without its trailing newlines
+               parlen--;
+       }
+       if (from_regex) {
+               missed = 0;
+               if (withformat) {
+                       regex_f = identifyFeatures(par);
+                       for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) { // any recorded feature switches on the feature check for later calls
+                               string a = it->first;
+                               regex_with_format = true;
+                               // LYXERR0("Identified regex format:" << a);
+                       }
+
+               }
+       } else if (regex_with_format) {
+               Features info = identifyFeatures(par);
+               for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) { // every feature recorded in regex_f must also occur in par
+                       string a = it->first;
+                       bool b = it->second;
+                       if (b && ! info[a]) { // presumably std::map, so info[a] is false for a token not found in par
+                               missed++;
+                               // LYXERR0("Missed(" << missed << ", srclen = " << parlen );
+                               return(""); // a required feature is missing: hand back an empty string
+                       }
+               }
+       }
+       else {
+               // LYXERR0("No regex formats");
+       }
+       if (par.compare(0, llen, langstart) == 0) { // par begins with \foreignlanguage{
+               if (from_regex) {
+                       removefirstlang = false; // this par starts with a language spec itself: do not strip leading specs
+               }
+               int i = findclosing(par, llen, par.length()); // index of the '}' that ends the language name, or -1
+               if (removefirstlang) {
+                       if (i < 0)
+                               result = "";
+                       else {
+                               int closepos = findclosing(par, i+2, par.length()); // i+2 steps over the }{ that follows the language name
+                               if (closepos > 0) {
+                                       result = par.substr(i+2, closepos-i-2) + par.substr(closepos+1, parlen - closepos-1); // wrapped text plus whatever follows its closing brace
+                               }
+                               else {
+                                       result = par.substr(i+2, parlen-i-2); // no closing brace: keep everything after the opening brace; NOTE(review): looks like substr can throw if the spec ends the string (i+2 > size) - confirm
+                               }
+                       }
+               }
+               else if (i > 0) {
+                       // skip '}{' after the language spec
+                       int closepos = findclosing(par, i+2, par.length());
+                       size_t insertpos = par.find(langstart, i+2); // start of the next language spec, or npos
+                       if (closepos < 0) {
+                               if (insertpos == string::npos) {
+                                       // there is no closing brace in par and no further language spec: append the missing '}'
+                                       result = par.substr(0, parlen) + "}";
+                               }
+                               else {
+                                       // the closing brace is missing: add '}' just before the next language spec
+                                       result = par.substr(0,insertpos) + "}" + par.substr(insertpos, parlen-insertpos);
+                               }
+                       }
+                       else if ((size_t) closepos > insertpos) {
+                               // the closing brace comes after the next language spec: add '}' at insertpos and drop the one at closepos
+                               result = par.substr(0,insertpos) + "}" + par.substr(insertpos, closepos - insertpos) + par.substr(closepos+1, parlen -closepos-1);
+                       }
+               }
+               else {
+                       result = par;
+                       // For i == 0, it is empty language spec
+                       // and for i < 0 it is Error
+               }
+       }
+       else {
+               if (from_regex) {
+                       removefirstlang = true; // this par has no leading language spec: strip the leading spec in later calls
+               }
+       }
+       // remove possible \inputencoding entries
+       while (regex_replace(result, result, "\\\\inputencoding\\{[^\\}]*}", "")) // regex_replace is defined outside this view; presumably it reports whether it changed anything - verify
+               ;
+       // Here either no language spec was found, or it is a single closed spec, or it is empty
+       return(result);
+}
+
  
  // Remove trailing closure of math, macros and environments, so to catch parts of them.
  static int identifyClosing(string & t)
@@ -887,6 +1041,8 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
         close_wildcards = 0;
  
         size_t lead_size = 0;
+       // correct the language settings
+       par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat);
         if (opt.ignoreformat) {
                 if (!use_regexp) {
                         // if par_as_string_nolead were emty,
@@ -897,6 +1053,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
                 }
         } else {
                 lead_size = identifyLeading(par_as_string);
+               LYXERR(Debug::FIND, "Lead_size: " << lead_size);
                 lead_as_string = par_as_string.substr(0, lead_size);
                 par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
         }
@@ -985,6 +1142,7 @@ int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) con
  
         docstring docstr = stringifyFromForSearch(opt, cur, len);
         string str = normalize(docstr, true);
+       if (str.empty()) return(-1);
         LYXERR(Debug::FIND, "Matching against     '" << lyx::to_utf8(docstr) << "'");
         LYXERR(Debug::FIND, "After normalization: '" << str << "'");
  
@@ -1108,9 +1266,10 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
         // Kornel: Added textsl, textsf, textit, texttt and noun
         // + allow to seach for colored text too
         LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t);
-       while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)(\\{\\})+", ""))
+       while (regex_replace(t, t, "\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)(\\{\\})+", ""))
                 LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
  
+       while (regex_replace(t, t, "\\\\foreignlanguage\\{[a-z]+\\}(\\{(\\\\item )?\\})+", ""));
         // FIXME - check what preceeds the brace
         if (hack_braces) {
                 if (opt.ignoreformat)
@@ -1185,6 +1344,7 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
         runparams.linelen = 8000; //lyxrc.plaintext_linelen;
         // No side effect of file copying and image conversion
         runparams.dryrun = true;
+       runparams.for_search = true;
  
         if (cur.inTexted()) {
                 // @TODO what about searching beyond/across paragraph breaks ?
@@ -1194,6 +1354,9 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
                 TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
                           string(), cur.pos(), endpos);
                 LYXERR(Debug::FIND, "Latexified text: '" << lyx::to_utf8(ods.str()) << "'");
+               string s = correctlanguagesetting(lyx::to_utf8(ods.str()), false, false);
+               LYXERR(Debug::FIND, "Latexified text: '" << s << "'");
+               return(lyx::from_utf8(s));
         } else if (cur.inMathed()) {
                 // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly
                 for (int s = cur.depth() - 1; s >= 0; --s) {
@@ -1259,12 +1422,13 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
         if (cur.pos() + len > cur.lastpos())
                 return 0;
         LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
-       while (cur.pos() + len <= cur.lastpos() && match(cur, len) == 0) {
+       while (cur.pos() + len <= cur.lastpos() && match(cur, len) <= 0) {
                 ++len;
                 LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
         }
         // Length of matched text (different from len param)
         int old_len = match(cur, len);
+       if (old_len < 0) old_len = 0;
         int new_len;
         // Greedy behaviour while matching regexps
         while ((new_len = match(cur, len + 1)) > old_len) {
@@ -1281,27 +1445,46 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
  {
         if (!cur)
                 return 0;
+       static int max_missed = 0;
         while (!theApp()->longOperationCancelled() && cur) {
                 LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
                 int match_len = match(cur, -1, false);
                 LYXERR(Debug::FIND, "match_len: " << match_len);
-               if (match_len) {
+               if (match_len > 0) {
+                       int count = 0;
+                       int match_len_zero_count = 0;
                         for (; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
                                 LYXERR(Debug::FIND, "Advancing cur: " << cur);
                                 int match_len2 = match(cur);
-                               LYXERR(Debug::FIND, "match_len: " << match_len2);
-                               if (match_len2) {
+                               LYXERR(Debug::FIND, "match_len2: " << match_len2);
+                               if (match_len2 > 0) {
                                         // Sometimes in finalize we understand it wasn't a match
                                         // and we need to continue the outest loop
                                         int len = findAdvFinalize(cur, match);
-                                       if (len > 0)
+                                       if (len > 0) {
                                                 return len;
+                                       }
+                               }
+                               if (match_len2 >= 0) {
+                                       count = 0;
+                                       if (match_len2 == 0)
+                                               match_len_zero_count++;
+                                       else
+                                               match_len_zero_count = 0;
+                               }
+                               else {
+                                       count++;
+                                       if (count > max_missed) max_missed = count;
+                                       if (count > 5) {
+                                               LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len);
+                                               break;
+                                       }
                                 }
                         }
                         if (!cur)
                                 return 0;
                 }
-               if (cur.pit() < cur.lastpit()) {
+               if (match_len >= 0 && cur.pit() < cur.lastpit()) {
                         LYXERR(Debug::FIND, "Advancing par: cur=" << cur);
                         cur.forwardPar();
                 } else {
@@ -1393,8 +1576,8 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
  docstring stringifyFromForSearch(FindAndReplaceOptions const & opt,
                                  DocIterator const & cur, int len)
  {
-       LASSERT(cur.pos() >= 0 && cur.pos() <= cur.lastpos(),
-               return docstring());
+       if (cur.pos() < 0 || cur.pos() > cur.lastpos())
+               return docstring();
         if (!opt.ignoreformat)
                 return latexifyFromCursor(cur, len);
         else
diff --git a/src/output_latex.cpp b/src/output_latex.cpp

index f73990de21ef7da699f3e9cd5f38d3fe72e5f740..96f51df5d3ecbddf8f1c13d14d19f746acea40ba 100644 (file)
--- a/src/output_latex.cpp
+++ b/src/output_latex.cpp
@@ -814,10 +814,12 @@ void TeXOnePar(Buffer const & buf,
                             || (priorpar->getDepth() == par.getDepth()
                                     && priorpar->layout() != par.layout()));
         Language const * const prev_language =
-               (priorpar && !priorpar->isPassThru())
-               ? (use_prev_env_language ? state->prev_env_language_
-                                        : priorpar->getParLanguage(bparams))
-               : outer_language;
+               runparams_in.for_search ?
+                       languages.getLanguage("ignore")
+               :(priorpar && !priorpar->isPassThru())
+                       ? (use_prev_env_language ? state->prev_env_language_
+                                               : priorpar->getParLanguage(bparams))
+                       : outer_language;
  
         bool const use_polyglossia = runparams.use_polyglossia;
         string const par_lang = use_polyglossia ?
@@ -854,7 +856,8 @@ void TeXOnePar(Buffer const & buf,
                 && runparams.local_font != 0
                 && outer_language->rightToLeft()
                 && !par_language->rightToLeft();
-       bool const localswitch = text.inset().forceLocalFontSwitch()
+       bool const localswitch = runparams_in.for_search
+                       || text.inset().forceLocalFontSwitch()
                         || (using_begin_end && text.inset().forcePlainLayout())
                         || in_polyglossia_rtl_env;
         if (localswitch) {
author	Kornel Benko <kornel@lyx.org>
	Fri, 5 Oct 2018 18:26:44 +0000 (20:26 +0200)
committer	Jean-Marc Lasgouttes <lasgouttes@lyx.org>
	Thu, 18 Jun 2020 12:39:50 +0000 (14:39 +0200)
src/lyxfind.cpp		patch \| blob \| history
src/output_latex.cpp		patch \| blob \| history