]> git.lyx.org Git - features.git/commitdiff
Added better handling for languages and colors for advanced F&R
authorKornel Benko <kornel@lyx.org>
Fri, 5 Oct 2018 18:26:44 +0000 (20:26 +0200)
committerJean-Marc Lasgouttes <lasgouttes@lyx.org>
Thu, 18 Jun 2020 12:39:50 +0000 (14:39 +0200)
The change is significant if the search format is not disabled.
We try to analyze the pattern string first to get needed features
for the search.
We try to analyse the searched string and if it does not
contain all expected featers (color, language, char style, char decoration)

Still some problems though

src/lyxfind.cpp
src/output_latex.cpp

index 936ea2410e762ec9d8ad46f2f015044511c474ac..cbb9289c8a5a947ecc8b2f00255e9f728fd21a89 100644 (file)
@@ -52,6 +52,7 @@
 #include "support/lstrings.h"
 
 #include "support/regex.h"
+#include <map>
 
 using namespace std;
 using namespace lyx::support;
@@ -799,6 +800,7 @@ static docstring buffer_to_latex(Buffer & buffer)
        runparams.linelen = 80; //lyxrc.plaintext_linelen;
        // No side effect of file copying and image conversion
        runparams.dryrun = true;
+       runparams.for_search = true;
        pit_type const endpit = buffer.paragraphs().size();
        for (pit_type pit = 0; pit != endpit; ++pit) {
                TeXOnePar(buffer, buffer.text(), pit, os, runparams);
@@ -843,15 +845,167 @@ static size_t identifyLeading(string const & s)
        // @TODO Support \item[text]
        // Kornel: Added textsl, textsf, textit, texttt and noun
        // + allow to seach for colored text too
-       while (regex_replace(t, t, REGEX_BOS "\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)\\*?\\{", "")
+       while (regex_replace(t, t, REGEX_BOS "\\\\(((emph|noun|text(bf|sl|sf|it|tt))|((textcolor|foreignlanguage)\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)\\{", "")
               || regex_replace(t, t, REGEX_BOS "\\$", "")
               || regex_replace(t, t, REGEX_BOS "\\\\\\[ ", "")
               || regex_replace(t, t, REGEX_BOS "\\\\item ", "")
               || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\} ", ""))
-               LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
+              ;
+       LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
        return s.find(t);
 }
 
+typedef map<string, bool> Features;
+
+static Features identifyFeatures(string const & s)
+{
+       static regex const feature("\\\\(([a-z]+(\\{([a-z]+)\\}|\\*)?))\\{");
+       static regex const valid("^(((emph|noun|text(bf|sl|sf|it|tt)|(textcolor|foreignlanguage)\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)$");
+       smatch sub;
+       bool displ = true;
+       Features info;
+
+       for (sregex_iterator it(s.begin(), s.end(), feature), end; it != end; ++it) {
+               sub = *it;
+               if (displ) {
+                       if (sub.str(1).compare("regexp") == 0) {
+                               displ = false;
+                               continue;
+                       }
+                       string token = sub.str(1);
+                       smatch sub2;
+                       if (regex_match(token, sub2, valid)) {
+                               info[token] = true;
+                       }
+                       else {
+                               // ignore
+                       }
+               }
+               else {
+                       if (sub.str(1).compare("endregexp") == 0) {
+                               displ = true;
+                               continue;
+                       }
+               }
+       }
+       return(info);
+}
+
+static int findclosing(string p, int start, int end)
+{
+       int skip = 0;
+       int depth = 0;
+       for (int i = start; i < end; i += 1 + skip) {
+               char c;
+               c = p[i];
+               skip = 0;
+               if (c == '\\') skip = 1;
+               else if (c == '{') depth++;
+               else if (c == '}') {
+                       if (depth == 0) return(i);
+                       --depth;
+               }
+       }
+       return(-1);
+}
+
+
+static string correctlanguagesetting(string par, bool from_regex, bool withformat)
+{
+       static string langstart = "\\foreignlanguage{";
+       static int llen = langstart.length();
+       static bool removefirstlang = false;
+       static Features regex_f;
+       static int missed = 0;
+       static bool regex_with_format = false;
+
+       int parlen = par.length();
+       string result = par;
+
+       while ((parlen > 0) && (par[parlen-1] == '\n')) {
+               parlen--;
+       }
+       if (from_regex) {
+               missed = 0;
+               if (withformat) {
+                       regex_f = identifyFeatures(par);
+                       for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
+                               string a = it->first;
+                               regex_with_format = true;
+                               // LYXERR0("Identified regex format:" << a);
+                       }
+
+               }
+       } else if (regex_with_format) {
+               Features info = identifyFeatures(par);
+               for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
+                       string a = it->first;
+                       bool b = it->second;
+                       if (b && ! info[a]) {
+                               missed++;
+                               // LYXERR0("Missed(" << missed << ", srclen = " << parlen );
+                               return("");
+                       }
+               }
+       }
+       else {
+               // LYXERR0("No regex formats");
+       }
+       if (par.compare(0, llen, langstart) == 0) {
+               if (from_regex) {
+                       removefirstlang = false;
+               }
+               int i = findclosing(par, llen, par.length());
+               if (removefirstlang) {
+                       if (i < 0)
+                               result = "";
+                       else {
+                               int closepos = findclosing(par, i+2, par.length());
+                               if (closepos > 0) {
+                                       result = par.substr(i+2, closepos-i-2) + par.substr(closepos+1, parlen - closepos-1);
+                               }
+                               else {
+                                       result = par.substr(i+2, parlen-i-2);
+                               }
+                       }
+               }
+               else if (i > 0) {
+                       // skip '}{' after the language spec
+                       int closepos = findclosing(par, i+2, par.length());
+                       size_t insertpos = par.find(langstart, i+2);
+                       if (closepos < 0) {
+                               if (insertpos == string::npos) {
+                                       // there are no closing in par, and no next lang spec
+                                       result = par.substr(0, parlen) + "}";
+                               }
+                               else {
+                                       // Add '}' at insertpos only, because closing is missing
+                                       result = par.substr(0,insertpos) + "}" + par.substr(insertpos, parlen-insertpos);
+                               }
+                       }
+                       else if ((size_t) closepos > insertpos) {
+                               // Add '}' at insertpos and remove from closepos if closepos > insertpos
+                               result = par.substr(0,insertpos) + "}" + par.substr(insertpos, closepos - insertpos) + par.substr(closepos+1, parlen -closepos-1);
+                       }
+               }
+               else {
+                       result = par;
+                       // For i == 0, it is empty language spec
+                       // and for i < 0 it is Error
+               }
+       }
+       else {
+               if (from_regex) {
+                       removefirstlang = true;
+               }
+       }
+       // remove possible \inputencoding entries
+       while (regex_replace(result, result, "\\\\inputencoding\\{[^\\}]*}", ""))
+               ;
+       // Either not found language spec,or is single and closed spec or empty
+       return(result);
+}
+
 
 // Remove trailing closure of math, macros and environments, so to catch parts of them.
 static int identifyClosing(string & t)
@@ -887,6 +1041,8 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
        close_wildcards = 0;
 
        size_t lead_size = 0;
+       // correct the language settings
+       par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat);
        if (opt.ignoreformat) {
                if (!use_regexp) {
                        // if par_as_string_nolead were emty,
@@ -897,6 +1053,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const &
                }
        } else {
                lead_size = identifyLeading(par_as_string);
+               LYXERR(Debug::FIND, "Lead_size: " << lead_size);
                lead_as_string = par_as_string.substr(0, lead_size);
                par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
        }
@@ -985,6 +1142,7 @@ int MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) con
 
        docstring docstr = stringifyFromForSearch(opt, cur, len);
        string str = normalize(docstr, true);
+       if (str.empty()) return(-1);
        LYXERR(Debug::FIND, "Matching against     '" << lyx::to_utf8(docstr) << "'");
        LYXERR(Debug::FIND, "After normalization: '" << str << "'");
 
@@ -1108,9 +1266,10 @@ string MatchStringAdv::normalize(docstring const & s, bool hack_braces) const
        // Kornel: Added textsl, textsf, textit, texttt and noun
        // + allow to seach for colored text too
        LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, \\*section{} macros from: " << t);
-       while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)(\\{\\})+", ""))
+       while (regex_replace(t, t, "\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)(\\{\\})+", ""))
                LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
 
+       while (regex_replace(t, t, "\\\\foreignlanguage\\{[a-z]+\\}(\\{(\\\\item )?\\})+", ""));
        // FIXME - check what preceeds the brace
        if (hack_braces) {
                if (opt.ignoreformat)
@@ -1185,6 +1344,7 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
        runparams.linelen = 8000; //lyxrc.plaintext_linelen;
        // No side effect of file copying and image conversion
        runparams.dryrun = true;
+       runparams.for_search = true;
 
        if (cur.inTexted()) {
                // @TODO what about searching beyond/across paragraph breaks ?
@@ -1194,6 +1354,9 @@ docstring latexifyFromCursor(DocIterator const & cur, int len)
                TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
                          string(), cur.pos(), endpos);
                LYXERR(Debug::FIND, "Latexified text: '" << lyx::to_utf8(ods.str()) << "'");
+               string s = correctlanguagesetting(lyx::to_utf8(ods.str()), false, false);
+               LYXERR(Debug::FIND, "Latexified text: '" << s << "'");
+               return(lyx::from_utf8(s));
        } else if (cur.inMathed()) {
                // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly
                for (int s = cur.depth() - 1; s >= 0; --s) {
@@ -1259,12 +1422,13 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv const & match)
        if (cur.pos() + len > cur.lastpos())
                return 0;
        LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
-       while (cur.pos() + len <= cur.lastpos() && match(cur, len) == 0) {
+       while (cur.pos() + len <= cur.lastpos() && match(cur, len) <= 0) {
                ++len;
                LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
        }
        // Length of matched text (different from len param)
        int old_len = match(cur, len);
+       if (old_len < 0) old_len = 0;
        int new_len;
        // Greedy behaviour while matching regexps
        while ((new_len = match(cur, len + 1)) > old_len) {
@@ -1281,27 +1445,46 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
 {
        if (!cur)
                return 0;
+       static int max_missed = 0;
        while (!theApp()->longOperationCancelled() && cur) {
                LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
                int match_len = match(cur, -1, false);
                LYXERR(Debug::FIND, "match_len: " << match_len);
-               if (match_len) {
+               if (match_len > 0) {
+                       int count = 0;
+                       int match_len_zero_count = 0;
                        for (; !theApp()->longOperationCancelled() && cur; cur.forwardPos()) {
                                LYXERR(Debug::FIND, "Advancing cur: " << cur);
                                int match_len2 = match(cur);
-                               LYXERR(Debug::FIND, "match_len: " << match_len2);
-                               if (match_len2) {
+                               LYXERR(Debug::FIND, "match_len2: " << match_len2);
+                               if (match_len2 > 0) {
                                        // Sometimes in finalize we understand it wasn't a match
                                        // and we need to continue the outest loop
                                        int len = findAdvFinalize(cur, match);
-                                       if (len > 0)
+                                       if (len > 0) {
                                                return len;
+                                       }
+                               }
+                               if (match_len2 >= 0) {
+                                       count = 0;
+                                       if (match_len2 == 0)
+                                               match_len_zero_count++;
+                                       else
+                                               match_len_zero_count = 0;
+                               }
+                               else {
+                                       count++;
+                                       if (count > max_missed) max_missed = count;
+                                       if (count > 5) {
+                                               LYXERR(Debug::FIND, "match_len2_zero_count: " << match_len_zero_count << ", match_len was " << match_len);
+                                               break;
+                                       }
                                }
                        }
                        if (!cur)
                                return 0;
                }
-               if (cur.pit() < cur.lastpit()) {
+               if (match_len >= 0 && cur.pit() < cur.lastpit()) {
                        LYXERR(Debug::FIND, "Advancing par: cur=" << cur);
                        cur.forwardPar();
                } else {
@@ -1393,8 +1576,8 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
 docstring stringifyFromForSearch(FindAndReplaceOptions const & opt,
                                 DocIterator const & cur, int len)
 {
-       LASSERT(cur.pos() >= 0 && cur.pos() <= cur.lastpos(),
-               return docstring());
+       if (cur.pos() < 0 || cur.pos() > cur.lastpos())
+               return docstring();
        if (!opt.ignoreformat)
                return latexifyFromCursor(cur, len);
        else
index f73990de21ef7da699f3e9cd5f38d3fe72e5f740..96f51df5d3ecbddf8f1c13d14d19f746acea40ba 100644 (file)
@@ -814,10 +814,12 @@ void TeXOnePar(Buffer const & buf,
                            || (priorpar->getDepth() == par.getDepth()
                                    && priorpar->layout() != par.layout()));
        Language const * const prev_language =
-               (priorpar && !priorpar->isPassThru())
-               ? (use_prev_env_language ? state->prev_env_language_
-                                        : priorpar->getParLanguage(bparams))
-               : outer_language;
+               runparams_in.for_search ?
+                       languages.getLanguage("ignore")
+               :(priorpar && !priorpar->isPassThru())
+                       ? (use_prev_env_language ? state->prev_env_language_
+                                               : priorpar->getParLanguage(bparams))
+                       : outer_language;
 
        bool const use_polyglossia = runparams.use_polyglossia;
        string const par_lang = use_polyglossia ?
@@ -854,7 +856,8 @@ void TeXOnePar(Buffer const & buf,
                && runparams.local_font != 0
                && outer_language->rightToLeft()
                && !par_language->rightToLeft();
-       bool const localswitch = text.inset().forceLocalFontSwitch()
+       bool const localswitch = runparams_in.for_search
+                       || text.inset().forceLocalFontSwitch()
                        || (using_begin_end && text.inset().forcePlainLayout())
                        || in_polyglossia_rtl_env;
        if (localswitch) {