git.lyx.org Git - features.git/blobdiff - src/lyxfind.cpp
CALS tables: base implementation of row separators on the new code from XHTML.
[features.git] / src / lyxfind.cpp
index 6308c64de8f4696f0a6ca0f783a8e71e756c1823..53058ddace7f247fceb0448e6a75c663f1ca393e 100644 (file)
@@ -192,7 +192,7 @@ IgnoreFormats ignoreFormats;
 
 void setIgnoreFormat(string const & type, bool value, bool fromUser) // Free-function wrapper: hands type, value and fromUser unchanged to ignoreFormats.setIgnoreFormat().
 {
-  ignoreFormats.setIgnoreFormat(type, value, fromUser);
+       ignoreFormats.setIgnoreFormat(type, value, fromUser);
 }
 
 
@@ -830,16 +830,22 @@ string string2regex(string in)
        return temp2;
 }
 
+static void buildAccentsMap();
+
 string correctRegex(string t, bool withformat)
 {
        /* Convert \backslash => \
         * and \{, \}, \[, \] => {, }, [, ]
         */
        string s("");
-       regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))");
+       regex wordre("(\\\\)*(\\\\(([A-Za-z]+|[\\{\\}])( |\\{\\})?|[\\[\\]\\{\\}]))");
        size_t lastpos = 0;
        smatch sub;
        bool backslashed = false;
+       if (accents.empty())
+               buildAccentsMap();
+
+       //LYXERR0("correctRegex input '" << t << "'");
        for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
                sub = *it;
                string replace;
@@ -849,9 +855,8 @@ string correctRegex(string t, bool withformat)
                else {
                        if (sub.str(4) == "backslash") {
                                replace = "\\";
-                               if (withformat) {
+                               {
                                        // transforms '\backslash \{' into '\{'
-                                       // and '\{' into '{'
                                        string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
                                        if ((next == "\\{") || (next == "\\}")) {
                                                replace = "";
@@ -863,17 +868,40 @@ string correctRegex(string t, bool withformat)
                                replace = "^";
                        else if (backslashed) {
                                backslashed = false;
-                               if (withformat && (sub.str(3) == "{"))
-                                       replace = accents["braceleft"];
-                               else if (withformat && (sub.str(3) == "}"))
-                                       replace = accents["braceright"];
+                               if (withformat) {
+                                       if (sub.str(3) == "{")
+                                               replace = accents["braceleft"];
+                                       else if (sub.str(3) == "}")
+                                               replace = accents["braceright"];
+                                       else {
+                                               // else part should not exist
+                                               LASSERT(1, /**/);
+                                       }
+                               }
                                else {
-                                       // else part should not exist
-                                       LASSERT(1, /**/);
+                                       if (sub.str(3) == "{")
+                                               replace = "\\{";
+                                       else if (sub.str(3) == "}")
+                                               replace = "\\}";
+                                       else {
+                                               // else part should not exist
+                                               LASSERT(1, /**/);
+                                       }
+                               }
+                       }
+                       else if (sub.str(4) == "{") // transforms '\{' into '{'
+                               replace = "{";
+                       else if (sub.str(4) == "}")
+                                replace = "}";
+                       else {
+                               AccentsIterator it_ac = accents.find(sub.str(4));
+                               if (it_ac == accents.end()) {
+                                       replace = sub.str(2);
+                               }
+                               else {
+                                       replace = it_ac->second;
                                }
                        }
-                       else
-                               replace = sub.str(3);
                }
                if (lastpos < (size_t) sub.position(2))
                        s += t.substr(lastpos, sub.position(2) - lastpos);
@@ -884,6 +912,7 @@ string correctRegex(string t, bool withformat)
                return t;
        else if (lastpos < t.length())
                s += t.substr(lastpos, t.length() - lastpos);
+       //LYXERR0("correctRegex output '" << s << "'");
        return s;
 }
 
@@ -905,7 +934,7 @@ string escape_for_regex(string s, bool withformat)
                        if (lastpos == s.size())
                                break;
                }
-               size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8);
+               size_t end_pos = s.find("\\endregexp", regex_pos + 8);
                result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat);
                lastpos = end_pos + 13;
        }
@@ -1008,7 +1037,7 @@ private:
         ** @todo Normalization should also expand macros, if the corresponding
         ** search option was checked.
         **/
-       string normalize(docstring const & s) const;
+       string normalize(docstring const & s, bool ignore_format) const; // ignore_format: callers in this file pass opt.ignoreformat; when false, '%' runs directly before a newline are removed together with it
        // normalized string to search
        string par_as_string;
        // regular expression to use for searching
@@ -1077,6 +1106,52 @@ static docstring buffer_to_latex(Buffer & buffer)
        return ods.str();
 }
 
+static string latexNamesToUtf8(docstring strIn) // Returns strIn as UTF-8 with every LaTeX macro that has an entry in the accents map replaced by that entry's value; macros without an entry are kept verbatim.
+{
+       string addtmp = to_utf8(strIn); // the regex scan below runs on this UTF-8 copy
+       static regex const rmAcc("(\\\\)*(" // group 1 (repeated): leading backslashes; group 2: the whole macro, including an optional trailing ' ' or '{}'
+                                        "\\\\([A-Za-z]+\\{.\\})"       // e.g. "ddot{A}" == sub.str(3)
+                                       "|\\\\([A-Za-z]+)( |\\{\\})?"   // e.g. "LyX", "LyX{}", "LyX " == sub.str(4)
+                                       ")"
+                               );
+       size_t lastpos = 0; // offset in addtmp just past the last macro that was processed
+       smatch sub;
+       string replace;
+       string add(""); // output accumulator
+       if (accents.empty()) // build the lookup table on first use
+               buildAccentsMap();
+       for (sregex_iterator it_add(addtmp.begin(), addtmp.end(), rmAcc), end; it_add != end; ++it_add) {
+               sub = *it_add;
+               if ((sub.position(2) - sub.position(0)) % 2 == 1) { // odd number of characters (the backslashes of group 1) before the macro: skip it, the text is copied through unchanged later
+                       continue;
+               }
+               else {
+                       string key;
+                       if (sub.length(3) > 0) // first alternative matched: name plus one braced char
+                               key = sub.str(3);
+                       else
+                               key = sub.str(4); // second alternative: bare name, without the optional ' ' or '{}'
+                       AccentsIterator it_ac = accents.find(key);
+                       if (it_ac == accents.end()) { // name not in the map
+                               replace = sub.str(2); // keep the original text, trailing ' ' or '{}' included
+                       }
+                       else {
+                               replace = it_ac->second; // known name: substitute the mapped value
+                       }
+               }
+               if (lastpos < (size_t) sub.position(2)) // copy the untouched text between the previous macro and this one
+                       add += addtmp.substr(lastpos, sub.position(2) - lastpos);
+               add += replace;
+               lastpos = sub.position(2) + sub.length(2); // resume right after the macro
+       }
+       if (lastpos == 0) // no macro was processed: hand back the plain UTF-8 conversion
+               add = addtmp;
+       else if (addtmp.length() > lastpos) // append the tail after the last macro
+               add += addtmp.substr(lastpos, addtmp.length() - lastpos);
+       LYXERR(Debug::FIND, "Adding to search string: '"
+                       << add << "'");
+       return add;
+}
 
 static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions const & opt)
 {
@@ -1101,19 +1176,17 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
                if (ignoreFormats.getNonContent()) {
                        runparams.for_searchAdv |= OutputParams::SearchNonOutput;
                }
+               string t("");
                for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
                        Paragraph const & par = buffer.paragraphs().at(pit);
+                       string add = latexNamesToUtf8(par.asString(pos_type(0), par.size(),
+                                                               option,
+                                                               &runparams));
                        LYXERR(Debug::FIND, "Adding to search string: '"
-                              << par.asString(pos_type(0), par.size(),
-                                              option,
-                                              &runparams)
-                              << "'");
-                       str += par.asString(pos_type(0), par.size(),
-                                           option,
-                                           &runparams);
+                               << add << "'");
+                       t += add;
                }
                // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts
-               string t = to_utf8(str);
                while (regex_replace(t, t, "\\\\(text|lyxmathsym|ensuremath)\\{([^\\}]*)\\}", "$2"));
                str = from_utf8(t);
        }
@@ -1547,6 +1620,8 @@ void static fillMissingUnicodesymbols()
   addAccents("\\o", getutf8(0x00f8));
   addAccents("\\textcrlambda", getutf8(0x019b));
   addAccents("\\j", getutf8(0x0237));
+  addAccents("\\textrevepsilon", getutf8(0x025c));
+  addAccents("\\textbaru", getutf8(0x0289));
   addAccents("\\textquoteleft", getutf8(0x02bb));
   addAccents("\\textGamma", getutf8(0x0393));
   addAccents("\\Gamma", getutf8(0x0393));
@@ -1895,9 +1970,13 @@ void Intervall::removeAccents()
 {
   if (accents.empty())
     buildAccentsMap();
-  static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
-         "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
-      "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(textquote|brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
+  static regex const accre("\\\\("
+      "([\\S]|[A-Za-z]+)\\{[^\\{\\}]+\\}"
+      "|("
+        "(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))"
+       "|[A-Za-z]+"
+      ")"
+      "(?![a-zA-Z]))");
   smatch sub;
   for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
     sub = *itacc;
@@ -2950,7 +3029,7 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
       // Remove the key with all parameters and following spaces
       size_t pos;
       size_t start;
-      if (interval_.par[actual._dataEnd-1] == ' ')
+      if (interval_.par[actual._dataEnd-1] == ' ' || interval_.par[actual._dataEnd-1] == '}')
         start = actual._dataEnd;
       else
         start = actual._dataEnd+1;
@@ -3386,9 +3465,9 @@ void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string rege
                                balanced--;
                                if (balanced < 0)
                                        break;
-                               }
-                               skip = 1;
                        }
+                       skip = 1;
+               }
                if (balanced != 0) {
                        regexIsValid = false;
                        regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
@@ -3457,7 +3536,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                previous_single_replace = true;
        }
        // When using regexp, braces are hacked already by escape_for_regex()
-       par_as_string = normalize(ds);
+       par_as_string = normalize(ds, opt.ignoreformat);
        open_braces = 0;
        close_wildcards = 0;
 
@@ -3505,6 +3584,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                string lead_as_regexp;
                if (lead_size > 0) {
                        lead_as_regexp = string2regex(par_as_string.substr(0, lead_size));
+                       (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "\\$$", "");
                        (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", "");
                        par_as_string = par_as_string_nolead;
                        LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
@@ -3580,7 +3660,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
 
        docstring docstr = stringifyFromForSearch(opt, cur, len);
        string str;
-       str = normalize(docstr);
+       str = normalize(docstr, opt.ignoreformat);
        if (!opt.ignoreformat) {
                str = correctlanguagesetting(str, false, !opt.ignoreformat);
                // remove closing '}' and '\n' to allow for use of '$' in regex
@@ -3774,16 +3854,22 @@ static bool simple_replace(string &t, string from, string to)
 }
 #endif
 
-string MatchStringAdv::normalize(docstring const & s) const
+string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
 {
        string t;
        t = lyx::to_utf8(s);
        // Remove \n at begin
        while (!t.empty() && t[0] == '\n')
                t = t.substr(1);
-       // Remove \n at end
-       while (!t.empty() && t[t.size() - 1] == '\n')
-               t = t.substr(0, t.size() - 1);
+       // Remove [%]*\n at end
+       while (!t.empty() && t[t.size() - 1] == '\n') {
+               int count = 1;
+               if (!ignore_format) {
+                       while ((t.size() > 1 + count) && (t[t.size() - 1 - count] == '%'))
+                               count++;
+               }
+               t = t.substr(0, t.size() - count);
+       }
        size_t pos;
        // Handle all other '\n'
        while ((pos = t.find("\n")) != string::npos) {
@@ -3796,13 +3882,24 @@ string MatchStringAdv::normalize(docstring const & s) const
                                t.replace(pos-2, 3, "");
                        }
                }
-               else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
-                       // '\n' adjacent to non-alpha-numerics, discard
-                       t.replace(pos, 1, "");
-               }
                else {
-                       // Replace all other \n with spaces
-                       t.replace(pos, 1, " ");
+                       if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
+                               // '\n' adjacent to non-alpha-numerics, discard
+                               t.replace(pos, 1, "");
+                       }
+                       else {
+                               // Replace all other \n with spaces
+                               t.replace(pos, 1, " ");
+                       }
+                       if (!ignore_format) {
+                               int count = 0;
+                               while ((pos > count + 1) && (t[pos - 1 -count] == '%')) {
+                                       count++;
+                               }
+                               if (count > 0) {
+                                       t.replace(pos - count, count, "");
+                               }
+                       }
                }
        }
        // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
@@ -3847,10 +3944,10 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                        runparams.for_searchAdv |= OutputParams::SearchNonOutput;
                }
                LYXERR(Debug::FIND, "Stringifying with cur: "
-                      << cur << ", from pos: " << cur.pos() << ", end: " << end);
-               return par.asString(cur.pos(), end,
-                       option,
-                       &runparams);
+                       << cur << ", from pos: " << cur.pos() << ", end: " << end);
+               return from_utf8(latexNamesToUtf8(par.asString(cur.pos(), end,
+                                                               option,
+                                                               &runparams)));
        } else if (cur.inMathed()) {
                CursorSlice cs = cur.top();
                MathData md = cs.cell();
@@ -3859,10 +3956,9 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                         ? md.end()
                         : md.begin() + cs.pos() + len );
                MathData md2;
-               for (MathData::const_iterator it = md.begin() + cs.pos();
-                    it != it_end; ++it)
+               for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it)
                        md2.push_back(*it);
-               docstring s = asString(md2);
+               docstring s = from_utf8(latexNamesToUtf8(asString(md2)));
                LYXERR(Debug::FIND, "Stringified math: '" << s << "'");
                return s;
        }