CALS tables: base implementation of row separators on the new code from XHTML.

[features.git] / src / lyxfind.cpp
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp

index 6308c64de8f4696f0a6ca0f783a8e71e756c1823..53058ddace7f247fceb0448e6a75c663f1ca393e 100644 (file)
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -192,7 +192,7 @@ IgnoreFormats ignoreFormats;
  
  void setIgnoreFormat(string const & type, bool value, bool fromUser)
  {
-  ignoreFormats.setIgnoreFormat(type, value, fromUser);
+       ignoreFormats.setIgnoreFormat(type, value, fromUser); // forwards all three arguments unchanged to the file-level 'ignoreFormats' object
  }
  
  
@@ -830,16 +830,22 @@ string string2regex(string in)
         return temp2;
  }
  
+static void buildAccentsMap();
+
  string correctRegex(string t, bool withformat)
  {
         /* Convert \backslash => \
          * and \{, \}, \[, \] => {, }, [, ]
          */
         string s("");
-       regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))");
+       regex wordre("(\\\\)*(\\\\(([A-Za-z]+|[\\{\\}])( |\\{\\})?|[\\[\\]\\{\\}]))");
         size_t lastpos = 0;
         smatch sub;
         bool backslashed = false;
+       if (accents.empty())
+               buildAccentsMap();
+
+       //LYXERR0("correctRegex input '" << t << "'");
         for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
                 sub = *it;
                 string replace;
@@ -849,9 +855,8 @@ string correctRegex(string t, bool withformat)
                 else {
                         if (sub.str(4) == "backslash") {
                                 replace = "\\";
-                               if (withformat) {
+                               {
                                         // transforms '\backslash \{' into '\{'
-                                       // and '\{' into '{'
                                         string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
                                         if ((next == "\\{") || (next == "\\}")) {
                                                 replace = "";
@@ -863,17 +868,40 @@ string correctRegex(string t, bool withformat)
                                 replace = "^";
                         else if (backslashed) {
                                 backslashed = false;
-                               if (withformat && (sub.str(3) == "{"))
-                                       replace = accents["braceleft"];
-                               else if (withformat && (sub.str(3) == "}"))
-                                       replace = accents["braceright"];
+                               if (withformat) {
+                                       if (sub.str(3) == "{")
+                                               replace = accents["braceleft"];
+                                       else if (sub.str(3) == "}")
+                                               replace = accents["braceright"];
+                                       else {
+                                               // else part should not exist
+                                               LASSERT(1, /**/);
+                                       }
+                               }
                                 else {
-                                       // else part should not exist
-                                       LASSERT(1, /**/);
+                                       if (sub.str(3) == "{")
+                                               replace = "\\{";
+                                       else if (sub.str(3) == "}")
+                                               replace = "\\}";
+                                       else {
+                                               // else part should not exist
+                                               LASSERT(1, /**/);
+                                       }
+                               }
+                       }
+                       else if (sub.str(4) == "{") // transforms '\{' into '{'
+                               replace = "{";
+                       else if (sub.str(4) == "}")
+                                replace = "}";
+                       else {
+                               AccentsIterator it_ac = accents.find(sub.str(4));
+                               if (it_ac == accents.end()) {
+                                       replace = sub.str(2);
+                               }
+                               else {
+                                       replace = it_ac->second;
                                 }
                         }
-                       else
-                               replace = sub.str(3);
                 }
                 if (lastpos < (size_t) sub.position(2))
                         s += t.substr(lastpos, sub.position(2) - lastpos);
@@ -884,6 +912,7 @@ string correctRegex(string t, bool withformat)
                 return t;
         else if (lastpos < t.length())
                 s += t.substr(lastpos, t.length() - lastpos);
+       //LYXERR0("correctRegex output '" << s << "'");
         return s;
  }
  
@@ -905,7 +934,7 @@ string escape_for_regex(string s, bool withformat)
                         if (lastpos == s.size())
                                 break;
                 }
-               size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8);
+               size_t end_pos = s.find("\\endregexp", regex_pos + 8);
                 result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat);
                 lastpos = end_pos + 13;
         }
@@ -1008,7 +1037,7 @@ private:
          ** @todo Normalization should also expand macros, if the corresponding
          ** search option was checked.
          **/
-       string normalize(docstring const & s) const;
+       string normalize(docstring const & s, bool ignore_format) const; // ignore_format == false additionally strips '%' characters that directly precede a line end
         // normalized string to search
         string par_as_string;
         // regular expression to use for searching
@@ -1077,6 +1106,52 @@ static docstring buffer_to_latex(Buffer & buffer)
         return ods.str();
  }
  
+static string latexNamesToUtf8(docstring strIn) // Returns strIn as UTF-8 with backslash-introduced names that are keys of 'accents' replaced by their mapped text
+{
+       string addtmp = to_utf8(strIn); // all matching below works on this UTF-8 string
+       static regex const rmAcc("(\\\\)*(" // group 1: preceding backslashes; group 2: the macro including its leading backslash
+                                        "\\\\([A-Za-z]+\\{.\\})"       // e.g. "ddot{A}" == sub.str(3)
+                                       "|\\\\([A-Za-z]+)( |\\{\\})?"   // e.g. "LyX", "LyX{}", "LyX " == sub.str(4)
+                                       ")"
+                               ); // NOTE(review): assuming 'regex' is a char-based std::regex, '.' is one char, so a multi-byte UTF-8 character in braces falls through to the second alternative -- confirm
+       size_t lastpos = 0; // end of the last processed match inside addtmp; stays 0 until a match is processed
+       smatch sub;
+       string replace;
+       string add(""); // result being assembled
+       if (accents.empty())
+               buildAccentsMap(); // fill the lookup table on first use
+       for (sregex_iterator it_add(addtmp.begin(), addtmp.end(), rmAcc), end; it_add != end; ++it_add) {
+               sub = *it_add;
+               if ((sub.position(2) - sub.position(0)) % 2 == 1) { // odd number of backslashes in front of the macro (presumably it is escaped -- confirm)
+                       continue; // lastpos is left alone, so this text is copied verbatim later
+               }
+               else {
+                       string key;
+                       if (sub.length(3) > 0) // the "name{c}" alternative took part in the match
+                               key = sub.str(3);
+                       else
+                               key = sub.str(4); // bare name, without a trailing ' ' or "{}"
+                       AccentsIterator it_ac = accents.find(key);
+                       if (it_ac == accents.end()) {
+                               replace = sub.str(2); // unknown name: keep the matched text unchanged
+                       }
+                       else {
+                               replace = it_ac->second; // known name: substitute the mapped string
+                       }
+               }
+               if (lastpos < (size_t) sub.position(2))
+                       add += addtmp.substr(lastpos, sub.position(2) - lastpos); // copy the text between the previous processed match and this macro
+               add += replace;
+               lastpos = sub.position(2) + sub.length(2); // resume after all of group 2, including a consumed ' ' or "{}"
+       }
+       if (lastpos == 0) // no match was processed: return the converted input as is
+               add = addtmp;
+       else if (addtmp.length() > lastpos)
+               add += addtmp.substr(lastpos, addtmp.length() - lastpos); // append the tail after the last processed match
+       LYXERR(Debug::FIND, "Adding to search string: '" // trace of the final result, emitted with the Debug::FIND level
+                       << add << "'");
+       return add;
+}
  
  static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions const & opt)
  {
@@ -1101,19 +1176,17 @@ static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions co
                 if (ignoreFormats.getNonContent()) {
                         runparams.for_searchAdv |= OutputParams::SearchNonOutput;
                 }
+               string t("");
                 for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
                         Paragraph const & par = buffer.paragraphs().at(pit);
+                       string add = latexNamesToUtf8(par.asString(pos_type(0), par.size(),
+                                                               option,
+                                                               &runparams));
                         LYXERR(Debug::FIND, "Adding to search string: '"
-                              << par.asString(pos_type(0), par.size(),
-                                              option,
-                                              &runparams)
-                              << "'");
-                       str += par.asString(pos_type(0), par.size(),
-                                           option,
-                                           &runparams);
+                               << add << "'");
+                       t += add;
                 }
                 // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts
-               string t = to_utf8(str);
                 while (regex_replace(t, t, "\\\\(text|lyxmathsym|ensuremath)\\{([^\\}]*)\\}", "$2"));
                 str = from_utf8(t);
         }
@@ -1547,6 +1620,8 @@ void static fillMissingUnicodesymbols()
    addAccents("\\o", getutf8(0x00f8));
    addAccents("\\textcrlambda", getutf8(0x019b));
    addAccents("\\j", getutf8(0x0237));
+  addAccents("\\textrevepsilon", getutf8(0x025c));
+  addAccents("\\textbaru", getutf8(0x0289));
    addAccents("\\textquoteleft", getutf8(0x02bb));
    addAccents("\\textGamma", getutf8(0x0393));
    addAccents("\\Gamma", getutf8(0x0393));
@@ -1895,9 +1970,13 @@ void Intervall::removeAccents()
  {
    if (accents.empty())
      buildAccentsMap();
-  static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
-         "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
-      "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(textquote|brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
+  static regex const accre("\\\\("
+      "([\\S]|[A-Za-z]+)\\{[^\\{\\}]+\\}"
+      "|("
+        "(backslash ([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))"
+       "|[A-Za-z]+"
+      ")"
+      "(?![a-zA-Z]))");
    smatch sub;
    for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
      sub = *itacc;
@@ -2950,7 +3029,7 @@ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
        // Remove the key with all parameters and following spaces
        size_t pos;
        size_t start;
-      if (interval_.par[actual._dataEnd-1] == ' ')
+      if (interval_.par[actual._dataEnd-1] == ' ' || interval_.par[actual._dataEnd-1] == '}')
          start = actual._dataEnd;
        else
          start = actual._dataEnd+1;
@@ -3386,9 +3465,9 @@ void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string rege
                                 balanced--;
                                 if (balanced < 0)
                                         break;
-                               }
-                               skip = 1;
                         }
+                       skip = 1;
+               }
                 if (balanced != 0) {
                         regexIsValid = false;
                         regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
@@ -3457,7 +3536,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                 previous_single_replace = true;
         }
         // When using regexp, braces are hacked already by escape_for_regex()
-       par_as_string = normalize(ds);
+       par_as_string = normalize(ds, opt.ignoreformat);
         open_braces = 0;
         close_wildcards = 0;
  
@@ -3505,6 +3584,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
                 string lead_as_regexp;
                 if (lead_size > 0) {
                         lead_as_regexp = string2regex(par_as_string.substr(0, lead_size));
+                       (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "\\$$", "");
                         (void)regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", "");
                         par_as_string = par_as_string_nolead;
                         LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
@@ -3580,7 +3660,7 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be
  
         docstring docstr = stringifyFromForSearch(opt, cur, len);
         string str;
-       str = normalize(docstr);
+       str = normalize(docstr, opt.ignoreformat);
         if (!opt.ignoreformat) {
                 str = correctlanguagesetting(str, false, !opt.ignoreformat);
                 // remove closing '}' and '\n' to allow for use of '$' in regex
@@ -3774,16 +3854,22 @@ static bool simple_replace(string &t, string from, string to)
  }
  #endif
  
-string MatchStringAdv::normalize(docstring const & s) const
+string MatchStringAdv::normalize(docstring const & s, bool ignore_format) const
  {
         string t;
         t = lyx::to_utf8(s);
         // Remove \n at begin
         while (!t.empty() && t[0] == '\n')
                 t = t.substr(1);
-       // Remove \n at end
-       while (!t.empty() && t[t.size() - 1] == '\n')
-               t = t.substr(0, t.size() - 1);
+       // Remove [%]*\n at end
+       while (!t.empty() && t[t.size() - 1] == '\n') {
+               int count = 1;
+               if (!ignore_format) {
+                       while ((t.size() > 1 + count) && (t[t.size() - 1 - count] == '%'))
+                               count++;
+               }
+               t = t.substr(0, t.size() - count);
+       }
         size_t pos;
         // Handle all other '\n'
         while ((pos = t.find("\n")) != string::npos) {
@@ -3796,13 +3882,24 @@ string MatchStringAdv::normalize(docstring const & s) const
                                 t.replace(pos-2, 3, "");
                         }
                 }
-               else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
-                       // '\n' adjacent to non-alpha-numerics, discard
-                       t.replace(pos, 1, "");
-               }
                 else {
-                       // Replace all other \n with spaces
-                       t.replace(pos, 1, " ");
+                       if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
+                               // '\n' adjacent to non-alpha-numerics, discard
+                               t.replace(pos, 1, "");
+                       }
+                       else {
+                               // Replace all other \n with spaces
+                               t.replace(pos, 1, " ");
+                       }
+                       if (!ignore_format) {
+                               int count = 0;
+                               while ((pos > count + 1) && (t[pos - 1 -count] == '%')) {
+                                       count++;
+                               }
+                               if (count > 0) {
+                                       t.replace(pos - count, count, "");
+                               }
+                       }
                 }
         }
         // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
@@ -3847,10 +3944,10 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                         runparams.for_searchAdv |= OutputParams::SearchNonOutput;
                 }
                 LYXERR(Debug::FIND, "Stringifying with cur: "
-                      << cur << ", from pos: " << cur.pos() << ", end: " << end);
-               return par.asString(cur.pos(), end,
-                       option,
-                       &runparams);
+                       << cur << ", from pos: " << cur.pos() << ", end: " << end);
+               return from_utf8(latexNamesToUtf8(par.asString(cur.pos(), end,
+                                                               option,
+                                                               &runparams)));
         } else if (cur.inMathed()) {
                 CursorSlice cs = cur.top();
                 MathData md = cs.cell();
@@ -3859,10 +3956,9 @@ docstring stringifyFromCursor(DocIterator const & cur, int len)
                          ? md.end()
                          : md.begin() + cs.pos() + len );
                 MathData md2;
-               for (MathData::const_iterator it = md.begin() + cs.pos();
-                    it != it_end; ++it)
+               for (MathData::const_iterator it = md.begin() + cs.pos(); it != it_end; ++it)
                         md2.push_back(*it);
-               docstring s = asString(md2);
+               docstring s = from_utf8(latexNamesToUtf8(asString(md2)));
                 LYXERR(Debug::FIND, "Stringified math: '" << s << "'");
                 return s;
         }