Fix bug #5408: tex2lyx cannot handle verbatim code

author Jean-Marc Lasgouttes <lasgouttes@lyx.org>

Fri, 25 Jan 2013 11:48:52 +0000 (12:48 +0100)

committer Jean-Marc Lasgouttes <lasgouttes@lyx.org>

Mon, 4 Feb 2013 09:25:58 +0000 (10:25 +0100)
author Jean-Marc Lasgouttes <lasgouttes@lyx.org>
Fri, 25 Jan 2013 11:48:52 +0000 (12:48 +0100)
committer Jean-Marc Lasgouttes <lasgouttes@lyx.org>
Mon, 4 Feb 2013 09:25:58 +0000 (10:25 +0100)
diff --git a/lib/layouts/stdlayouts.inc b/lib/layouts/stdlayouts.inc

index 3ec97bd48e74b32763ff87cb02fba30c7da5e3d2..a767d74780290155802f9b48f76222a655e7c126 100644 (file)
--- a/lib/layouts/stdlayouts.inc
+++ b/lib/layouts/stdlayouts.inc
@@ -79,6 +79,7 @@ Style Verbatim
         ParbreakIsNewline       1
         FreeSpacing             1
         PassThru                1
+       KeepEmpty               1
         NewLine                 0
         ParSkip                 0.4
         TopSep                  0.7
diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp

index 6f7dbf31e916704483ece7e1b384f0d227083999..e2af5f21bbb9139a8fe58a00b69e1a821d01eadb 100644 (file)
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@@ -22,39 +22,6 @@ namespace lyx {
  
  namespace {
  
-CatCode theCatcode[256];
-
-void catInit()
-{
-       static bool init_done = false;
-       if (init_done)
-               return;
-       init_done = true;
-
-       fill(theCatcode, theCatcode + 256, catOther);
-       fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
-       fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
-
-       theCatcode[int('\\')] = catEscape;
-       theCatcode[int('{')]  = catBegin;
-       theCatcode[int('}')]  = catEnd;
-       theCatcode[int('$')]  = catMath;
-       theCatcode[int('&')]  = catAlign;
-       theCatcode[int('\n')] = catNewline;
-       theCatcode[int('#')]  = catParameter;
-       theCatcode[int('^')]  = catSuper;
-       theCatcode[int('_')]  = catSub;
-       theCatcode[0x7f]      = catIgnore;
-       theCatcode[int(' ')]  = catSpace;
-       theCatcode[int('\t')] = catSpace;
-       theCatcode[int('\r')] = catNewline;
-       theCatcode[int('~')]  = catActive;
-       theCatcode[int('%')]  = catComment;
-
-       // This is wrong!
-       theCatcode[int('@')]  = catLetter;
-}
-
  /*!
   * Translate a line ending to '\n'.
   * \p c must have catcode catNewline, and it must be the last character read
@@ -79,16 +46,8 @@ char_type getNewline(idocstream & is, char_type c)
         return c;
  }
  
-CatCode catcode(char_type c)
-{
-       if (c < 256)
-               return theCatcode[(unsigned char)c];
-       return catOther;
  }
  
-}
-
-
  //
  // Token
  //
@@ -158,7 +117,8 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
  
  
  Parser::Parser(idocstream & is)
-       : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8")
+       : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"),
+         theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
  {
  }
  
@@ -166,7 +126,8 @@ Parser::Parser(idocstream & is)
  Parser::Parser(string const & s)
         : lineno_(0), pos_(0),
           iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
-         encoding_iconv_("UTF-8")
+         encoding_iconv_("UTF-8"),
+         theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
  {
  }
  
@@ -192,6 +153,57 @@ void Parser::setEncoding(std::string const & e, int const & p)
  }
  
  
+void Parser::catInit()
+{
+       if (curr_cat_ == theCatcodesType_)
+               return;
+       curr_cat_ = theCatcodesType_;
+
+       fill(theCatcode_, theCatcode_ + 256, catOther);
+       fill(theCatcode_ + 'a', theCatcode_ + 'z' + 1, catLetter);
+       fill(theCatcode_ + 'A', theCatcode_ + 'Z' + 1, catLetter);
+       // This is wrong!
+       theCatcode_[int('@')]  = catLetter;
+
+       if (theCatcodesType_ == NORMAL_CATCODES) {
+               theCatcode_[int('\\')] = catEscape;
+               theCatcode_[int('{')]  = catBegin;
+               theCatcode_[int('}')]  = catEnd;
+               theCatcode_[int('$')]  = catMath;
+               theCatcode_[int('&')]  = catAlign;
+               theCatcode_[int('\n')] = catNewline;
+               theCatcode_[int('#')]  = catParameter;
+               theCatcode_[int('^')]  = catSuper;
+               theCatcode_[int('_')]  = catSub;
+               theCatcode_[0x7f]      = catIgnore;
+               theCatcode_[int(' ')]  = catSpace;
+               theCatcode_[int('\t')] = catSpace;
+               theCatcode_[int('\r')] = catNewline;
+               theCatcode_[int('~')]  = catActive;
+               theCatcode_[int('%')]  = catComment;
+       }
+}
+
+CatCode Parser::catcode(char_type c) const
+{
+       if (c < 256)
+               return theCatcode_[(unsigned char)c];
+       return catOther;
+}
+
+
+void Parser::setCatcode(char c, CatCode cat)
+{
+       theCatcode_[(unsigned char)c] = cat;
+}
+
+
+void Parser::setCatcodes(cat_type t)
+{
+       theCatcodesType_ = t;
+}
+
+
  void Parser::setEncoding(std::string const & e)
  {
         //cerr << "setting encoding to " << e << std::endl;
@@ -472,7 +484,7 @@ string Parser::getFullParentheseArg()
  }
  
  
-string const Parser::verbatimEnvironment(string const & name)
+string const Parser::ertEnvironment(string const & name)
  {
         if (!good())
                 return string();
@@ -485,7 +497,7 @@ string const Parser::verbatimEnvironment(string const & name)
                 } else if (t.asInput() == "\\begin") {
                         string const env = getArg('{', '}');
                         os << "\\begin{" << env << '}'
-                          << verbatimEnvironment(env)
+                          << ertEnvironment(env)
                            << "\\end{" << env << '}';
                 } else if (t.asInput() == "\\end") {
                         string const end = getArg('{', '}');
@@ -545,6 +557,34 @@ string const Parser::plainCommand(char left, char right, string const & name)
  }
  
  
+string const Parser::verbatimStuff(string const & end_string)
+{
+       if (!good())
+               return string();
+
+       ostringstream oss;
+       size_t match_index = 0;
+       setCatcodes(VERBATIM_CATCODES);
+       for (Token t = get_token(); good(); t = get_token()) {
+               // FIXME t.asInput() might be longer than we need ?
+               if (t.asInput() == end_string.substr(match_index,
+                                                    t.asInput().length())) {
+                       match_index += t.asInput().length();
+                       if (match_index >= end_string.length())
+                               break;
+               } else if (match_index) {
+                       oss << end_string.substr(0, match_index) << t.asInput();
+                       match_index = 0;
+               } else
+                       oss << t.asInput();
+       }
+       setCatcodes(NORMAL_CATCODES);
+       if (!good())
+               cerr << "unexpected end of input" << endl;
+       return oss.str();
+}
+
+
  void Parser::tokenize_one()
  {
         catInit();
@@ -687,16 +727,4 @@ void Parser::reset()
  }
  
  
-void Parser::setCatCode(char c, CatCode cat)
-{
-       theCatcode[(unsigned char)c] = cat;
-}
-
-
-CatCode Parser::getCatCode(char c) const
-{
-       return theCatcode[(unsigned char)c];
-}
-
-
  } // namespace lyx
diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h

index 18a08ebe86a3c5c548c1ff621e16c5aadb3bfe2d..558b5560bafcd7136ee8939b5a065722049a32f1 100644 (file)
--- a/src/tex2lyx/Parser.h
+++ b/src/tex2lyx/Parser.h
@@ -46,6 +46,12 @@ enum CatCode {
         catInvalid     // 15   <delete>
  };
  
+enum cat_type {
+       NORMAL_CATCODES,
+       VERBATIM_CATCODES,
+       UNDECIDED_CATCODES
+};
+
  
  enum {
         FLAG_BRACE_LAST = 1 << 1,  //  last closing brace ends the parsing
@@ -135,6 +141,13 @@ public:
         ///
         ~Parser();
  
+       ///
+       CatCode catcode(char_type c) const;
+       ///
+       void setCatcode(char c, CatCode cat);
+       /// set parser to normal or verbatim mode
+       void setCatcodes(cat_type t);
+
         /// change the iconv encoding of the input stream
         /// according to the latex encoding and package
         void setEncoding(std::string const & encoding, int const & package);
@@ -202,11 +215,11 @@ public:
         /*!
          * \returns the contents of the environment \p name.
          * <tt>\begin{name}</tt> must be parsed already, <tt>\end{name}</tt>
-        * is parsed but not returned.
+        * is parsed but not returned. This parses nested environments properly.
          */
-       std::string const verbatimEnvironment(std::string const & name);
+       std::string const ertEnvironment(std::string const & name);
         /*
-        * The same as verbatimEnvironment(std::string const & name) but
+        * The same as ertEnvironment(std::string const & name) but
          * \begin and \end commands inside the name environment are not parsed.
          * This function is designed to parse verbatim environments.
          */
@@ -218,6 +231,14 @@ public:
          * This function is designed to parse verbatim commands.
          */
         std::string const plainCommand(char left, char right, std::string const & name);
+       /*
+        * Basically the same as plainEnvironment() but the parsing is
+        * stopped at string \p end_string. Contrary to the other
+        * methods, this uses proper catcode setting. This function is
+        * designed to parse verbatim environments and command. The
+        * intention is to eventually replace all of its siblings.
+        */
+       std::string const verbatimStuff(std::string const & end_string);
         /*!
          * Returns the character of the current token and increments
          * the token position.
@@ -225,7 +246,7 @@ public:
         char getChar();
         ///
         void error(std::string const & msg);
-       /// Parses one token from \p is 
+       /// Parses one token from \p is
         void tokenize_one();
         ///
         void push_back(Token const & t);
@@ -256,12 +277,10 @@ public:
         std::string verbatimOption();
         /// resets the parser to initial state
         void reset();
-       ///
-       void setCatCode(char c, CatCode cat);
-       ///
-       CatCode getCatCode(char c) const;
  
  private:
+       /// Setup catcode table
+       void catInit();
         ///
         int lineno_;
         ///
@@ -276,6 +295,12 @@ private:
         idocstream & is_;
         /// iconv name of the current encoding
         std::string encoding_iconv_;
+       ///
+       CatCode theCatcode_[256];
+       //
+       cat_type theCatcodesType_;
+       //
+       cat_type curr_cat_;
  };
  
  
diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp

index 484b00912b9d76f4ca47ec56c0b1fa775da9f2b3..8d82f6c532fc63c316c707d651bef9b098552157 100644 (file)
--- a/src/tex2lyx/Preamble.cpp
+++ b/src/tex2lyx/Preamble.cpp
@@ -1299,12 +1299,12 @@ void Preamble::parse(Parser & p, string const & forceclass,
  
                 else if (t.cs() == "makeatletter") {
                         // LyX takes care of this
-                       p.setCatCode('@', catLetter);
+                       p.setCatcode('@', catLetter);
                 }
  
                 else if (t.cs() == "makeatother") {
                         // LyX takes care of this
-                       p.setCatCode('@', catOther);
+                       p.setCatcode('@', catOther);
                 }
  
                 else if (t.cs() == "newcommand" || t.cs() == "newcommandx"
diff --git a/src/tex2lyx/table.cpp b/src/tex2lyx/table.cpp

index 751020bb74b2b6552237f1e552768552ee605cf7..eed7bb91c81ad92db450fa87f925cd14c46344f5 100644 (file)
--- a/src/tex2lyx/table.cpp
+++ b/src/tex2lyx/table.cpp
@@ -786,7 +786,7 @@ void parse_table(Parser & p, ostream & os, bool is_long_tabular,
                         // treat the nested environment as a block, don't
                         // parse &, \\ etc, because they don't belong to our
                         // table if they appear.
-                       os << p.verbatimEnvironment(name);
+                       os << p.ertEnvironment(name);
                         os << "\\end{" << name << '}';
                         active_environments.pop_back();
                 }
@@ -1227,7 +1227,7 @@ void handle_tabular(Parser & p, ostream & os, string const & name,
                                                         angle = p.getArg('{', '}');
                                                 }
                                                 active_environments.push_back(env);
-                                               p.verbatimEnvironment(env);
+                                               p.ertEnvironment(env);
                                                 active_environments.pop_back();
                                                 p.skip_spaces();
                                                 if (!p.good() && support::isStrInt(angle))
diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp

index 52047536b34255a3c21174a4cf2caaa34777310e..41ef192041910106e6254ec01646a9cd8222d5c2 100644 (file)
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@@ -924,7 +924,7 @@ void parse_box(Parser & p, ostream & os, unsigned outer_flags,
                 // If yes, we need to output ERT.
                 p.pushPosition();
                 if (inner_flags & FLAG_END)
-                       p.verbatimEnvironment(inner_type);
+                       p.ertEnvironment(inner_type);
                 else
                         p.verbatim_item();
                 p.skip_spaces(true);
@@ -1435,27 +1435,36 @@ void parse_environment(Parser & p, ostream & os, bool outer,
         }
  
         else if (name == "verbatim") {
-               os << "\n\\end_layout\n\n\\begin_layout Verbatim\n";
-               string const s = p.plainEnvironment("verbatim");
+               // FIXME: this should go in the generic code that
+               // handles environments defined in layout file that
+               // have "PassThru 1". However, the code there is
+               // already too complicated for my taste.
+               parent_context.new_paragraph(os);
+               Context context(true, parent_context.textclass,
+                                  &parent_context.textclass[from_ascii("Verbatim")]);
+               context.check_layout(os);
+               string s = p.verbatimStuff("\\end{verbatim}");
+               // ignore one newline at beginning or end of string
+               if (prefixIs(s, "\n"))
+                       s.erase(0,1);
+               if (suffixIs(s, "\n"))
+                       s.erase(s.length(),1);
+
                 string::const_iterator it2 = s.begin();
                 for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
-                       if (*it == '\\')
-                               os << "\\backslash ";
-                       else if (*it == '\n') {
-                               it2 = it + 1;
-                               // avoid adding an empty paragraph at the end
-                               // FIXME: if there are 2 consecutive spaces at the end ignore it
-                               // because LyX will re-add a \n
-                               // This hack must be removed once bug 8049 is fixed!
-                               if ((it + 1 != et) && (it + 2 != et || *it2 != '\n'))
-                                       os << "\n\\end_layout\n\\begin_layout Verbatim\n";
-                       } else
+                       context.check_layout(os);
+                       if (*it == '\\') {
+                               os << "\n\\backslash\n";
+                               context.need_end_layout = true;
+                       } else if (*it == '\n') {
+                               context.new_paragraph(os);
+                       } else {
                                 os << *it;
+                               context.need_end_layout = true;
+                       }
                 }
-               os << "\n\\end_layout\n\n";
+               context.new_paragraph(os);
                 p.skip_spaces();
-               // reset to Standard layout
-               os << "\n\\begin_layout Standard\n";
         }
  
         else if (name == "CJK") {
@@ -1758,7 +1767,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                 parse_arguments("\\begin{" + name + "}", arguments, p, os,
                                 outer, parent_context);
                 if (contents == verbatim)
-                       handle_ert(os, p.verbatimEnvironment(name),
+                       handle_ert(os, p.ertEnvironment(name),
                                    parent_context);
                 else
                         parse_text_snippet(p, os, FLAG_END, outer,
@@ -3819,15 +3828,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
  
                 else if (t.cs() == "verb") {
                         context.check_layout(os);
-                       char const delimiter = p.next_token().character();
-                       // \verb is special: The usual escaping rules do not
-                       // apply, e.g. "\verb+\+" is valid and denotes a single
-                       // backslash (bug #4468). Therefore we do not allow
-                       // escaping in getArg().
-                       string const arg = p.getArg(delimiter, delimiter, false);
-                       ostringstream oss;
-                       oss << "\\verb" << delimiter << arg << delimiter;
-                       handle_ert(os, oss.str(), context);
+                       // set catcodes to verbatim early, just in case.
+                       p.setCatcodes(VERBATIM_CATCODES);
+                       string delim = p.get_token().asInput();
+                       string const arg = p.verbatimStuff(delim);
+                       handle_ert(os, "\\verb" + delim + arg + delim, context);
                 }
  
                 // Problem: \= creates a tabstop inside the tabbing environment
@@ -4574,7 +4579,7 @@ string guessLanguage(Parser & p, string const & lang)
                         p.setEncoding(encoding, Encoding::CJK);
                 else
                         p.setEncoding("UTF-8");
-               string const text = p.verbatimEnvironment("CJK");
+               string const text = p.ertEnvironment("CJK");
                 p.setEncoding(encoding_old);
                 p.skip_spaces();
                 if (!where) {
author	Jean-Marc Lasgouttes <lasgouttes@lyx.org>
	Fri, 25 Jan 2013 11:48:52 +0000 (12:48 +0100)
committer	Jean-Marc Lasgouttes <lasgouttes@lyx.org>
	Mon, 4 Feb 2013 09:25:58 +0000 (10:25 +0100)
lib/layouts/stdlayouts.inc		patch | blob | history
src/tex2lyx/Parser.cpp		patch | blob | history
src/tex2lyx/Parser.h		patch | blob | history
src/tex2lyx/Preamble.cpp		patch | blob | history
src/tex2lyx/table.cpp		patch | blob | history
src/tex2lyx/text.cpp		patch | blob | history