Fix bug 2667

[lyx.git] / src / tex2lyx / texparser.C
diff --git a/src/tex2lyx/texparser.C b/src/tex2lyx/texparser.C

index 46bb0d957e6476242fcb4a1f0aaa17f0f33e73d6..bdfea89a5357358bb9bc5f4ff6e4495f985adbec 100644 (file)
--- a/src/tex2lyx/texparser.C
+++ b/src/tex2lyx/texparser.C
@@ -20,6 +20,7 @@ using std::endl;
  using std::fill;
  using std::istream;
  using std::istringstream;
+using std::ostringstream;
  using std::ostream;
  using std::string;
  
@@ -39,14 +40,14 @@ void catInit()
         theCatcode[int('}')]  = catEnd;
         theCatcode[int('$')]  = catMath;
         theCatcode[int('&')]  = catAlign;
-       theCatcode[10]   = catNewline;
+       theCatcode[int('\n')] = catNewline;
         theCatcode[int('#')]  = catParameter;
         theCatcode[int('^')]  = catSuper;
         theCatcode[int('_')]  = catSub;
-       theCatcode[0x7f] = catIgnore;
+       theCatcode[0x7f]      = catIgnore;
         theCatcode[int(' ')]  = catSpace;
         theCatcode[int('\t')] = catSpace;
-       theCatcode[13]   = catIgnore;
+       theCatcode[int('\r')] = catNewline;
         theCatcode[int('~')]  = catActive;
         theCatcode[int('%')]  = catComment;
  
@@ -54,6 +55,30 @@ void catInit()
         theCatcode[int('@')]  = catLetter;
  }
  
+
+/*!
+ * Translate a line ending to '\n'.
+ * \p c must have catcode catNewline, and it must be the last character read
+ * from \p is.
+ */
+char getNewline(istream & is, char c)
+{
+       // we have to handle 3 different line endings:
+       // - UNIX (\n)
+       // - MAC  (\r)
+       // - DOS  (\r\n)
+       if (c == '\r') {
+               // MAC or DOS
+               if (is.get(c) && c != '\n') {
+                       // MAC
+                       is.putback(c);
+               }
+               return '\n';
+       }
+       // UNIX
+       return c;
+}
+
  }
  
  
@@ -61,16 +86,6 @@ void catInit()
  // catcodes
  //
  
-mode_type asMode(mode_type oldmode, string const & str)
-{
-       if (str == "mathmode")
-               return MATH_MODE;
-       if (str == "textmode" || str == "forcetext")
-               return TEXT_MODE;
-       return oldmode;
-}
-
-
  CatCode catcode(unsigned char c)
  {
         return theCatcode[c];
@@ -177,20 +192,45 @@ Token const & Parser::get_token()
  }
  
  
+bool Parser::isParagraph() const
+{
+       // A new paragraph in TeX is started
+       // - either by a newline, followed by any number of whitespace
+       //   characters (including zero), and another newline
+       // - or by the token \par
+       if (curr_token().cat() == catNewline &&
+           (curr_token().cs().size() > 1 ||
+            (next_token().cat() == catSpace &&
+             pos_ < tokens_.size() - 1 &&
+             tokens_[pos_ + 1].cat() == catNewline)))
+               return true;
+       if (curr_token().cat() == catEscape && curr_token().cs() == "par")
+               return true;
+       return false;
+}
+
+
  void Parser::skip_spaces(bool skip_comments)
  {
         // We just silently return if we have no more tokens.
         // skip_spaces() should be callable at any time,
         // the caller must check p::good() anyway.
         while (good()) {
-               if ( next_token().cat() == catSpace ||
-                   (next_token().cat() == catNewline && next_token().cs().size() == 1) ||
-                    next_token().cat() == catComment && next_token().cs().empty())
-                       get_token();
-               else if (skip_comments && next_token().cat() == catComment)
-                       cerr << "  Ignoring comment: " << get_token().asInput();
-               else
+               get_token();
+               if (isParagraph()) {
+                       putback();
                         break;
+               }
+               if ( curr_token().cat() == catSpace ||
+                    curr_token().cat() == catNewline ||
+                   (curr_token().cat() == catComment && curr_token().cs().empty()))
+                       continue;
+               if (skip_comments && curr_token().cat() == catComment)
+                       cerr << "  Ignoring comment: " << curr_token().asInput();
+               else {
+                       putback();
+                       break;
+               }
         }
  }
  
@@ -232,41 +272,85 @@ char Parser::getChar()
  }
  
  
-string Parser::getArg(char left, char right)
+Parser::Arg Parser::getFullArg(char left, char right)
  {
         skip_spaces(true);
  
         // This is needed if a partial file ends with a command without arguments,
         // e. g. \medskip
         if (! good())
-               return string();
+               return std::make_pair(false, string());
  
         string result;
         char c = getChar();
  
-       if (c != left)
+       if (c != left) {
                 putback();
-       else
+               return std::make_pair(false, string());
+       } else
                 while ((c = getChar()) != right && good()) {
                         // Ignore comments
                         if (curr_token().cat() == catComment) {
                                 if (!curr_token().cs().empty())
                                         cerr << "Ignoring comment: " << curr_token().asInput();
                         }
-                       else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline)
-                               result += curr_token().cs();
                         else
-                               result += c;
+                               result += curr_token().asInput();
                 }
  
-       return result;
+       return std::make_pair(true, result);
+}
+
+
+string Parser::getArg(char left, char right)
+{
+       return getFullArg(left, right).second;
+}
+
+
+string Parser::getFullOpt()
+{
+       Arg arg = getFullArg('[', ']');
+       if (arg.first)
+               return '[' + arg.second + ']';
+       return arg.second;
  }
  
  
  string Parser::getOpt()
  {
         string const res = getArg('[', ']');
-       return res.size() ? '[' + res + ']' : string();
+       return res.empty() ? string() : '[' + res + ']';
+}
+
+
+string const Parser::verbatimEnvironment(string const & name)
+{
+       if (!good())
+               return string();
+
+       ostringstream os;
+       for (Token t = get_token(); good(); t = get_token()) {
+               if (t.cat() == catBegin) {
+                       putback();
+                       os << '{' << verbatim_item() << '}';
+               } else if (t.asInput() == "\\begin") {
+                       string const env = getArg('{', '}');
+                       os << "\\begin{" << env << '}'
+                          << verbatimEnvironment(env)
+                          << "\\end{" << env << '}';
+               } else if (t.asInput() == "\\end") {
+                       string const end = getArg('{', '}');
+                       if (end != name)
+                               cerr << "\\end{" << end
+                                    << "} does not match \\begin{" << name
+                                    << "}." << endl;
+                       return os.str();
+               } else
+                       os << t.asInput();
+       }
+       cerr << "unexpected end of input" << endl;
+       return os.str();
  }
  
  
@@ -296,10 +380,10 @@ void Parser::tokenize(istream & is)
  
                         case catNewline: {
                                 ++lineno_;
-                               string s(1, c);
+                               string s(1, getNewline(is, c));
                                 while (is.get(c) && catcode(c) == catNewline) {
                                         ++lineno_;
-                                       s += c;
+                                       s += getNewline(is, c);
                                 }
                                 if (catcode(c) != catNewline)
                                         is.putback(c);
@@ -313,6 +397,9 @@ void Parser::tokenize(istream & is)
                                 string s;
                                 while (is.get(c) && catcode(c) != catNewline)
                                         s += c;
+                               // handle possible DOS line ending
+                               if (catcode(c) == catNewline)
+                                       c = getNewline(is, c);
                                 // Note: The '%' at the beginning and the '\n' at the end
                                 // of the comment are not stored.
                                 ++lineno_;
@@ -339,8 +426,7 @@ void Parser::tokenize(istream & is)
                         }
  
                         case catIgnore: {
-                               if (c != 13)
-                                       cerr << "ignoring a char: " << int(c) << "\n";
+                               cerr << "ignoring a char: " << int(c) << "\n";
                                 break;
                         }