git.lyx.org Git - lyx.git/blobdiff - src/tex2lyx/texparser.C
Fix bug 2667
[lyx.git] / src / tex2lyx / texparser.C
index 46bb0d957e6476242fcb4a1f0aaa17f0f33e73d6..bdfea89a5357358bb9bc5f4ff6e4495f985adbec 100644 (file)
@@ -20,6 +20,7 @@ using std::endl;
 using std::fill;
 using std::istream;
 using std::istringstream;
+using std::ostringstream;
 using std::ostream;
 using std::string;
 
@@ -39,14 +40,14 @@ void catInit()
        theCatcode[int('}')]  = catEnd;
        theCatcode[int('$')]  = catMath;
        theCatcode[int('&')]  = catAlign;
-       theCatcode[10]   = catNewline;
+       theCatcode[int('\n')] = catNewline;
        theCatcode[int('#')]  = catParameter;
        theCatcode[int('^')]  = catSuper;
        theCatcode[int('_')]  = catSub;
-       theCatcode[0x7f] = catIgnore;
+       theCatcode[0x7f]      = catIgnore;
        theCatcode[int(' ')]  = catSpace;
        theCatcode[int('\t')] = catSpace;
-       theCatcode[13]   = catIgnore;
+       theCatcode[int('\r')] = catNewline;
        theCatcode[int('~')]  = catActive;
        theCatcode[int('%')]  = catComment;
 
@@ -54,6 +55,30 @@ void catInit()
        theCatcode[int('@')]  = catLetter;
 }
 
+
+/*!
+ * Translate a line ending to '\n'.
+ * \p c must have catcode catNewline, and it must be the last character read
+ * from \p is.
+ */
+char getNewline(istream & is, char c)
+{
+       // we have to handle 3 different line endings:
+       // - UNIX (\n)
+       // - MAC  (\r)
+       // - DOS  (\r\n)
+       if (c == '\r') {
+               // MAC or DOS
+               if (is.get(c) && c != '\n') {
+                       // MAC
+                       is.putback(c);
+               }
+               return '\n';
+       }
+       // UNIX
+       return c;
+}
+
 }
 
 
@@ -61,16 +86,6 @@ void catInit()
 // catcodes
 //
 
-mode_type asMode(mode_type oldmode, string const & str)
-{
-       if (str == "mathmode")
-               return MATH_MODE;
-       if (str == "textmode" || str == "forcetext")
-               return TEXT_MODE;
-       return oldmode;
-}
-
-
 CatCode catcode(unsigned char c)
 {
        return theCatcode[c];
@@ -177,20 +192,45 @@ Token const & Parser::get_token()
 }
 
 
+bool Parser::isParagraph() const
+{
+       // A new paragraph in TeX is started
+       // - either by a newline, following any amount of whitespace
+       //   characters (including zero), and another newline
+       // - or the token \par
+       if (curr_token().cat() == catNewline &&
+           (curr_token().cs().size() > 1 ||
+            (next_token().cat() == catSpace &&
+             pos_ < tokens_.size() - 1 &&
+             tokens_[pos_ + 1].cat() == catNewline)))
+               return true;
+       if (curr_token().cat() == catEscape && curr_token().cs() == "par")
+               return true;
+       return false;
+}
+
+
 void Parser::skip_spaces(bool skip_comments)
 {
        // We just silently return if we have no more tokens.
        // skip_spaces() should be callable at any time,
        // the caller must check p::good() anyway.
        while (good()) {
-               if ( next_token().cat() == catSpace ||
-                   (next_token().cat() == catNewline && next_token().cs().size() == 1) ||
-                    next_token().cat() == catComment && next_token().cs().empty())
-                       get_token();
-               else if (skip_comments && next_token().cat() == catComment)
-                       cerr << "  Ignoring comment: " << get_token().asInput();
-               else
+               get_token();
+               if (isParagraph()) {
+                       putback();
                        break;
+               }
+               if ( curr_token().cat() == catSpace ||
+                    curr_token().cat() == catNewline ||
+                   (curr_token().cat() == catComment && curr_token().cs().empty()))
+                       continue;
+               if (skip_comments && curr_token().cat() == catComment)
+                       cerr << "  Ignoring comment: " << curr_token().asInput();
+               else {
+                       putback();
+                       break;
+               }
        }
 }
 
@@ -232,41 +272,85 @@ char Parser::getChar()
 }
 
 
-string Parser::getArg(char left, char right)
+Parser::Arg Parser::getFullArg(char left, char right)
 {
        skip_spaces(true);
 
        // This is needed if a partial file ends with a command without arguments,
        // e. g. \medskip
        if (! good())
-               return string();
+               return std::make_pair(false, string());
 
        string result;
        char c = getChar();
 
-       if (c != left)
+       if (c != left) {
                putback();
-       else
+               return std::make_pair(false, string());
+       } else
                while ((c = getChar()) != right && good()) {
                        // Ignore comments
                        if (curr_token().cat() == catComment) {
                                if (!curr_token().cs().empty())
                                        cerr << "Ignoring comment: " << curr_token().asInput();
                        }
-                       else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline)
-                               result += curr_token().cs();
                        else
-                               result += c;
+                               result += curr_token().asInput();
                }
 
-       return result;
+       return std::make_pair(true, result);
+}
+
+
+string Parser::getArg(char left, char right)
+{
+       return getFullArg(left, right).second;
+}
+
+
+string Parser::getFullOpt()
+{
+       Arg arg = getFullArg('[', ']');
+       if (arg.first)
+               return '[' + arg.second + ']';
+       return arg.second;
 }
 
 
 string Parser::getOpt()
 {
        string const res = getArg('[', ']');
-       return res.size() ? '[' + res + ']' : string();
+       return res.empty() ? string() : '[' + res + ']';
+}
+
+
+string const Parser::verbatimEnvironment(string const & name)
+{
+       if (!good())
+               return string();
+
+       ostringstream os;
+       for (Token t = get_token(); good(); t = get_token()) {
+               if (t.cat() == catBegin) {
+                       putback();
+                       os << '{' << verbatim_item() << '}';
+               } else if (t.asInput() == "\\begin") {
+                       string const env = getArg('{', '}');
+                       os << "\\begin{" << env << '}'
+                          << verbatimEnvironment(env)
+                          << "\\end{" << env << '}';
+               } else if (t.asInput() == "\\end") {
+                       string const end = getArg('{', '}');
+                       if (end != name)
+                               cerr << "\\end{" << end
+                                    << "} does not match \\begin{" << name
+                                    << "}." << endl;
+                       return os.str();
+               } else
+                       os << t.asInput();
+       }
+       cerr << "unexpected end of input" << endl;
+       return os.str();
 }
 
 
@@ -296,10 +380,10 @@ void Parser::tokenize(istream & is)
 
                        case catNewline: {
                                ++lineno_;
-                               string s(1, c);
+                               string s(1, getNewline(is, c));
                                while (is.get(c) && catcode(c) == catNewline) {
                                        ++lineno_;
-                                       s += c;
+                                       s += getNewline(is, c);
                                }
                                if (catcode(c) != catNewline)
                                        is.putback(c);
@@ -313,6 +397,9 @@ void Parser::tokenize(istream & is)
                                string s;
                                while (is.get(c) && catcode(c) != catNewline)
                                        s += c;
+                               // normalise the line ending (consumes the '\n' of a DOS "\r\n")
+                               if (catcode(c) == catNewline)
+                                       c = getNewline(is, c);
                                // Note: The '%' at the beginning and the '\n' at the end
                                // of the comment are not stored.
                                ++lineno_;
@@ -339,8 +426,7 @@ void Parser::tokenize(istream & is)
                        }
 
                        case catIgnore: {
-                               if (c != 13)
-                                       cerr << "ignoring a char: " << int(c) << "\n";
+                               cerr << "ignoring a char: " << int(c) << "\n";
                                break;
                        }