Georg's latest improvements

author André Pönitz <poenitz@gmx.net>

Wed, 5 Nov 2003 10:14:13 +0000 (10:14 +0000)

committer André Pönitz <poenitz@gmx.net>

Wed, 5 Nov 2003 10:14:13 +0000 (10:14 +0000)
author André Pönitz <poenitz@gmx.net>
Wed, 5 Nov 2003 10:14:13 +0000 (10:14 +0000)
committer André Pönitz <poenitz@gmx.net>
Wed, 5 Nov 2003 10:14:13 +0000 (10:14 +0000)
diff --git a/src/tex2lyx/ChangeLog b/src/tex2lyx/ChangeLog

index 95e15dfd121b967b69f5e263541f61fb8089832e..d054b82d63b6c0a9304c6223eb61f85e44d5b5d1 100644 (file)
--- a/src/tex2lyx/ChangeLog
+++ b/src/tex2lyx/ChangeLog
@@ -1,3 +1,27 @@
+2003-11-03  Georg Baum  <Georg.Baum@post.rwth-aachen.de>
+
+       * math.C:
+       * table.C:
+       * text.C:
+       * context.[Ch]: New functions Context::set_item(),
+       Context::new_paragraph(ostream & os) and Context::atParagraphStart()
+       to make Context usage more explicit
+       * texparser.[Ch]: Rework Parser::tokenize (see comment in texparser.h)
+       * table.C:
+       * math.C:
+       * texparser.C: Don't silently drop comments
+       * texparser.C: Token::asInput() does not append a space anymore
+       * texparser.[Ch]: Renamed Parser::prev_token() to Parser::curr_token().
+       New function Parser::prev_token() now really returns the previous token
+       * context.[Ch]:
+       * text.C: Convert known vspaces at paragraph start to \\added_space_top
+       * preamble.C: Don't put out newlines twice.
+       * text.C: Fix minipage position bug
+       * text.C: Fix \labelwidthstring bug
+       * text.C: Recognize alignment environments
+       * text.C: Fix a few cases of incorrect context usage, resulting
+       in missing or superfluous \begin_layout / \end_layout lines.
+
  2003-10-23  Georg Baum  <Georg.Baum@post.rwth-aachen.de>
  
         * math.C:
@@ -33,8 +57,6 @@
         - handle optional arg to \item as ERT in itemize environment, since LyX
                      does not support it directly
  
-
-
  2003-10-18  Lars Gullik Bjønnes  <larsbj@gullik.net>
  
         * Makefile.am (BUILT_SOURCES): use this instead of "linked_files"
diff --git a/src/tex2lyx/context.C b/src/tex2lyx/context.C

index 3318058f3b5f2829c40f9401077e4d1f526f6490..c1a89075ed9806c00346f8239af977b744a20650 100644 (file)
--- a/src/tex2lyx/context.C
+++ b/src/tex2lyx/context.C
@@ -23,7 +23,7 @@ namespace {
  
  void begin_layout(ostream & os, LyXLayout_ptr layout)
  {
-       os << "\n\\begin_layout " << layout->name() << "\n\n";
+       os << "\n\\begin_layout " << layout->name() << "\n";
  }
  
  
@@ -94,11 +94,12 @@ void Context::check_layout(ostream & os)
                         begin_layout(os, layout);
                         need_layout=false;
                         need_end_layout = true;
-                       if (!extra_stuff.empty()) {
-                               os << extra_stuff;
-                               extra_stuff.erase();
-                       }
                 }
+               if (!extra_stuff.empty()) {
+                       os << extra_stuff;
+                       extra_stuff.erase();
+               }
+               os << "\n";
         }
  }
  
@@ -140,6 +141,20 @@ void Context::check_end_deeper(ostream & os)
  }
  
  
+void Context::set_item()
+{
+       need_layout = true;
+       has_item = true;
+}
+
+
+void Context::new_paragraph(ostream & os)
+{
+       check_end_layout(os);
+       need_layout = true;
+}
+
+
  void Context::dump(ostream & os, string const & desc) const
  {
         os << "\n" << desc <<" [";
@@ -147,6 +162,12 @@ void Context::dump(ostream & os, string const & desc) const
                 os << "need_layout ";
         if (need_end_layout)
                 os << "need_end_layout ";
+       if (need_end_deeper)
+               os << "need_end_deeper ";
+       if (has_item)
+               os << "has_item ";
+       if (deeper_paragraph)
+               os << "deeper_paragraph ";
         if (!extra_stuff.empty())
                 os << "extrastuff=[" << extra_stuff << "] ";
         os << "layout=" << layout->name();
diff --git a/src/tex2lyx/context.h b/src/tex2lyx/context.h

index 6bf72e066e564ddbaa171c3c44c755af6c27418a..e8c70eec6a30b93b5f17a52ae1c85048caf880ab 100644 (file)
--- a/src/tex2lyx/context.h
+++ b/src/tex2lyx/context.h
@@ -37,6 +37,15 @@ struct Context {
         // description \c desc.
         void dump(std::ostream &, std::string const & desc = "context") const;
  
+       /// Are we just beginning a new paragraph?
+       bool atParagraphStart() const { return need_layout; }
+
+       /// Begin an item in a list environment
+       void set_item();
+
+       /// Start a new paragraph
+       void new_paragraph(std::ostream & os);
+
         // Do we need to output some \begin_layout command before the
         // next characters?
         bool need_layout;
diff --git a/src/tex2lyx/math.C b/src/tex2lyx/math.C

index 5e84b5d50dc23f25d6a45a2b92ada7720e3b73a8..a170da7db9f29a74aecfd74b293919b2b987a5dc 100644 (file)
--- a/src/tex2lyx/math.C
+++ b/src/tex2lyx/math.C
@@ -100,7 +100,6 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode)
                 }
  
                 else if (t.cat() == catLetter ||
-                              t.cat() == catSpace ||
                                t.cat() == catSuper ||
                                t.cat() == catSub ||
                                t.cat() == catOther ||
@@ -109,15 +108,6 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode)
                                t.cat() == catParameter)
                         os << t.character();
  
-               else if (t.cat() == catNewline) {
-                       //if (p.next_token().cat() == catNewline) {
-                       //      p.get_token();
-                       //      handle_par(os);
-                       //} else {
-                               os << "\n "; // note the space
-                       //}
-               }
-
                 else if (t.cat() == catBegin) {
                         os << '{';
                         parse_math(p, os, FLAG_BRACE_LAST, mode);
@@ -130,8 +120,13 @@ void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode)
                         os << "unexpected '}' in math\n";
                 }
  
-               else if (t.cat() == catComment)
-                       handle_comment(p);
+               else if (t.cat() == catComment) {
+                       if (t.cs().size())
+                               cerr << "Ignoring comment: " << t.asInput();
+                       else
+                               // "%\n" combination
+                               p.skip_spaces();
+               }
  
                 //
                 // control sequences
diff --git a/src/tex2lyx/preamble.C b/src/tex2lyx/preamble.C

index 3eb8d0fd0303585f9b4207338d6ad363d453f6b8..708fdd90e13c851ab9d3cb46bd695d2b944e5e76 100644 (file)
--- a/src/tex2lyx/preamble.C
+++ b/src/tex2lyx/preamble.C
@@ -188,7 +188,7 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
         while (p.good()) {
                 Token const & t = p.get_token();
  
-               if (t.cs() == "documentclass") {
+               if (t.cat() == catEscape && t.cs() == "documentclass") {
                         is_full_document = true;
                         break;
                 }
@@ -206,7 +206,6 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
                 // cat codes
                 //
                 if (t.cat() == catLetter ||
-                         t.cat() == catSpace ||
                           t.cat() == catSuper ||
                           t.cat() == catSub ||
                           t.cat() == catOther ||
@@ -215,24 +214,26 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
                           t.cat() == catBegin ||
                           t.cat() == catEnd ||
                           t.cat() == catAlign ||
-                         t.cat() == catNewline ||
                           t.cat() == catParameter)
                 h_preamble << t.character();
  
+               else if (t.cat() == catSpace || t.cat() == catNewline)
+                       h_preamble << t.asInput();
+
                 else if (t.cat() == catComment)
-                       handle_comment(p);
+                       h_preamble << t.asInput();
  
                 else if (t.cs() == "pagestyle")
                         h_paperpagestyle = p.verbatim_item();
  
                 else if (t.cs() == "makeatletter") {
                         p.setCatCode('@', catLetter);
-                       h_preamble << "\\makeatletter\n";
+                       h_preamble << "\\makeatletter";
                 }
  
                 else if (t.cs() == "makeatother") {
                         p.setCatCode('@', catOther);
-                       h_preamble << "\\makeatother\n";
+                       h_preamble << "\\makeatother";
                 }
  
                 else if (t.cs() == "newcommand" || t.cs() == "renewcommand"
@@ -246,24 +247,24 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
                         string const opts = p.getOpt();
                         string const body = p.verbatim_item();
                         // only non-lyxspecific stuff
-                       if (name != "\\noun "
-                                 && name != "\\tabularnewline "
-                           && name != "\\LyX "
-                                 && name != "\\lyxline "
-                                 && name != "\\lyxaddress "
-                                 && name != "\\lyxrightaddress "
-                                 && name != "\\boldsymbol "
-                                 && name != "\\lyxarrow ") {
+                       if (   name != "\\noun"
+                           && name != "\\tabularnewline"
+                           && name != "\\LyX"
+                           && name != "\\lyxline"
+                           && name != "\\lyxaddress"
+                           && name != "\\lyxrightaddress"
+                           && name != "\\boldsymbol"
+                           && name != "\\lyxarrow") {
                                 ostringstream ss;
                                 ss << '\\' << t.cs();
                                 if (star)
                                         ss << '*';
-                               ss << '{' << name << '}' << opts << '{' << body << "}\n";
+                               ss << '{' << name << '}' << opts << '{' << body << "}";
                                 h_preamble << ss.str();
  /*
                                 ostream & out = in_preamble ? h_preamble : os;
                                 out << "\\" << t.cs() << "{" << name << "}"
-                                   << opts << "{" << body << "}\n";
+                                   << opts << "{" << body << "}";
  */
                         }
                 }
@@ -301,7 +302,6 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
                         ss << p.getOpt();
                         ss << '{' << p.verbatim_item() << '}';
                         ss << '{' << p.verbatim_item() << '}';
-                       ss << '\n';
                         if (name != "lyxcode" && name != "lyxlist"
                                         && name != "lyxrightadress" && name != "lyxaddress")
                                 h_preamble << ss.str();
@@ -311,7 +311,7 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
                         string name = p.get_token().cs();
                         while (p.next_token().cat() != catBegin)
                                 name += p.get_token().asString();
-                       h_preamble << "\\def\\" << name << '{' << p.verbatim_item() << "}\n";
+                       h_preamble << "\\def\\" << name << '{' << p.verbatim_item() << "}";
                 }
  
                 else if (t.cs() == "newcolumntype") {
@@ -328,7 +328,7 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
                         h_preamble << "\\newcolumntype{" << name << "}";
                         if (nargs)
                                 h_preamble << "[" << nargs << "]";
-                       h_preamble << "{" << p.verbatim_item() << "}\n";
+                       h_preamble << "{" << p.verbatim_item() << "}";
                 }
  
                 else if (t.cs() == "setcounter") {
@@ -339,23 +339,21 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
                         else if (name == "tocdepth")
                                 h_tocdepth = content;
                         else
-                               h_preamble << "\\setcounter{" << name << "}{" << content << "}\n";
+                               h_preamble << "\\setcounter{" << name << "}{" << content << "}";
                 }
  
                 else if (t.cs() == "setlength") {
                         string const name = p.verbatim_item();
                         string const content = p.verbatim_item();
+                       // Is this correct?
                         if (name == "parskip")
                                 h_paragraph_separation = "skip";
                         else if (name == "parindent")
                                 h_paragraph_separation = "skip";
                         else
-                               h_preamble << "\\setlength{" << name << "}{" << content << "}\n";
+                               h_preamble << "\\setlength{" << name << "}{" << content << "}";
                 }
  
-               else if (t.cs() == "par")
-                       h_preamble << '\n';
-
                 else if (t.cs() == "begin") {
                         string const name = p.getArg('{', '}');
                         if (name == "document")
@@ -364,8 +362,9 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
                 }
  
                 else if (t.cs().size())
-                       h_preamble << '\\' << t.cs() << ' ';
+                       h_preamble << '\\' << t.cs();
         }
+       p.skip_spaces();
  
         // Force textclass if the user wanted it
         if (forceclass.size()) {
diff --git a/src/tex2lyx/table.C b/src/tex2lyx/table.C

index 70ae2e47084cc3d27cb532c0bc12b3b52c99dd37..2f6e18b2c4dacb9b4140c390dc3b34d606294618 100644 (file)
--- a/src/tex2lyx/table.C
+++ b/src/tex2lyx/table.C
@@ -192,13 +192,14 @@ void parse_table(Parser & p, ostream & os, unsigned flags)
                         }
                 }
  
+               else if (t.cat() == catSpace || t.cat() == catNewline)
+                               os << t.cs();
+
                 else if (t.cat() == catLetter ||
-                              t.cat() == catSpace ||
                                t.cat() == catSuper ||
                                t.cat() == catSub ||
                                t.cat() == catOther ||
                                t.cat() == catActive ||
-                              t.cat() == catNewline ||
                                t.cat() == catParameter)
                         os << t.character();
  
@@ -216,6 +217,7 @@ void parse_table(Parser & p, ostream & os, unsigned flags)
  
                 else if (t.cat() == catAlign) {
                         os << TAB;
+                       p.skip_spaces();
                 }
  
                 else if (t.cs() == "tabularnewline" || t.cs() == "\\") {
@@ -232,7 +234,7 @@ void parse_table(Parser & p, ostream & os, unsigned flags)
                         hlines += "\\cline{" + p.verbatim_item() + '}';
  
                 else if (t.cat() == catComment)
-                       handle_comment(p);
+                       os << t.asInput();
  
                 else if (t.cs() == "(") {
                         os << "\\(";
diff --git a/src/tex2lyx/tex2lyx.C b/src/tex2lyx/tex2lyx.C

index fcb63efb4032127bb76f5e4509199fcd6b1319f0..5dad8e63c87a74e447005b24ece19181018e9ea2 100644 (file)
--- a/src/tex2lyx/tex2lyx.C
+++ b/src/tex2lyx/tex2lyx.C
@@ -52,19 +52,6 @@ using lyx::support::IsFileWriteable;
  // Hacks to allow the thing to link in the lyxlayout stuff
  LyXErr lyxerr(std::cerr.rdbuf());
  
-void handle_comment(Parser & p)
-{
-       string s;
-       while (p.good()) {
-               Token const & t = p.get_token();
-               if (t.cat() == catNewline)
-                       break;
-               s += t.asString();
-       }
-       //cerr << "comment: " << s << "\n";
-       p.skip_spaces();
-}
-
  
  string const trim(string const & a, char const * p)
  {
@@ -238,6 +225,13 @@ void tex2lyx(std::istream &is, std::ostream &os)
         active_environments.pop_back();
         ss.seekg(0);
         os << ss.str();
+#ifdef TEST_PARSER
+       p.reset();
+       ofstream parsertest("parsertest.tex");
+       while (p.good())
+               parsertest << p.get_token().asInput();
+       // <origfile> and parsertest.tex should now have identical content
+#endif
  }
  
  
diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h

index d000b9f8eb83c53d2a4581b85f53fbdc7035ffa9..df6e0371d150629c1b036892609d4322d973c3d1 100644 (file)
--- a/src/tex2lyx/tex2lyx.h
+++ b/src/tex2lyx/tex2lyx.h
@@ -46,7 +46,6 @@ void handle_tabular(Parser & p, std::ostream & os, Context & context);
  
  
  /// in tex2lyx.C
-void handle_comment(Parser & p);
  std::string const trim(std::string const & a, char const * p = " \t\n\r");
  
  void split(std::string const & s, std::vector<std::string> & result,
diff --git a/src/tex2lyx/texparser.C b/src/tex2lyx/texparser.C

index a5eac7930c57f2455fd439629523beadde5d3c4b..bd51e3b12251c9d1d6356178ac2e6678cb8d4bd3 100644 (file)
--- a/src/tex2lyx/texparser.C
+++ b/src/tex2lyx/texparser.C
@@ -28,17 +28,6 @@ namespace {
  
  CatCode theCatcode[256];
  
-void skipSpaceTokens(istream & is, char c)
-{
-       // skip trailing spaces
-       while (catcode(c) == catSpace || catcode(c) == catNewline)
-               if (!is.get(c))
-                       break;
-       //cerr << "putting back: " << c << "\n";
-       is.putback(c);
-}
-
-
  void catInit()
  {
         fill(theCatcode, theCatcode + 256, catOther);
@@ -95,12 +84,16 @@ CatCode catcode(unsigned char c)
  
  ostream & operator<<(ostream & os, Token const & t)
  {
-       if (t.cs().size())
+       if (t.cat() == catComment)
+               os << '%' << t.cs() << '\n';
+       else if (t.cat() == catSpace)
+               os << t.cs();
+       else if (t.cat() == catEscape)
                 os << '\\' << t.cs() << ' ';
         else if (t.cat() == catLetter)
                 os << t.character();
         else if (t.cat() == catNewline)
-               os << "[\\n," << t.cat() << "]\n";
+               os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
         else
                 os << '[' << t.character() << ',' << t.cat() << ']';
         return os;
@@ -115,7 +108,11 @@ string Token::asString() const
  
  string Token::asInput() const
  {
-       return char_ ? string(1, char_) : '\\' + cs_ + ' ';
+       if (cat_ == catComment)
+               return '%' + cs_ + '\n';
+       if (cat_ == catSpace || cat_ == catNewline)
+               return cs_;
+       return char_ ? string(1, char_) : '\\' + cs_;
  }
  
  
@@ -152,6 +149,13 @@ void Parser::pop_back()
  
  
  Token const & Parser::prev_token() const
+{
+       static const Token dummy;
+       return pos_ > 1 ? tokens_[pos_ - 2] : dummy;
+}
+
+
+Token const & Parser::curr_token() const
  {
         static const Token dummy;
         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
@@ -173,14 +177,35 @@ Token const & Parser::get_token()
  }
  
  
-void Parser::skip_spaces()
+void Parser::skip_spaces(bool skip_comments)
  {
-       while (1) {
-               if (next_token().cat() == catSpace || next_token().cat() == catNewline)
+       // We just silently return if we have no more tokens.
+       // skip_spaces() should be callable at any time,
+       // the caller must check p::good() anyway.
+       while (good()) {
+               if ( next_token().cat() == catSpace ||
+                   (next_token().cat() == catNewline && next_token().cs().size() == 1) ||
+                    next_token().cat() == catComment && next_token().cs().empty())
                         get_token();
-               else if (next_token().cat() == catComment)
-                       while (next_token().cat() != catNewline)
-                               get_token();
+               else if (skip_comments && next_token().cat() == catComment)
+                       cerr << "  Ignoring comment: " << get_token().asInput();
+               else
+                       break;
+       }
+}
+
+
+void Parser::unskip_spaces(bool skip_comments)
+{
+       while (pos_ > 0) {
+               if ( curr_token().cat() == catSpace ||
+                   (curr_token().cat() == catNewline && curr_token().cs().size() == 1))
+                       putback();
+               else if (skip_comments && curr_token().cat() == catComment) {
+                       // TODO: Get rid of this
+                       cerr << "Unignoring comment: " << curr_token().asInput();
+                       putback();
+               }
                 else
                         break;
         }
@@ -209,7 +234,12 @@ char Parser::getChar()
  
  string Parser::getArg(char left, char right)
  {
-       skip_spaces();
+       skip_spaces(true);
+
+       // This is needed if a partial file ends with a command without arguments,
+       // e. g. \medskip
+       if (! good())
+               return string();
  
         string result;
         char c = getChar();
@@ -217,8 +247,17 @@ string Parser::getArg(char left, char right)
         if (c != left)
                 putback();
         else
-               while ((c = getChar()) != right && good())
-                       result += c;
+               while ((c = getChar()) != right && good()) {
+                       // Ignore comments
+                       if (curr_token().cat() == catComment) {
+                               if (curr_token().cs().size())
+                                       cerr << "Ignoring comment: " << curr_token().asInput();
+                       }
+                       else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline)
+                               result += curr_token().cs();
+                       else
+                               result += c;
+               }
  
         return result;
  }
@@ -245,34 +284,39 @@ void Parser::tokenize(istream & is)
                 //cerr << "reading c: " << c << "\n";
  
                 switch (catcode(c)) {
+                       case catSpace: {
+                               string s(1, c);
+                               while (is.get(c) && catcode(c) == catSpace)
+                                       s += c;
+                               if (catcode(c) != catSpace)
+                                       is.putback(c);
+                               push_back(Token(s, catSpace));
+                               break;
+                       }
+
                         case catNewline: {
                                 ++lineno_;
-                               is.get(c);
-                               if (catcode(c) == catNewline) {
-                                       //do {
-                                               is.get(c);
-                                       //} while (catcode(c) == catNewline);
-                                       push_back(Token("par"));
-                               } else {
-                                       push_back(Token('\n', catNewline));
+                               string s(1, c);
+                               while (is.get(c) && catcode(c) == catNewline) {
+                                       ++lineno_;
+                                       s += c;
                                 }
-                               is.putback(c);
+                               if (catcode(c) != catNewline)
+                                       is.putback(c);
+                               push_back(Token(s, catNewline));
                                 break;
                         }
  
                         case catComment: {
-                               push_back(Token(c, catComment));
+                               // We don't treat "%\n" combinations here specially because
+                               // we want to preserve them in the preamble
+                               string s;
                                 while (is.get(c) && catcode(c) != catNewline)
-                                       push_back(Token(c, catLetter));
-                               push_back(Token(c, catNewline));
+                                       s += c;
+                               // Note: The '%' at the beginning and the '\n' at the end
+                               // of the comment are not stored.
                                 ++lineno_;
-                               is.get(c);
-                               if (catcode(c) == catNewline) {
-                                       push_back(Token("par"));
-                                       ++lineno_;
-                               } else {
-                                       is.putback(c);
-                               }
+                               push_back(Token(s, catComment));
                                 break;
                         }
  
@@ -286,21 +330,14 @@ void Parser::tokenize(istream & is)
                                                 // collect letters
                                                 while (is.get(c) && catcode(c) == catLetter)
                                                         s += c;
-                                               skipSpaceTokens(is, c);
+                                               if (catcode(c) != catLetter)
+                                                       is.putback(c);
                                         }
-                                       push_back(Token(s));
+                                       push_back(Token(s, catEscape));
                                 }
                                 break;
                         }
  
-                       case catSuper:
-                       case catSub: {
-                               push_back(Token(c, catcode(c)));
-                               is.get(c);
-                               skipSpaceTokens(is, c);
-                               break;
-                       }
-
                         case catIgnore: {
                                 if (c != 13)
                                         cerr << "ignoring a char: " << int(c) << "\n";
diff --git a/src/tex2lyx/texparser.h b/src/tex2lyx/texparser.h

index 67ba3e2e6caadcdad9d35560b98d8b1c11241500..9de3600bba50c4a89ec6345b05b4194c2146eea2 100644 (file)
--- a/src/tex2lyx/texparser.h
+++ b/src/tex2lyx/texparser.h
@@ -75,7 +75,7 @@ public:
         ///
         Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
         ///
-       Token(std::string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
+       Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
  
         ///
         std::string const & cs() const { return cs_; }
@@ -100,9 +100,16 @@ private:
  std::ostream & operator<<(std::ostream & os, Token const & t);
  
  
-//
-// Actual parser class
-//
+/*!
+ * Actual parser class
+ *
+ * The parser parses every character of the inputstream into a token
+ * and classifies the token.
+ * The following transformations are done:
+ * - Consecutive spaces are combined into one single token with CatCode catSpace
+ * - Consecutive newlines are combined into one single token with CatCode catNewline
+ * - Comments and %\n combinations are parsed into one token with CatCode catComment
+ */
  
  class Parser {
  
@@ -136,11 +143,15 @@ public:
         ///
         Token const & prev_token() const;
         ///
-       Token const & next_token() const;
+       Token const & curr_token() const;
         ///
+       Token const & next_token() const;
+       /// Make the next token current and return that.
         Token const & get_token();
-       /// skips spaces if any
-       void skip_spaces();
+       /// skips spaces (and comments if \param skip_comments is true)
+       void skip_spaces(bool skip_comments = false);
+       /// puts back spaces (and comments if \param skip_comments is true)
+       void unskip_spaces(bool skip_comments = false);
         ///
         void lex(std::string const & s);
         ///
@@ -156,7 +167,7 @@ public:
         ///
         CatCode getCatCode(char c) const;
  
-//private:
+private:
         ///
         int lineno_;
         ///
diff --git a/src/tex2lyx/text.C b/src/tex2lyx/text.C

index a1817e04101378bdf64d94cd40b32d6985814ec1..d23ebc72fb89b4bea0ac89efb444c472c94ca7ab 100644 (file)
--- a/src/tex2lyx/text.C
+++ b/src/tex2lyx/text.C
@@ -170,8 +170,12 @@ void skip_braces(Parser & p)
  }
  
  
-void handle_ert(ostream & os, string const & s, Context const & context)
+void handle_ert(ostream & os, string const & s, Context & context, bool check_layout = true)
  {
+       if (check_layout) {
+               // We must have a valid layout before outputting the ERT inset.
+               context.check_layout(os);
+       }
         Context newcontext(true, context.textclass);
         begin_inset(os, "ERT");
         os << "\nstatus Collapsed\n";
@@ -187,6 +191,26 @@ void handle_ert(ostream & os, string const & s, Context const & context)
  }
  
  
+void handle_comment(ostream & os, string const & s, Context & context)
+{
+       // TODO: Handle this better
+       Context newcontext(true, context.textclass);
+       begin_inset(os, "ERT");
+       os << "\nstatus Collapsed\n";
+       newcontext.check_layout(os);
+       for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
+               if (*it == '\\')
+                       os << "\n\\backslash \n";
+               else
+                       os << *it;
+       }
+       // make sure that our comment is the last thing on the line
+       os << "\n\\newline";
+       newcontext.check_end_layout(os);
+       end_inset(os);
+}
+
+
  struct isLayout {
         isLayout(string const name) : name_(name) {}
         bool operator()(LyXLayout_ptr const & ptr) {
@@ -217,10 +241,11 @@ void output_command_layout(ostream & os, Parser & p, bool outer,
         context.check_deeper(os);
         context.check_layout(os);
         if (context.layout->optionalargs > 0) {
+               p.skip_spaces();
                 if (p.next_token().character() == '[') {
                         p.get_token(); // eat '['
                         begin_inset(os, "OptArg\n");
-                       os << "collapsed true\n";
+                       os << "collapsed true\n\n";
                         parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
                         end_inset(os);
                 }
@@ -228,9 +253,47 @@ void output_command_layout(ostream & os, Parser & p, bool outer,
         parse_text_snippet(p, os, FLAG_ITEM, outer, context);
         context.check_end_layout(os);
         context.check_end_deeper(os);
+       // We don't really need a new paragraph, but
+       // we must make sure that the next item gets a \begin_layout.
+       parent_context.new_paragraph(os);
  }
  
  
+/*!
+ * Output a space if necessary.
+ * This function gets called for every whitespace token.
+ *
+ * We have three cases here:
+ * 1. A space must be suppressed. Example: The lyxcode case below
+ * 2. A space may be suppressed. Example: Spaces before "\par"
+ * 3. A space must not be suppressed. Example: A space between two words
+ *
+ * We currently handle only 1. and 3., and from 2. only the case of
+ * spaces before newlines as a side effect.
+ *
+ * 2. could be used to suppress as many spaces as possible. This has two effects:
+ * - Reimporting LyX-generated LaTeX files changes almost no whitespace
+ * - Superfluous whitespace from non-LyX-generated LaTeX files is removed.
+ * The drawback is that the logic inside the function becomes
+ * complicated, and that is the reason why it is not implemented.
+ */
+void check_space(Parser const & p, ostream & os, Context & context)
+{
+       Token const next = p.next_token();
+       Token const curr = p.curr_token();
+       // A space before a single newline and vice versa must be ignored.
+       // LyX emits a newline before \end{lyxcode}.
+       // This newline must be ignored,
+       // otherwise LyX will add an additional protected space.
+       if (next.cat() == catSpace ||
+           next.cat() == catNewline ||
+           (next.cs() == "end" && context.layout->free_spacing && curr.cat() == catNewline)) {
+               return;
+       }
+       context.check_layout(os);
+       os << ' ';
+}
+
  void parse_environment(Parser & p, ostream & os, bool outer,
                        Context & parent_context)
  {
@@ -239,6 +302,8 @@ void parse_environment(Parser & p, ostream & os, bool outer,
         const bool is_starred = suffixIs(name, '*');
         string const unstarred_name = rtrim(name, "*");
         active_environments.push_back(name);
+       p.skip_spaces();
+
         if (is_math_env(name)) {
                 parent_context.check_layout(os);
                 begin_inset(os, "Formula ");
@@ -262,13 +327,15 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                         os << "placement " << p.getArg('[', ']') << '\n';
                 }
                 os << "wide " << tostr(is_starred)
-                  << "\ncollapsed false\n";
+                  << "\ncollapsed false\n\n";
                 parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
                 end_inset(os);
+               // We don't really need a new paragraph, but
+               // we must make sure that the next item gets a \begin_layout.
+               parent_context.new_paragraph(os);
         }
  
         else if (name == "minipage") {
-               parent_context.check_layout(os);
                 string position = "1";
                 string inner_pos = "0";
                 string height = "0pt";
@@ -293,8 +360,8 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                                 if (p.next_token().asInput() == "[") {
                                         latex_inner_pos = p.getArg('[', ']');
                                         switch(latex_inner_pos[0]) {
-                                       case 't': inner_pos = "0"; break;
-                                       case 'c': inner_pos = "1"; break;
+                                       case 'c': inner_pos = "0"; break;
+                                       case 't': inner_pos = "1"; break;
                                         case 'b': inner_pos = "2"; break;
                                         case 's': inner_pos = "3"; break;
                                         default:
@@ -318,11 +385,11 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                                 ss << '[' << latex_inner_pos << ']';
                         ss << "{" << width << "}";
                         handle_ert(os, ss.str(), parent_context);
-                       parent_context.check_end_layout(os);
-                       parent_context.need_layout = true;
+                       parent_context.new_paragraph(os);
                         parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
                         handle_ert(os, "\\end{minipage}", parent_context);
                 } else {
+                       parent_context.check_layout(os);
                         begin_inset(os, "Minipage\n");
                         os << "position " << position << '\n';
                         os << "inner_position " << inner_pos << '\n';
@@ -332,11 +399,27 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                         parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
                         end_inset(os);
                 }
-
         }
  
-       else if (name == "center") {
+       // Alignment settings
+       else if (name == "center" || name == "flushleft" || name == "flushright" ||
+                name == "centering" || name == "raggedright" || name == "raggedleft") {
+               // We must begin a new paragraph if not already done
+               if (! parent_context.atParagraphStart()) {
+                       parent_context.check_end_layout(os);
+                       parent_context.new_paragraph(os);
+               }
+               if (name == "flushleft" || name == "raggedright")
+                       parent_context.extra_stuff += "\\align left ";
+               else if (name == "flushright" || name == "raggedleft")
+                       parent_context.extra_stuff += "\\align right ";
+               else
+                       parent_context.extra_stuff += "\\align center ";
                 parse_text(p, os, FLAG_END, outer, parent_context);
+               // Just in case the environment is empty ..
+               parent_context.extra_stuff.erase();
+               // We must begin a new paragraph to reset the alignment
+               parent_context.new_paragraph(os);
         }
  
         // The single '=' is meant here.
@@ -349,9 +432,11 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                 case  LATEX_LIST_ENVIRONMENT:
                         context.extra_stuff = "\\labelwidthstring "
                                 + p.verbatim_item() + '\n';
+                       p.skip_spaces();
                         break;
                 case  LATEX_BIB_ENVIRONMENT:
                         p.verbatim_item(); // swallow next arg
+                       p.skip_spaces();
                         break;
                 default:
                         break;
@@ -360,6 +445,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                 parse_text(p, os, FLAG_END, outer, context);
                 context.check_end_layout(os);
                 context.check_end_deeper(os);
+               parent_context.new_paragraph(os);
         }
  
         else if (name == "appendix") {
@@ -383,19 +469,20 @@ void parse_environment(Parser & p, ostream & os, bool outer,
  
         else if (name == "tabbing") {
                 // We need to remember that we have to handle '\=' specially
-               parent_context.check_layout(os);
                 handle_ert(os, "\\begin{" + name + "}", parent_context);
                 parse_text_snippet(p, os, FLAG_END | FLAG_TABBING, outer, parent_context);
                 handle_ert(os, "\\end{" + name + "}", parent_context);
         }
  
         else {
-               parent_context.check_layout(os);
                 handle_ert(os, "\\begin{" + name + "}", parent_context);
                 parse_text_snippet(p, os, FLAG_END, outer, parent_context);
                 handle_ert(os, "\\end{" + name + "}", parent_context);
         }
+
         active_environments.pop_back();
+       if (name != "math")
+               p.skip_spaces();
  }
  
  } // anonymous namespace
@@ -485,9 +572,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         skip_braces(p);
                 }
  
+               else if (t.cat() == catSpace || (t.cat() == catNewline && t.cs().size() == 1))
+                       check_space(p, os, context);
  
                 else if (t.cat() == catLetter ||
-                              t.cat() == catSpace ||
                                t.cat() == catOther ||
                                t.cat() == catAlign ||
                                t.cat() == catParameter) {
@@ -495,16 +583,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         os << t.character();
                 }
  
-               else if (t.cat() == catNewline) {
-                       if (p.next_token().cat() == catNewline) {
-                               // this should have been be done by
-                               // the parser already
-                               cerr << "what are we doing here?" << endl;
-                               p.get_token();
-                               context.need_layout = true;
-                       } else {
-                               os << " "; // note the space
-                       }
+               else if (t.cat() == catNewline || (t.cat() == catEscape && t.cs() == "par")) {
+                       p.skip_spaces();
+                       context.new_paragraph(os);
                 }
  
                 else if (t.cat() == catActive) {
@@ -519,20 +600,19 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 }
  
                 else if (t.cat() == catBegin) {
-// FIXME???
                         // special handling of size changes
                         context.check_layout(os);
                         bool const is_size = is_known(p.next_token().cs(), known_sizes);
-                       Context newcontext(false, context.textclass);
-//                     need_end_layout = false;
-                       string const s = parse_text(p, FLAG_BRACE_LAST, outer, newcontext);
-//                     need_end_layout = true;
-                       if (s.empty() && p.next_token().character() == '`')
-                               ; // ignore it in  {}``
+                       Token const prev = p.prev_token();
+                       string const s = parse_text(p, FLAG_BRACE_LAST, outer, context);
+                       if (s.empty() && (p.next_token().character() == '`' ||
+                                         (prev.character() == '-' && p.next_token().character())))
+                               ; // ignore it in {}`` or -{}-
                         else if (is_size || s == "[" || s == "]" || s == "*")
                                 os << s;
                         else {
-                               handle_ert(os, "{", context);
+                               handle_ert(os, "{", context, false);
+                               // s will end the current layout and begin a new one if necessary
                                 os << s;
                                 handle_ert(os, "}", context);
                         }
@@ -540,15 +620,26 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
  
                 else if (t.cat() == catEnd) {
                         if (flags & FLAG_BRACE_LAST) {
-                               context.check_end_layout(os);
                                 return;
                         }
                         cerr << "stray '}' in text\n";
                         handle_ert(os, "}", context);
                 }
  
-               else if (t.cat() == catComment)
-                       handle_comment(p);
+               else if (t.cat() == catComment) {
+                       context.check_layout(os);
+                       if (t.cs().size()) {
+                               handle_comment(os, '%' + t.cs(), context);
+                               if (p.next_token().cat() == catNewline) {
+                                       // A newline after a comment line starts a new paragraph
+                                       context.new_paragraph(os);
+                                       p.skip_spaces();
+                               }
+                       } else {
+                               // "%\n" combination
+                               p.skip_spaces();
+                       }
+               }
  
                 //
                 // control sequences
@@ -588,8 +679,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 }
  
                 else if (t.cs() == "item") {
-                       // should be done automatically by Parser::tokenize
-                       //p.skip_spaces();
+                       p.skip_spaces();
                         string s;
                         bool optarg = false;
                         if (p.next_token().character() == '[') {
@@ -598,11 +688,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                 s = parse_text(p, FLAG_BRACK_LAST, outer, newcontext);
                                 optarg = true;
                         }
-                       context.need_layout = true;
-                       context.has_item = true;
+                       context.set_item();
                         context.check_layout(os);
                         if (optarg) {
-                               if (active_environment() == "itemize") {
+                               if (context.layout->labeltype != LABEL_MANUAL) {
                                         // lyx does not support \item[\mybullet] in itemize environments
                                         handle_ert(os, "[", context);
                                         os << s;
@@ -610,13 +699,13 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                 } else if (s.size()) {
                                         // The space is needed to separate the item from the rest of the sentence.
                                         os << s << ' ';
+                                       p.skip_spaces();
                                 }
                         }
                 }
  
                 else if (t.cs() == "bibitem") {
-                       context.need_layout = true;
-                       context.has_item = true;
+                       context.set_item();
                         context.check_layout(os);
                         os << "\\bibitem ";
                         os << p.getOpt();
@@ -624,6 +713,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 }
  
                 else if (t.cs() == "def") {
+                       p.skip_spaces();
                         context.check_layout(os);
                         string name = p.get_token().cs();
                         while (p.next_token().cat() != catBegin)
@@ -631,20 +721,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}', context);
                 }
  
-               else if (t.cs() == "par") {
+               else if (t.cs() == "noindent") {
                         p.skip_spaces();
-                       context.check_end_layout(os);
-                       context.need_layout = true;
+                       context.extra_stuff += "\\noindent ";
                 }
  
                 else if (t.cs() == "appendix") {
-                       context.check_end_layout(os);
-                       Context newcontext(true, context.textclass, context.layout,
-                                       context.layout);
-                       newcontext.check_layout(os);
-                       os << "\\start_of_appendix\n";
-                       parse_text(p, os, FLAG_END, outer, newcontext);
-                       newcontext.check_end_layout(os);
+                       p.skip_spaces();
+                       context.extra_stuff += "\\start_of_appendix ";
                 }
  
                 // Must attempt to parse "Section*" before "Section".
@@ -655,12 +739,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                          newlayout->isCommand()) {
                         p.get_token();
                         output_command_layout(os, p, outer, context, newlayout);
+                       p.skip_spaces();
                 }
  
                 // The single '=' is meant here.
                 else if ((newlayout = findLayout(context.textclass, t.cs())).get() &&
                          newlayout->isCommand()) {
                         output_command_layout(os, p, outer, context, newlayout);
+                       p.skip_spaces();
                 }
  
                 else if (t.cs() == "includegraphics") {
@@ -763,22 +849,25 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 }
  
                 else if (t.cs() == "footnote") {
+                       p.skip_spaces();
                         context.check_layout(os);
                         begin_inset(os, "Foot\n");
-                       os << "collapsed true\n";
+                       os << "collapsed true\n\n";
                         parse_text_in_inset(p, os, FLAG_ITEM, false, context);
                         end_inset(os);
                 }
  
                 else if (t.cs() == "marginpar") {
+                       p.skip_spaces();
                         context.check_layout(os);
                         begin_inset(os, "Marginal\n");
-                       os << "collapsed true\n";
+                       os << "collapsed true\n\n";
                         parse_text_in_inset(p, os, FLAG_ITEM, false, context);
                         end_inset(os);
                 }
  
                 else if (t.cs() == "ensuremath") {
+                       p.skip_spaces();
                         context.check_layout(os);
                         Context newcontext(false, context.textclass);
                         string s = parse_text(p, FLAG_ITEM, false, newcontext);
@@ -793,12 +882,16 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         context.check_layout(os);
                         os << "\n\\hfill\n";
                         skip_braces(p);
+                       p.skip_spaces();
                 }
  
-               else if (t.cs() == "makeindex" || t.cs() == "maketitle")
+               else if (t.cs() == "makeindex" || t.cs() == "maketitle") {
+                       p.skip_spaces();
                         skip_braces(p); // swallow this
+               }
  
                 else if (t.cs() == "tableofcontents") {
+                       p.skip_spaces();
                         context.check_layout(os);
                         begin_inset(os, "LatexCommand \\tableofcontents\n");
                         end_inset(os);
@@ -806,6 +899,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 }
  
                 else if (t.cs() == "listoffigures") {
+                       p.skip_spaces();
                         context.check_layout(os);
                         begin_inset(os, "FloatList figure\n");
                         end_inset(os);
@@ -813,6 +907,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 }
  
                 else if (t.cs() == "listoftables") {
+                       p.skip_spaces();
                         context.check_layout(os);
                         begin_inset(os, "FloatList table\n");
                         end_inset(os);
@@ -820,6 +915,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 }
  
                 else if (t.cs() == "listof") {
+                       p.skip_spaces(true);
                         string const name = p.get_token().asString();
                         if (context.textclass.floats().typeExist(name)) {
                                 context.check_layout(os);
@@ -906,6 +1002,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
  
                 else if (is_known(t.cs(), known_quotes)) {
                         char const ** where = is_known(t.cs(), known_quotes);
+                       context.check_layout(os);
                         begin_inset(os, "Quotes ");
                         os << known_coded_quotes[where - known_quotes];
                         end_inset(os);
@@ -916,6 +1013,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         char const ** where = is_known(t.cs(), known_sizes);
                         context.check_layout(os);
                         os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
+                       p.skip_spaces();
                 }
  
                 else if (t.cs() == "LyX" || t.cs() == "TeX"
@@ -1096,6 +1194,35 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         end_inset(os);
                 }
  
+               else if ( t.cs() == "smallskip" ||
+                         t.cs() == "medskip" ||
+                         t.cs() == "bigskip" ||
+                         t.cs() == "vfill" ||
+                        (t.cs() == "vspace" && p.next_token().asInput() != "*")) {
+                       string arg;
+                       if (t.cs() == "vspace")
+                               arg = p.getArg('{', '}');
+                       else
+                               arg = t.cs();
+                       // We may only add the vspace to the current context if the
+                       // current paragraph is not empty.
+                       if (context.atParagraphStart()
+                           && (p.next_token().cat() != catNewline || p.next_token().cs().size() == 1)
+                           && (! (p.next_token().cat() == catEscape && p.next_token().cs() == "end"))
+                           && (! (p.next_token().cat() == catEscape && p.next_token().cs() == "par"))) {
+                               context.extra_stuff += "\\added_space_top " + arg + " ";
+                               p.skip_spaces();
+                       } else {
+                               if (t.cs() == "vspace")
+                                       handle_ert(os, t.asInput() + '{' + arg + '}', context);
+                               else
+                                       handle_ert(os, t.asInput(), context);
+                       }
+                       // Would be nice to recognize added_space_bottom too...
+                       // At the moment this is parsed as added_space_top of the
+                       // next paragraph.
+               }
+
                 else if (t.cs() == "psfrag") {
                         // psfrag{ps-text}[ps-pos][tex-pos]{tex-text}
                         // TODO: Generalize this!
@@ -1103,7 +1230,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         arguments += '}';
                         arguments += p.getOpt();
                         arguments += p.getOpt();
-                       p.skip_spaces();
                         handle_ert(os, "\\psfrag{" + arguments, context);
                 }
  
@@ -1122,7 +1248,13 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         handle_ert(os, s + ' ', context);
                         */
                         context.check_layout(os);
-                       handle_ert(os, t.asInput() + ' ', context);
+                       string name = t.asInput();
+                       if (p.next_token().asInput() == "*") {
+                               // Starred commands like \vspace*{}
+                               p.get_token();                          // Eat '*'
+                               name += '*';
+                       }
+                       handle_ert(os, name, context);
                 }
  
                 if (flags & FLAG_LEAVE) {
author	André Pönitz <poenitz@gmx.net>
	Wed, 5 Nov 2003 10:14:13 +0000 (10:14 +0000)
committer	André Pönitz <poenitz@gmx.net>
	Wed, 5 Nov 2003 10:14:13 +0000 (10:14 +0000)
src/tex2lyx/ChangeLog		patch \| blob \| history
src/tex2lyx/context.C		patch \| blob \| history
src/tex2lyx/context.h		patch \| blob \| history
src/tex2lyx/math.C		patch \| blob \| history
src/tex2lyx/preamble.C		patch \| blob \| history
src/tex2lyx/table.C		patch \| blob \| history
src/tex2lyx/tex2lyx.C		patch \| blob \| history
src/tex2lyx/tex2lyx.h		patch \| blob \| history
src/tex2lyx/texparser.C		patch \| blob \| history
src/tex2lyx/texparser.h		patch \| blob \| history
src/tex2lyx/text.C		patch \| blob \| history