improve tex2lyx paragraph and comment handling

author Georg Baum <Georg.Baum@post.rwth-aachen.de>

Fri, 18 Jun 2004 06:47:19 +0000 (06:47 +0000)

committer Georg Baum <Georg.Baum@post.rwth-aachen.de>

Fri, 18 Jun 2004 06:47:19 +0000 (06:47 +0000)
author Georg Baum <Georg.Baum@post.rwth-aachen.de>
Fri, 18 Jun 2004 06:47:19 +0000 (06:47 +0000)
committer Georg Baum <Georg.Baum@post.rwth-aachen.de>
Fri, 18 Jun 2004 06:47:19 +0000 (06:47 +0000)
diff --git a/src/insets/ChangeLog b/src/insets/ChangeLog

index 93eb3d08b14f7024251883f37589184035bbb9ad..46845da6b3aff8c9f42bd2182d18b08ca61789fb 100644 (file)
--- a/src/insets/ChangeLog
+++ b/src/insets/ChangeLog
@@ -1,3 +1,7 @@
+2004-06-18  Georg Baum  <Georg.Baum@post.rwth-aachen.de>
+
+       * insetgraphics.C, insettabular.C: s/wether/whether/g
+
  2004-06-10  Georg Baum  <Georg.Baum@post.rwth-aachen.de>
  
         * insetgraphics.C (stripExtension): new
diff --git a/src/insets/insetgraphics.C b/src/insets/insetgraphics.C

index 043583f31a9f710fa59e6fa06a994798fcd4ba55..fe73b7ff06fffd99b30d67401a35ae57d9e3e5d6 100644 (file)
--- a/src/insets/insetgraphics.C
+++ b/src/insets/insetgraphics.C
@@ -477,7 +477,7 @@ string const InsetGraphics::prepareFile(Buffer const & buf,
  
         if (zipped) {
                 if (params().noUnzip) {
-                       // We don't know wether latex can actually handle
+                       // We don't know whether latex can actually handle
                         // this file, but we can't check, because that would
                         // mean to unzip the file and thereby making the
                         // noUnzip parameter meaningless.
diff --git a/src/insets/insettabular.C b/src/insets/insettabular.C

index edbe8a6277b222b707f85b7c9fb2ff99ab4baa89..91be3b9d370ac60e70342eb237d794dc92db17cf 100644 (file)
--- a/src/insets/insettabular.C
+++ b/src/insets/insettabular.C
@@ -1324,7 +1324,7 @@ void InsetTabular::tabularFeatures(LCursor & cur,
  #if 0
                 // just multicol for one Single Cell
                 if (!hasSelection()) {
-                       // check wether we are completly in a multicol
+                       // check whether we are completely in a multicol
                         if (tabular.isMultiColumn(actcell))
                                 tabular.unsetMultiColumn(actcell);
                         else
diff --git a/src/tex2lyx/ChangeLog b/src/tex2lyx/ChangeLog

index c91081b401dbe3d2919111c5197f39f402babc80..72dcf2c355e1b00a79db7e8d07249ad08ec50972 100644 (file)
--- a/src/tex2lyx/ChangeLog
+++ b/src/tex2lyx/ChangeLog
@@ -1,3 +1,12 @@
+2004-06-18  Georg Baum  <Georg.Baum@post.rwth-aachen.de>
+
+       * preamble.C, text.C: s/wether/whether/g
+       * text.C (eat_whitespace): new method
+       * texparser.C (getArg): always use curr_token().asInput()
+       * texparser.[Ch] (isParagraph): new method
+       * texparser.C (skip_spaces): handle "\n +\n" correctly
+       * texparser.[Ch] (asMode): remove, since it is unused
+
  2004-05-27  Lars Gullik Bjonnes  <larsbj@gullik.net>
  
         * Makefile.am (BUILT_SOURCES): move lengthcommon.C from here...
diff --git a/src/tex2lyx/preamble.C b/src/tex2lyx/preamble.C

index 7d61b0aeeae5d06f670b7a8d6a6dd3e541dc2c67..874eb37859ccca78518d08351f03829c655aac7f 100644 (file)
--- a/src/tex2lyx/preamble.C
+++ b/src/tex2lyx/preamble.C
@@ -184,7 +184,7 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force
         special_columns['D'] = 3;
         bool is_full_document = false;
  
-       // determine wether this is a full document or a fragment for inclusion
+       // determine whether this is a full document or a fragment for inclusion
         while (p.good()) {
                 Token const & t = p.get_token();
  
diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h

index 97b6f66dd7f53eaecdc6a8c6efc3396b2bba3952..66a3c16f6b85270d15becaf23c7666dd51ba2e19 100644 (file)
--- a/src/tex2lyx/tex2lyx.h
+++ b/src/tex2lyx/tex2lyx.h
@@ -34,6 +34,7 @@ void parse_text(Parser & p, std::ostream & os, unsigned flags, bool outer,
  //std::string parse_text(Parser & p, unsigned flags, const bool outer,
  //                    Context & context);
  
+/// parses a subdocument, usually useful in insets (whence the name)
  void parse_text_in_inset(Parser & p, std::ostream & os, unsigned flags,
                          bool outer, Context & context);
  
diff --git a/src/tex2lyx/texparser.C b/src/tex2lyx/texparser.C

index 46bb0d957e6476242fcb4a1f0aaa17f0f33e73d6..ea69d460d49c311960521319a3610ea385fbb50a 100644 (file)
--- a/src/tex2lyx/texparser.C
+++ b/src/tex2lyx/texparser.C
@@ -61,16 +61,6 @@ void catInit()
  // catcodes
  //
  
-mode_type asMode(mode_type oldmode, string const & str)
-{
-       if (str == "mathmode")
-               return MATH_MODE;
-       if (str == "textmode" || str == "forcetext")
-               return TEXT_MODE;
-       return oldmode;
-}
-
-
  CatCode catcode(unsigned char c)
  {
         return theCatcode[c];
@@ -177,20 +167,45 @@ Token const & Parser::get_token()
  }
  
  
+bool Parser::isParagraph() const
+{
+       // A new paragraph in TeX is started
+       // - either by a newline, following any amount of whitespace
+       //   characters (including zero), and another newline
+       // - or the token \par
+       if (curr_token().cat() == catNewline &&
+           (curr_token().cs().size() > 1 ||
+            (next_token().cat() == catSpace &&
+             pos_ < tokens_.size() - 1 &&
+             tokens_[pos_ + 1].cat() == catNewline)))
+               return true;
+       if (curr_token().cat() == catEscape && curr_token().cs() == "par")
+               return true;
+       return false;
+}
+
+
  void Parser::skip_spaces(bool skip_comments)
  {
         // We just silently return if we have no more tokens.
         // skip_spaces() should be callable at any time,
         // the caller must check p::good() anyway.
         while (good()) {
-               if ( next_token().cat() == catSpace ||
-                   (next_token().cat() == catNewline && next_token().cs().size() == 1) ||
-                    next_token().cat() == catComment && next_token().cs().empty())
-                       get_token();
-               else if (skip_comments && next_token().cat() == catComment)
-                       cerr << "  Ignoring comment: " << get_token().asInput();
-               else
+               get_token();
+               if (isParagraph()) {
+                       putback();
                         break;
+               }
+               if ( curr_token().cat() == catSpace ||
+                    curr_token().cat() == catNewline ||
+                   (curr_token().cat() == catComment && curr_token().cs().empty()))
+                       continue;
+               if (skip_comments && curr_token().cat() == catComment)
+                       cerr << "  Ignoring comment: " << curr_token().asInput();
+               else {
+                       putback();
+                       break;
+               }
         }
  }
  
@@ -253,10 +268,8 @@ string Parser::getArg(char left, char right)
                                 if (!curr_token().cs().empty())
                                         cerr << "Ignoring comment: " << curr_token().asInput();
                         }
-                       else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline)
-                               result += curr_token().cs();
                         else
-                               result += c;
+                               result += curr_token().asInput();
                 }
  
         return result;
diff --git a/src/tex2lyx/texparser.h b/src/tex2lyx/texparser.h

index 9de3600bba50c4a89ec6345b05b4194c2146eea2..6ab9946bbb4b9c3bc0b606c0b4a7327044a367cd 100644 (file)
--- a/src/tex2lyx/texparser.h
+++ b/src/tex2lyx/texparser.h
@@ -79,13 +79,13 @@ public:
  
         ///
         std::string const & cs() const { return cs_; }
-       ///
+       /// Returns the catcode of the token
         CatCode cat() const { return cat_; }
         ///
         char character() const { return char_; }
-       ///
+       /// Returns the token as string
         std::string asString() const;
-       ///
+       /// Returns the token verbatim
         std::string asInput() const;
  
  private:
@@ -130,27 +130,29 @@ public:
         std::string getArg(char left, char right);
         /// getArg('[', ']') including the brackets
         std::string getOpt();
-       ///
+       /// Returns the character of the current token and increments the token position.
         char getChar();
         ///
         void error(std::string const & msg);
-       ///
+       /// Parses \p is into tokens
         void tokenize(std::istream & is);
         ///
         void push_back(Token const & t);
         ///
         void pop_back();
-       ///
+       /// The previous token.
         Token const & prev_token() const;
-       ///
+       /// The current token.
         Token const & curr_token() const;
-       ///
+       /// The next token.
         Token const & next_token() const;
         /// Make the next token current and return that.
         Token const & get_token();
-       /// skips spaces (and comments if \param skip_comments is true)
+       /// \return whether the current token starts a new paragraph
+       bool isParagraph() const;
+       /// skips spaces (and comments if \p skip_comments is true)
         void skip_spaces(bool skip_comments = false);
-       /// puts back spaces (and comments if \param skip_comments is true)
+       /// puts back spaces (and comments if \p skip_comments is true)
         void unskip_spaces(bool skip_comments = false);
         ///
         void lex(std::string const & s);
diff --git a/src/tex2lyx/text.C b/src/tex2lyx/text.C

index e6f4227e800bb4f0e586c2d944c4200b9e0c156f..93425ec6767dbb7828cbb6fa57c323165682cde4 100644 (file)
--- a/src/tex2lyx/text.C
+++ b/src/tex2lyx/text.C
@@ -144,8 +144,8 @@ bool splitLatexLength(string const & len, string & value, string & unit)
  }
  
  
-// A simple function to translate a latex length to something lyx can
-// understand. Not perfect, but rather best-effort.
+/// A simple function to translate a latex length to something lyx can
+/// understand. Not perfect, but rather best-effort.
  bool translate_len(string const & length, string & valstring, string & unit)
  {
         if (!splitLatexLength(length, valstring, unit))
@@ -313,6 +313,9 @@ LyXLayout_ptr findLayout(LyXTextClass const & textclass,
  }
  
  
+void eat_whitespace(Parser &, ostream &, Context &, bool);
+
+
  void output_command_layout(ostream & os, Parser & p, bool outer,
                            Context & parent_context,
                            LyXLayout_ptr newlayout)
@@ -323,13 +326,14 @@ void output_command_layout(ostream & os, Parser & p, bool outer,
         context.check_deeper(os);
         context.check_layout(os);
         if (context.layout->optionalargs > 0) {
-               p.skip_spaces();
+               eat_whitespace(p, os, context, false);
                 if (p.next_token().character() == '[') {
                         p.get_token(); // eat '['
                         begin_inset(os, "OptArg\n");
                         os << "status collapsed\n\n";
                         parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
                         end_inset(os);
+                       eat_whitespace(p, os, context, false);
                 }
         }
         parse_text_snippet(p, os, FLAG_ITEM, outer, context);
@@ -378,7 +382,7 @@ void check_space(Parser const & p, ostream & os, Context & context)
  
  
  /*!
- * Check wether \param command is a known command. If yes,
+ * Check whether \p command is a known command. If yes,
   * handle the command with all arguments.
   * \return true if the command was parsed, false otherwise.
   */
@@ -522,8 +526,8 @@ void parse_environment(Parser & p, ostream & os, bool outer,
         string const name = p.getArg('{', '}');
         const bool is_starred = suffixIs(name, '*');
         string const unstarred_name = rtrim(name, "*");
+       eat_whitespace(p, os, parent_context, false);
         active_environments.push_back(name);
-       p.skip_spaces();
  
         if (is_math_env(name)) {
                 parent_context.check_layout(os);
@@ -651,9 +655,52 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                 p.skip_spaces();
  }
  
-} // anonymous namespace
+/// parses a comment and outputs it to \p os.
+void parse_comment(Parser & p, ostream & os, Token const & t, Context & context)
+{
+       BOOST_ASSERT(t.cat() == catComment);
+       context.check_layout(os);
+       if (!t.cs().empty()) {
+               handle_comment(os, '%' + t.cs(), context);
+               if (p.next_token().cat() == catNewline) {
+                       // A newline after a comment line starts a new
+                       // paragraph
+                       if(!context.atParagraphStart()) {
+                               // Only start a new paragraph if not already
+                               // done (we might get called recursively)
+                               context.new_paragraph(os);
+                       }
+                       eat_whitespace(p, os, context, true);
+               }
+       } else {
+               // "%\n" combination
+               p.skip_spaces();
+       }
+}
  
  
+/*!
+ * Reads spaces and comments until the first non-space, non-comment token.
+ * New paragraphs (double newlines or \\par) are handled like simple spaces
+ * if \p eatParagraph is true.
+ * Spaces are skipped, but comments are written to \p os.
+ */
+void eat_whitespace(Parser & p, ostream & os, Context & context,
+                    bool eatParagraph)
+{
+       while (p.good()) {
+               Token const & t = p.get_token();
+               if (t.cat() == catComment)
+                       parse_comment(p, os, t, context);
+               else if ((! eatParagraph && p.isParagraph()) ||
+                        (t.cat() != catSpace && t.cat() != catNewline)) {
+                       p.putback();
+                       return;
+               }
+       }
+}
+
+} // anonymous namespace
  
  
  void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
@@ -738,7 +785,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         skip_braces(p);
                 }
  
-               else if (t.cat() == catSpace || (t.cat() == catNewline && t.cs().size() == 1))
+               else if (t.cat() == catSpace || (t.cat() == catNewline && ! p.isParagraph()))
                         check_space(p, os, context);
  
                 else if (t.cat() == catLetter ||
@@ -749,9 +796,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         os << t.character();
                 }
  
-               else if (t.cat() == catNewline || (t.cat() == catEscape && t.cs() == "par")) {
-                       p.skip_spaces();
+               else if (p.isParagraph()) {
                         context.new_paragraph(os);
+                       eat_whitespace(p, os, context, true);
                 }
  
                 else if (t.cat() == catActive) {
@@ -792,20 +839,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         handle_ert(os, "}", context);
                 }
  
-               else if (t.cat() == catComment) {
-                       context.check_layout(os);
-                       if (!t.cs().empty()) {
-                               handle_comment(os, '%' + t.cs(), context);
-                               if (p.next_token().cat() == catNewline) {
-                                       // A newline after a comment line starts a new paragraph
-                                       context.new_paragraph(os);
-                                       p.skip_spaces();
-                               }
-                       } else {
-                               // "%\n" combination
-                               p.skip_spaces();
-                       }
-               }
+               else if (t.cat() == catComment)
+                       parse_comment(p, os, t, context);
  
                 //
                 // control sequences
@@ -865,7 +900,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                 } else if (!s.empty()) {
                                         // The space is needed to separate the item from the rest of the sentence.
                                         os << s << ' ';
-                                       p.skip_spaces();
+                                       eat_whitespace(p, os, context, false);
                                 }
                         }
                 }
@@ -879,8 +914,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 }
  
                 else if (t.cs() == "def") {
-                       p.skip_spaces();
                         context.check_layout(os);
+                       eat_whitespace(p, os, context, false);
                         string name = p.get_token().cs();
                         while (p.next_token().cat() != catBegin)
                                 name += p.get_token().asString();
@@ -1010,7 +1045,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         }
                         // TODO: Handle the unknown settings better.
                         // Warn about invalid options.
-                       // Check wether some option was given twice.
+                       // Check whether some option was given twice.
                         end_inset(os);
                 }
  
@@ -1046,7 +1081,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
  
                 else if (t.cs() == "hfill") {
                         context.check_layout(os);
-                       os << "\n\\hfill\n";
+                       os << "\n\\hfill \n";
                         skip_braces(p);
                         p.skip_spaces();
                 }
@@ -1172,6 +1207,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         begin_inset(os, "Quotes ");
                         os << known_coded_quotes[where - known_quotes];
                         end_inset(os);
+                       // LyX adds {} after the quote, so we have to eat
+                       // spaces here if there are any before a possible
+                       // {} pair.
+                       eat_whitespace(p, os, context, false);
                         skip_braces(p);
                 }
  
@@ -1179,7 +1218,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         char const ** where = is_known(t.cs(), known_sizes);
                         context.check_layout(os);
                         os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
-                       p.skip_spaces();
+                       eat_whitespace(p, os, context, false);
                 }
  
                 else if (t.cs() == "LyX" || t.cs() == "TeX"
@@ -1374,6 +1413,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         begin_inset(os, "VSpace ");
                         os << t.cs();
                         end_inset(os);
+                       skip_braces(p);
                 }
  
                 else if (t.cs() == "vspace") {
author	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Fri, 18 Jun 2004 06:47:19 +0000 (06:47 +0000)
committer	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Fri, 18 Jun 2004 06:47:19 +0000 (06:47 +0000)
src/insets/ChangeLog		patch \| blob \| history
src/insets/insetgraphics.C		patch \| blob \| history
src/insets/insettabular.C		patch \| blob \| history
src/tex2lyx/ChangeLog		patch \| blob \| history
src/tex2lyx/preamble.C		patch \| blob \| history
src/tex2lyx/tex2lyx.h		patch \| blob \| history
src/tex2lyx/texparser.C		patch \| blob \| history
src/tex2lyx/texparser.h		patch \| blob \| history
src/tex2lyx/text.C		patch \| blob \| history