Improve roundtrip of tex2lyx test documents:

author Georg Baum <Georg.Baum@post.rwth-aachen.de>
Sun, 19 Dec 2010 14:54:23 +0000 (14:54 +0000)
committer Georg Baum <Georg.Baum@post.rwth-aachen.de>
Sun, 19 Dec 2010 14:54:23 +0000 (14:54 +0000)
diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp

index 0b6bbe2db2c28f9838a4839ca16af5c26fd6cfc7..d387c6fcba89cfc2520a401ef1346794070fc08e 100644 (file)
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@@ -215,20 +215,24 @@ bool Parser::isParagraph()
  }
  
  
-void Parser::skip_spaces(bool skip_comments)
+bool Parser::skip_spaces(bool skip_comments)
  {
         // We just silently return if we have no more tokens.
         // skip_spaces() should be callable at any time,
         // the caller must check p::good() anyway.
+       bool skipped = false;
         while (good()) {
                 get_token();
                 if (isParagraph()) {
                         putback();
                         break;
                 }
-               if ( curr_token().cat() == catSpace ||
-                    curr_token().cat() == catNewline ||
-                   (curr_token().cat() == catComment && curr_token().cs().empty()))
+               if (curr_token().cat() == catSpace ||
+                   curr_token().cat() == catNewline) {
+                       skipped = true;
+                       continue;
+               }
+               if ((curr_token().cat() == catComment && curr_token().cs().empty()))
                         continue;
                 if (skip_comments && curr_token().cat() == catComment)
                         cerr << "  Ignoring comment: " << curr_token().asInput();
@@ -237,6 +241,7 @@ void Parser::skip_spaces(bool skip_comments)
                         break;
                 }
         }
+       return skipped;
  }
  
  
@@ -325,10 +330,15 @@ string Parser::getFullOpt()
  }
  
  
-string Parser::getOpt()
+string Parser::getOpt(bool keepws)
  {
         string const res = getArg('[', ']');
-       return res.empty() ? string() : '[' + res + ']';
+       if (res.empty()) {
+               if (keepws)
+                       unskip_spaces(true);
+               return string();
+       }
+       return '[' + res + ']';
  }
  
  
diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h

index af4e030a29c268a4c2571219984ce44963327443..93e29c17ed8ad75e18fc4a18e9e9d730222a87c8 100644 (file)
--- a/src/tex2lyx/Parser.h
+++ b/src/tex2lyx/Parser.h
@@ -153,15 +153,21 @@ public:
         /*!
          * \returns getArg('[', ']') including the brackets or the
          * empty string if there is no such argument.
+        * No whitespace is eaten if \p keepws is true and no optional
+        * argument exists. This is important if an optional argument is
+        * parsed that would go after a command in ERT: In this case the
+        * whitespace is needed to separate the ERT from the subsequent
+        * word. Without it, the ERT and the next word would be concatenated
+        * during .tex export, thus creating an invalid command.
          */
-       std::string getOpt();
+       std::string getOpt(bool keepws = false);
         /*!
-        * \returns getFullArg('[', ']') including the parentheses or the
-        * empty string if there is no such argument.
+        * the same as getOpt but without the brackets
          */
         std::string getOptContent();
         /*!
-        * the same as getOpt but without the brackets
+        * \returns getFullArg('(', ')') including the parentheses or the
+        * empty string if there is no such argument.
          */
         std::string getFullParentheseArg();
         /*!
@@ -192,7 +198,8 @@ public:
         /// \return whether the current token starts a new paragraph
         bool isParagraph();
         /// skips spaces (and comments if \p skip_comments is true)
-       void skip_spaces(bool skip_comments = false);
+       /// \return whether whitespace was skipped (not comments)
+       bool skip_spaces(bool skip_comments = false);
         /// puts back spaces (and comments if \p skip_comments is true)
         void unskip_spaces(bool skip_comments = false);
         ///
diff --git a/src/tex2lyx/test/test-insets.tex b/src/tex2lyx/test/test-insets.tex

index ffaa36012fa706ffd581cb732b037f0fff625c27..bfdd9429fdcb6dfb652e447533f63f7d9b76aa88 100644 (file)
--- a/src/tex2lyx/test/test-insets.tex
+++ b/src/tex2lyx/test/test-insets.tex
@@ -1,3 +1,5 @@
+%% LyX trick_preamble_code_into_believing_that_this_was_created_by_lyx created this file.  For more info, see http://www.lyx.org/.
+%% Do not edit unless you really know what you are doing.
  \documentclass[a4paper,12pt]{article}
  \usepackage[T1]{fontenc}
  \usepackage[latin9]{inputenc}
@@ -16,7 +18,7 @@
  \providecommand{\makenomenclature}{\makeglossary}
  \usepackage{varioref}
  \usepackage{prettyref}
-\usepackage{subscript}
+\usepackage{makeidx}
  
  \usepackage{graphicx}
  
@@ -27,6 +29,12 @@
  
  \newcommand{\lyxarrow}{\leavevmode\,$\triangleright$\,\allowbreak}
  
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
+\usepackage{subscript} % user specified as long as tex2lyx
+% produces a format less than 408
+
+\def\mycommand{\textquestiondown}
+
  \begin{document}
  
  \tableofcontents
@@ -88,7 +96,8 @@ Now the natbib things:
  \section{Input files\index{Input files}}
  
  We can input files too, like this \input{DummyDocument}, or with the include
-variant \include{DummyDocument}
+variant \include{DummyDocument} % unfortunately, including the doc twice
+% generates a multiply defined label
  
  If you prefer verbatim input, you can choose
  between~\verbatiminput{foo} or~\verbatiminput*{foo}.
@@ -155,6 +164,16 @@ And what about special characters like hyphe\-nation mark,
  ellipsis\ldots, and end-of-sentence\@. LyX also supports a menu
  separator\lyxarrow{}and a spif\textcompwordmark{}fy ligature break.
  
+Test for whitespace handling of commands: The following lines should
+result in identical output:
+
+builtin \textasciicircum{} unicodesymbols \j{} user \mycommand{} xx\par
+builtin \textasciicircum {} unicodesymbols \j {} user \mycommand{} xx\par
+builtin \textasciicircum % with a comment
+{} unicodesymbols \j % and a second one
+{} user \mycommand % and another
+{} xx
+
  A sub\textsubscript{sc\emph{ript}} and super\textsuperscript{script
  with $a^2+b^2=c^2$ math}.
  
diff --git a/src/tex2lyx/test/test-structure.tex b/src/tex2lyx/test/test-structure.tex

index 9cae8ca327cf7107bd9c62500a79e6a1ad63c717..68589858a57968dc53f714df022056933d10410b 100644 (file)
--- a/src/tex2lyx/test/test-structure.tex
+++ b/src/tex2lyx/test/test-structure.tex
@@ -1,5 +1,23 @@
+%% LyX trick_preamble_code_into_believing_that_this_was_created_by_lyx created this file.  For more info, see http://www.lyx.org/.
+%% Do not edit unless you really know what you are doing.
  \documentclass[legalpaper]{article}
  
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands.
+
+\newcommand{\noun}[1]{\textsc{#1}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands.
+
+\newenvironment{lyxlist}[1]
+       {\begin{list}{}
+               {\settowidth{\labelwidth}{#1}
+               \setlength{\leftmargin}{\labelwidth}
+               \addtolength{\leftmargin}{\labelsep}
+               \renewcommand{\makelabel}[1]{##1\hfil}}}
+       {\end{list}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
+
  \newenvironment{foo}{==[}{]==}
  
  \begin{document}
diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp

index 60c6135af547ba3b2d6a8da479043d8bcfd5d50c..b5e02d5a79e1e057e5169818bec7764fa8c62b2b 100644 (file)
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@@ -371,16 +371,17 @@ void end_inset(ostream & os)
  }
  
  
-void skip_braces(Parser & p)
+bool skip_braces(Parser & p)
  {
         if (p.next_token().cat() != catBegin)
-               return;
+               return false;
         p.get_token();
         if (p.next_token().cat() == catEnd) {
                 p.get_token();
-               return;
+               return true;
         }
         p.putback();
+       return false;
  }
  
  
@@ -441,6 +442,33 @@ Layout const * findLayout(TextClass const & textclass, string const & name)
  void eat_whitespace(Parser &, ostream &, Context &, bool);
  
  
+/*!
+ * Skips whitespace and braces.
+ * This should be called after a command has been parsed that is not put into
+ * ERT, and where LyX adds "{}" if needed.
+ */
+void skip_spaces_braces(Parser & p)
+{
+       /* The following four examples produce the same typeset output and
+          should be handled by this function:
+          - abc \j{} xyz
+          - abc \j {} xyz
+          - abc \j 
+            {} xyz
+          - abc \j %comment
+            {} xyz
+        */
+       // Unfortunately we need to skip comments, too.
+       // We can't use eat_whitespace since writing them after the {}
+       // results in different output in some cases.
+       bool const skipped_spaces = p.skip_spaces(true);
+       bool const skipped_braces = skip_braces(p);
+       if (skipped_spaces && !skipped_braces)
+               // put back the space (it is better handled by check_space)
+               p.unskip_spaces(true);
+}
+
+
  void output_command_layout(ostream & os, Parser & p, bool outer,
                            Context & parent_context,
                            Layout const * newlayout)
@@ -560,7 +588,8 @@ void parse_arguments(string const & command,
                         ert += '{' + p.verbatim_item() + '}';
                         break;
                 case optional:
-                       ert += p.getOpt();
+                       // true because we must not eat whitespace
+                       ert += p.getOpt(true);
                         break;
                 }
         }
@@ -1850,32 +1879,29 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 else if (t.cs() == "makeindex" || t.cs() == "maketitle") {
                         // FIXME: Somehow prevent title layouts if
                         // "maketitle" was not found
-                       p.skip_spaces();
-                       skip_braces(p); // swallow this
+                       // swallow this
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "tableofcontents") {
-                       p.skip_spaces();
                         context.check_layout(os);
                         begin_command_inset(os, "toc", "tableofcontents");
                         end_inset(os);
-                       skip_braces(p); // swallow this
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "listoffigures") {
-                       p.skip_spaces();
                         context.check_layout(os);
                         begin_inset(os, "FloatList figure\n");
                         end_inset(os);
-                       skip_braces(p); // swallow this
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "listoftables") {
-                       p.skip_spaces();
                         context.check_layout(os);
                         begin_inset(os, "FloatList table\n");
                         end_inset(os);
-                       skip_braces(p); // swallow this
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "listof") {
@@ -2153,14 +2179,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         context.check_layout(os);
                         begin_command_inset(os, "index_print", "printindex");
                         end_inset(os);
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "printnomenclature") {
                         context.check_layout(os);
                         begin_command_inset(os, "nomencl_print", "printnomenclature");
                         end_inset(os);
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "url") {
@@ -2312,37 +2338,37 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                          || t.cs() == "LaTeX") {
                         context.check_layout(os);
                         os << t.cs();
-                       skip_braces(p); // eat {}
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "LaTeXe") {
                         context.check_layout(os);
                         os << "LaTeX2e";
-                       skip_braces(p); // eat {}
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "ldots") {
                         context.check_layout(os);
-                       skip_braces(p);
                         os << "\\SpecialChar \\ldots{}\n";
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "lyxarrow") {
                         context.check_layout(os);
                         os << "\\SpecialChar \\menuseparator\n";
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "textcompwordmark") {
                         context.check_layout(os);
                         os << "\\SpecialChar \\textcompwordmark{}\n";
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (LYX_FORMAT >= 307 && t.cs() == "slash") {
                         context.check_layout(os);
                         os << "\\SpecialChar \\slash{}\n";
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (LYX_FORMAT >= 307 && t.cs() == "nobreakdash") {
@@ -2370,19 +2396,19 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 else if (t.cs() == "textasciitilde") {
                         context.check_layout(os);
                         os << '~';
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "textasciicircum") {
                         context.check_layout(os);
                         os << '^';
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "textbackslash") {
                         context.check_layout(os);
                         os << "\n\\backslash\n";
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#"
@@ -2461,7 +2487,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 else if (t.cs() == "newline") {
                         context.check_layout(os);
                         os << "\n\\" << t.cs() << "\n";
-                       skip_braces(p); // eat {}
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "input" || t.cs() == "include"
@@ -2577,7 +2603,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         begin_inset(os, "VSpace ");
                         os << t.cs();
                         end_inset(os);
-                       skip_braces(p);
+                       skip_spaces_braces(p);
                 }
  
                 else if (is_known(t.cs(), known_spaces)) {
@@ -2604,7 +2630,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         t.cs() == "cleardoublepage") {
                         context.check_layout(os);
                         os << "\n\\" << t.cs() << "\n";
-                       skip_braces(p); // eat {}
+                       skip_spaces_braces(p);
                 }
  
                 else if (t.cs() == "newcommand" ||
@@ -2726,8 +2752,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                              << "+" << to_utf8(rem) << endl;
                                 context.check_layout(os);
                                 os << to_utf8(s);
-                               p.skip_spaces();
-                               skip_braces(p); // eat {}
+                               skip_spaces_braces(p);
                         }
                         //cerr << "#: " << t << " mode: " << mode << endl;
                         // heuristic: read up to next non-nested space
author	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Sun, 19 Dec 2010 14:54:23 +0000 (14:54 +0000)
committer	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Sun, 19 Dec 2010 14:54:23 +0000 (14:54 +0000)
src/tex2lyx/Parser.cpp		patch \| blob \| history
src/tex2lyx/Parser.h		patch \| blob \| history
src/tex2lyx/test/test-insets.tex		patch \| blob \| history
src/tex2lyx/test/test-structure.tex		patch \| blob \| history
src/tex2lyx/text.cpp		patch \| blob \| history