From 93232c3ad63e80dc2851cb6710fb69b7f8f211c7 Mon Sep 17 00:00:00 2001
From: Georg Baum <Georg.Baum@post.rwth-aachen.de>
Date: Sun, 19 Dec 2010 14:54:23 +0000
Subject: [PATCH] Improve roundtrip of tex2lyx test documents: - Make
 test-insets.tex and test-structure.tex compilable - Avoid duplicate
 definition of \lyxarrow in test-insets.lyx - Prevent subscript package from
 being ignored in test-insets.lyx - Prevent commands listed with optional arg
 in syntax.default from being concatenated with the next word if no optional
 arg is given - Handle spaces and comments in between a command and "{}"
 consistently

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@36943 a592a061-630c-0410-9148-cb99ea01b6c8
---
 src/tex2lyx/Parser.cpp              | 22 +++++---
 src/tex2lyx/Parser.h                | 17 ++++--
 src/tex2lyx/test/test-insets.tex    | 23 +++++++-
 src/tex2lyx/test/test-structure.tex | 18 +++++++
 src/tex2lyx/text.cpp                | 81 +++++++++++++++++++----------
 5 files changed, 120 insertions(+), 41 deletions(-)

diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp
index 0b6bbe2db2..d387c6fcba 100644
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@@ -215,20 +215,24 @@ bool Parser::isParagraph()
 }
 
 
-void Parser::skip_spaces(bool skip_comments)
+bool Parser::skip_spaces(bool skip_comments)
 {
 	// We just silently return if we have no more tokens.
 	// skip_spaces() should be callable at any time,
 	// the caller must check p::good() anyway.
+	bool skipped = false;
 	while (good()) {
 		get_token();
 		if (isParagraph()) {
 			putback();
 			break;
 		}
-		if ( curr_token().cat() == catSpace ||
-		     curr_token().cat() == catNewline ||
-		    (curr_token().cat() == catComment && curr_token().cs().empty()))
+		if (curr_token().cat() == catSpace ||
+		    curr_token().cat() == catNewline) {
+			skipped = true;
+			continue;
+		}
+		if ((curr_token().cat() == catComment && curr_token().cs().empty()))
 			continue;
 		if (skip_comments && curr_token().cat() == catComment)
 			cerr << "  Ignoring comment: " << curr_token().asInput();
@@ -237,6 +241,7 @@ void Parser::skip_spaces(bool skip_comments)
 			break;
 		}
 	}
+	return skipped;
 }
 
 
@@ -325,10 +330,15 @@ string Parser::getFullOpt()
 }
 
 
-string Parser::getOpt()
+string Parser::getOpt(bool keepws)
 {
 	string const res = getArg('[', ']');
-	return res.empty() ? string() : '[' + res + ']';
+	if (res.empty()) {
+		if (keepws)
+			unskip_spaces(true);
+		return string();
+	}
+	return '[' + res + ']';
 }
 
 
diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h
index af4e030a29..93e29c17ed 100644
--- a/src/tex2lyx/Parser.h
+++ b/src/tex2lyx/Parser.h
@@ -153,15 +153,21 @@ public:
 	/*!
 	 * \returns getArg('[', ']') including the brackets or the
 	 * empty string if there is no such argument.
+	 * No whitespace is eaten if \p keepws is true and no optional
+	 * argument exists. This is important if an optional argument is
+	 * parsed that would go after a command in ERT: In this case the
+	 * whitespace is needed to separate the ERT from the subsequent
+	 * word. Without it, the ERT and the next word would be concatenated
+	 * during .tex export, thus creating an invalid command.
 	 */
-	std::string getOpt();
+	std::string getOpt(bool keepws = false);
 	/*!
-	 * \returns getFullArg('[', ']') including the parentheses or the
-	 * empty string if there is no such argument.
+	 * the same as getOpt but without the brackets
 	 */
 	std::string getOptContent();
 	/*!
-	 * the same as getOpt but without the brackets
+	 * \returns getFullArg('(', ')') including the parentheses or the
+	 * empty string if there is no such argument.
 	 */
 	std::string getFullParentheseArg();
 	/*!
@@ -192,7 +198,8 @@ public:
 	/// \return whether the current token starts a new paragraph
 	bool isParagraph();
 	/// skips spaces (and comments if \p skip_comments is true)
-	void skip_spaces(bool skip_comments = false);
+	/// \return whether whitespace was skipped (not comments)
+	bool skip_spaces(bool skip_comments = false);
 	/// puts back spaces (and comments if \p skip_comments is true)
 	void unskip_spaces(bool skip_comments = false);
 	///
diff --git a/src/tex2lyx/test/test-insets.tex b/src/tex2lyx/test/test-insets.tex
index ffaa36012f..bfdd9429fd 100644
--- a/src/tex2lyx/test/test-insets.tex
+++ b/src/tex2lyx/test/test-insets.tex
@@ -1,3 +1,5 @@
+%% LyX trick_preamble_code_into_believing_that_this_was_created_by_lyx created this file.  For more info, see http://www.lyx.org/.
+%% Do not edit unless you really know what you are doing.
 \documentclass[a4paper,12pt]{article}
 \usepackage[T1]{fontenc}
 \usepackage[latin9]{inputenc}
@@ -16,7 +18,7 @@
 \providecommand{\makenomenclature}{\makeglossary}
 \usepackage{varioref}
 \usepackage{prettyref}
-\usepackage{subscript}
+\usepackage{makeidx}
 
 \usepackage{graphicx}
 
@@ -27,6 +29,12 @@
 
 \newcommand{\lyxarrow}{\leavevmode\,$\triangleright$\,\allowbreak}
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
+\usepackage{subscript} % user specified as long as tex2lyx
+% produces a format less than 408
+
+\def\mycommand{\textquestiondown}
+
 \begin{document}
 
 \tableofcontents
@@ -88,7 +96,8 @@ Now the natbib things:
 \section{Input files\index{Input files}}
 
 We can input files too, like this \input{DummyDocument}, or with the include
-variant \include{DummyDocument}
+variant \include{DummyDocument} % unfortunately, including the doc twice
+% generates a multiply defined label
 
 If you prefer verbatim input, you can choose
 between~\verbatiminput{foo} or~\verbatiminput*{foo}.
@@ -155,6 +164,16 @@ And what about special characters like hyphe\-nation mark,
 ellipsis\ldots, and end-of-sentence\@. LyX also supports a menu
 separator\lyxarrow{}and a spif\textcompwordmark{}fy ligature break.
 
+Test for whitespace handling of commands: The following lines should
+result in identical output:
+
+builtin \textasciicircum{} unicodesymbols \j{} user \mycommand{} xx\par
+builtin \textasciicircum {} unicodesymbols \j {} user \mycommand{} xx\par
+builtin \textasciicircum % with a comment
+{} unicodesymbols \j % and a second one
+{} user \mycommand % and another
+{} xx
+
 A sub\textsubscript{sc\emph{ript}} and super\textsuperscript{script
 with $a^2+b^2=c^2$ math}.
 
diff --git a/src/tex2lyx/test/test-structure.tex b/src/tex2lyx/test/test-structure.tex
index 9cae8ca327..68589858a5 100644
--- a/src/tex2lyx/test/test-structure.tex
+++ b/src/tex2lyx/test/test-structure.tex
@@ -1,5 +1,23 @@
+%% LyX trick_preamble_code_into_believing_that_this_was_created_by_lyx created this file.  For more info, see http://www.lyx.org/.
+%% Do not edit unless you really know what you are doing.
 \documentclass[legalpaper]{article}
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands.
+
+\newcommand{\noun}[1]{\textsc{#1}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands.
+
+\newenvironment{lyxlist}[1]
+	{\begin{list}{}
+		{\settowidth{\labelwidth}{#1}
+		\setlength{\leftmargin}{\labelwidth}
+		\addtolength{\leftmargin}{\labelsep}
+		\renewcommand{\makelabel}[1]{##1\hfil}}}
+	{\end{list}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
+
 \newenvironment{foo}{==[}{]==}
 
 \begin{document}
diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp
index 60c6135af5..b5e02d5a79 100644
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@@ -371,16 +371,17 @@ void end_inset(ostream & os)
 }
 
 
-void skip_braces(Parser & p)
+bool skip_braces(Parser & p)
 {
 	if (p.next_token().cat() != catBegin)
-		return;
+		return false;
 	p.get_token();
 	if (p.next_token().cat() == catEnd) {
 		p.get_token();
-		return;
+		return true;
 	}
 	p.putback();
+	return false;
 }
 
 
@@ -441,6 +442,33 @@ Layout const * findLayout(TextClass const & textclass, string const & name)
 void eat_whitespace(Parser &, ostream &, Context &, bool);
 
 
+/*!
+ * Skips whitespace, comments and an empty brace pair "{}" after a command.
+ * This should be called after a command has been parsed that is not put into
+ * ERT, and where LyX adds "{}" if needed. Spaces stay if no "{}" follows.
+ */
+void skip_spaces_braces(Parser & p)
+{
+	/* The following four examples produce the same typeset output and
+	   should be handled by this function:
+	   - abc \j{} xyz
+	   - abc \j {} xyz
+	   - abc \j 
+	     {} xyz
+	   - abc \j %comment
+	     {} xyz
+	 */
+	// Unfortunately we need to skip comments, too.
+	// We can't use eat_whitespace since writing them after the {}
+	// results in different output in some cases.
+	bool const skipped_spaces = p.skip_spaces(true);
+	bool const skipped_braces = skip_braces(p);
+	if (skipped_spaces && !skipped_braces)
+		// put back the space (it is better handled by check_space)
+		p.unskip_spaces(true);
+}
+
+
 void output_command_layout(ostream & os, Parser & p, bool outer,
 			   Context & parent_context,
 			   Layout const * newlayout)
@@ -560,7 +588,8 @@ void parse_arguments(string const & command,
 			ert += '{' + p.verbatim_item() + '}';
 			break;
 		case optional:
-			ert += p.getOpt();
+			// true because we must not eat whitespace
+			ert += p.getOpt(true);
 			break;
 		}
 	}
@@ -1850,32 +1879,29 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		else if (t.cs() == "makeindex" || t.cs() == "maketitle") {
 			// FIXME: Somehow prevent title layouts if
 			// "maketitle" was not found
-			p.skip_spaces();
-			skip_braces(p); // swallow this
+			// swallow this
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "tableofcontents") {
-			p.skip_spaces();
 			context.check_layout(os);
 			begin_command_inset(os, "toc", "tableofcontents");
 			end_inset(os);
-			skip_braces(p); // swallow this
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "listoffigures") {
-			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "FloatList figure\n");
 			end_inset(os);
-			skip_braces(p); // swallow this
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "listoftables") {
-			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "FloatList table\n");
 			end_inset(os);
-			skip_braces(p); // swallow this
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "listof") {
@@ -2153,14 +2179,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			context.check_layout(os);
 			begin_command_inset(os, "index_print", "printindex");
 			end_inset(os);
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "printnomenclature") {
 			context.check_layout(os);
 			begin_command_inset(os, "nomencl_print", "printnomenclature");
 			end_inset(os);
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "url") {
@@ -2312,37 +2338,37 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			 || t.cs() == "LaTeX") {
 			context.check_layout(os);
 			os << t.cs();
-			skip_braces(p); // eat {}
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "LaTeXe") {
 			context.check_layout(os);
 			os << "LaTeX2e";
-			skip_braces(p); // eat {}
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "ldots") {
 			context.check_layout(os);
-			skip_braces(p);
 			os << "\\SpecialChar \\ldots{}\n";
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "lyxarrow") {
 			context.check_layout(os);
 			os << "\\SpecialChar \\menuseparator\n";
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "textcompwordmark") {
 			context.check_layout(os);
 			os << "\\SpecialChar \\textcompwordmark{}\n";
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (LYX_FORMAT >= 307 && t.cs() == "slash") {
 			context.check_layout(os);
 			os << "\\SpecialChar \\slash{}\n";
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (LYX_FORMAT >= 307 && t.cs() == "nobreakdash") {
@@ -2370,19 +2396,19 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		else if (t.cs() == "textasciitilde") {
 			context.check_layout(os);
 			os << '~';
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "textasciicircum") {
 			context.check_layout(os);
 			os << '^';
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "textbackslash") {
 			context.check_layout(os);
 			os << "\n\\backslash\n";
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#"
@@ -2461,7 +2487,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 		else if (t.cs() == "newline") {
 			context.check_layout(os);
 			os << "\n\\" << t.cs() << "\n";
-			skip_braces(p); // eat {}
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "input" || t.cs() == "include"
@@ -2577,7 +2603,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			begin_inset(os, "VSpace ");
 			os << t.cs();
 			end_inset(os);
-			skip_braces(p);
+			skip_spaces_braces(p);
 		}
 
 		else if (is_known(t.cs(), known_spaces)) {
@@ -2604,7 +2630,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			t.cs() == "cleardoublepage") {
 			context.check_layout(os);
 			os << "\n\\" << t.cs() << "\n";
-			skip_braces(p); // eat {}
+			skip_spaces_braces(p);
 		}
 
 		else if (t.cs() == "newcommand" ||
@@ -2726,8 +2752,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 					     << "+" << to_utf8(rem) << endl;
 				context.check_layout(os);
 				os << to_utf8(s);
-				p.skip_spaces();
-				skip_braces(p); // eat {}
+				skip_spaces_braces(p);
 			}
 			//cerr << "#: " << t << " mode: " << mode << endl;
 			// heuristic: read up to next non-nested space
-- 
2.39.2