From 93232c3ad63e80dc2851cb6710fb69b7f8f211c7 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Sun, 19 Dec 2010 14:54:23 +0000 Subject: [PATCH] Improve roundtrip of tex2lyx test documents: - Make test-insets.tex and test-structure.tex compilable - Avoid duplicate definition of \lyxarrow in test-insets.lyx - Prevent subscript package from being ignored in test-insets.lyx - Prevent commands listed with optional arg in syntax.default from being concatenated with the next word if no optional arg is given - Handle spaces and comments in between a command and "{}" consistently git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@36943 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/tex2lyx/Parser.cpp | 22 +++++--- src/tex2lyx/Parser.h | 17 ++++-- src/tex2lyx/test/test-insets.tex | 23 +++++++- src/tex2lyx/test/test-structure.tex | 18 +++++++ src/tex2lyx/text.cpp | 81 +++++++++++++++++++---------- 5 files changed, 120 insertions(+), 41 deletions(-) diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index 0b6bbe2db2..d387c6fcba 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -215,20 +215,24 @@ bool Parser::isParagraph() } -void Parser::skip_spaces(bool skip_comments) +bool Parser::skip_spaces(bool skip_comments) { // We just silently return if we have no more tokens. // skip_spaces() should be callable at any time, // the caller must check p::good() anyway. 
+ bool skipped = false; while (good()) { get_token(); if (isParagraph()) { putback(); break; } - if ( curr_token().cat() == catSpace || - curr_token().cat() == catNewline || - (curr_token().cat() == catComment && curr_token().cs().empty())) + if (curr_token().cat() == catSpace || + curr_token().cat() == catNewline) { + skipped = true; + continue; + } + if ((curr_token().cat() == catComment && curr_token().cs().empty())) continue; if (skip_comments && curr_token().cat() == catComment) cerr << " Ignoring comment: " << curr_token().asInput(); @@ -237,6 +241,7 @@ void Parser::skip_spaces(bool skip_comments) break; } } + return skipped; } @@ -325,10 +330,15 @@ string Parser::getFullOpt() } -string Parser::getOpt() +string Parser::getOpt(bool keepws) { string const res = getArg('[', ']'); - return res.empty() ? string() : '[' + res + ']'; + if (res.empty()) { + if (keepws) + unskip_spaces(true); + return string(); + } + return '[' + res + ']'; } diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index af4e030a29..93e29c17ed 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -153,15 +153,21 @@ public: /*! * \returns getArg('[', ']') including the brackets or the * empty string if there is no such argument. + * No whitespace is eaten if \p keepws is true and no optional + * argument exists. This is important if an optional argument is + * parsed that would go after a command in ERT: In this case the + * whitespace is needed to separate the ERT from the subsequent + * word. Without it, the ERT and the next word would be concatenated + * during .tex export, thus creating an invalid command. */ - std::string getOpt(); + std::string getOpt(bool keepws = false); /*! - * \returns getFullArg('[', ']') including the parentheses or the - * empty string if there is no such argument. + * the same as getOpt but without the brackets */ std::string getOptContent(); /*! 
- * the same as getOpt but without the brackets + * \returns getFullArg('(', ')') including the parentheses or the + * empty string if there is no such argument. */ std::string getFullParentheseArg(); /*! @@ -192,7 +198,8 @@ public: /// \return whether the current token starts a new paragraph bool isParagraph(); /// skips spaces (and comments if \p skip_comments is true) - void skip_spaces(bool skip_comments = false); + /// \return whether whitespace was skipped (not comments) + bool skip_spaces(bool skip_comments = false); /// puts back spaces (and comments if \p skip_comments is true) void unskip_spaces(bool skip_comments = false); /// diff --git a/src/tex2lyx/test/test-insets.tex b/src/tex2lyx/test/test-insets.tex index ffaa36012f..bfdd9429fd 100644 --- a/src/tex2lyx/test/test-insets.tex +++ b/src/tex2lyx/test/test-insets.tex @@ -1,3 +1,5 @@ +%% LyX trick_preamble_code_into_believing_that_this_was_created_by_lyx created this file. For more info, see http://www.lyx.org/. +%% Do not edit unless you really know what you are doing. \documentclass[a4paper,12pt]{article} \usepackage[T1]{fontenc} \usepackage[latin9]{inputenc} @@ -16,7 +18,7 @@ \providecommand{\makenomenclature}{\makeglossary} \usepackage{varioref} \usepackage{prettyref} -\usepackage{subscript} +\usepackage{makeidx} \usepackage{graphicx} @@ -27,6 +29,12 @@ \newcommand{\lyxarrow}{\leavevmode\,$\triangleright$\,\allowbreak} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands. 
+\usepackage{subscript} % user specified as long as tex2lyx +% produces a format less than 408 + +\def\mycommand{\textquestiondown} + \begin{document} \tableofcontents @@ -88,7 +96,8 @@ Now the natbib things: \section{Input files\index{Input files}} We can input files too, like this \input{DummyDocument}, or with the include -variant \include{DummyDocument} +variant \include{DummyDocument} % unfortunately, including the doc twice +% generates a multiply defined label If you prefer verbatim input, you can choose between~\verbatiminput{foo} or~\verbatiminput*{foo}. @@ -155,6 +164,16 @@ And what about special characters like hyphe\-nation mark, ellipsis\ldots, and end-of-sentence\@. LyX also supports a menu separator\lyxarrow{}and a spif\textcompwordmark{}fy ligature break. +Test for whitespace handling of commands: The following lines should +result in identical output: + +builtin \textasciicircum{} unicodesymbols \j{} user \mycommand{} xx\par +builtin \textasciicircum {} unicodesymbols \j {} user \mycommand{} xx\par +builtin \textasciicircum % with a comment +{} unicodesymbols \j % and a second one +{} user \mycommand % and another +{} xx + A sub\textsubscript{sc\emph{ript}} and super\textsuperscript{script with $a^2+b^2=c^2$ math}. diff --git a/src/tex2lyx/test/test-structure.tex b/src/tex2lyx/test/test-structure.tex index 9cae8ca327..68589858a5 100644 --- a/src/tex2lyx/test/test-structure.tex +++ b/src/tex2lyx/test/test-structure.tex @@ -1,5 +1,23 @@ +%% LyX trick_preamble_code_into_believing_that_this_was_created_by_lyx created this file. For more info, see http://www.lyx.org/. +%% Do not edit unless you really know what you are doing. \documentclass[legalpaper]{article} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands. + +\newcommand{\noun}[1]{\textsc{#1}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands. 
+ +\newenvironment{lyxlist}[1] + {\begin{list}{} + {\settowidth{\labelwidth}{#1} + \setlength{\leftmargin}{\labelwidth} + \addtolength{\leftmargin}{\labelsep} + \renewcommand{\makelabel}[1]{##1\hfil}}} + {\end{list}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands. + \newenvironment{foo}{==[}{]==} \begin{document} diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index 60c6135af5..b5e02d5a79 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -371,16 +371,17 @@ void end_inset(ostream & os) } -void skip_braces(Parser & p) +bool skip_braces(Parser & p) { if (p.next_token().cat() != catBegin) - return; + return false; p.get_token(); if (p.next_token().cat() == catEnd) { p.get_token(); - return; + return true; } p.putback(); + return false; } @@ -441,6 +442,33 @@ Layout const * findLayout(TextClass const & textclass, string const & name) void eat_whitespace(Parser &, ostream &, Context &, bool); +/*! + * Skips whitespace and braces. + * This should be called after a command has been parsed that is not put into + * ERT, and where LyX adds "{}" if needed. + */ +void skip_spaces_braces(Parser & p) +{ + /* The following four examples produce the same typeset output and + should be handled by this function: + - abc \j{} xyz + - abc \j {} xyz + - abc \j + {} xyz + - abc \j %comment + {} xyz + */ + // Unfortunately we need to skip comments, too. + // We can't use eat_whitespace since writing them after the {} + // results in different output in some cases. 
+ bool const skipped_spaces = p.skip_spaces(true); + bool const skipped_braces = skip_braces(p); + if (skipped_spaces && !skipped_braces) + // put back the space (it is better handled by check_space) + p.unskip_spaces(true); +} + + void output_command_layout(ostream & os, Parser & p, bool outer, Context & parent_context, Layout const * newlayout) @@ -560,7 +588,8 @@ void parse_arguments(string const & command, ert += '{' + p.verbatim_item() + '}'; break; case optional: - ert += p.getOpt(); + // true because we must not eat whitespace + ert += p.getOpt(true); break; } } @@ -1850,32 +1879,29 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cs() == "makeindex" || t.cs() == "maketitle") { // FIXME: Somehow prevent title layouts if // "maketitle" was not found - p.skip_spaces(); - skip_braces(p); // swallow this + // swallow this + skip_spaces_braces(p); } else if (t.cs() == "tableofcontents") { - p.skip_spaces(); context.check_layout(os); begin_command_inset(os, "toc", "tableofcontents"); end_inset(os); - skip_braces(p); // swallow this + skip_spaces_braces(p); } else if (t.cs() == "listoffigures") { - p.skip_spaces(); context.check_layout(os); begin_inset(os, "FloatList figure\n"); end_inset(os); - skip_braces(p); // swallow this + skip_spaces_braces(p); } else if (t.cs() == "listoftables") { - p.skip_spaces(); context.check_layout(os); begin_inset(os, "FloatList table\n"); end_inset(os); - skip_braces(p); // swallow this + skip_spaces_braces(p); } else if (t.cs() == "listof") { @@ -2153,14 +2179,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, context.check_layout(os); begin_command_inset(os, "index_print", "printindex"); end_inset(os); - skip_braces(p); + skip_spaces_braces(p); } else if (t.cs() == "printnomenclature") { context.check_layout(os); begin_command_inset(os, "nomencl_print", "printnomenclature"); end_inset(os); - skip_braces(p); + skip_spaces_braces(p); } else if (t.cs() == "url") { @@ 
-2312,37 +2338,37 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, || t.cs() == "LaTeX") { context.check_layout(os); os << t.cs(); - skip_braces(p); // eat {} + skip_spaces_braces(p); } else if (t.cs() == "LaTeXe") { context.check_layout(os); os << "LaTeX2e"; - skip_braces(p); // eat {} + skip_spaces_braces(p); } else if (t.cs() == "ldots") { context.check_layout(os); - skip_braces(p); os << "\\SpecialChar \\ldots{}\n"; + skip_spaces_braces(p); } else if (t.cs() == "lyxarrow") { context.check_layout(os); os << "\\SpecialChar \\menuseparator\n"; - skip_braces(p); + skip_spaces_braces(p); } else if (t.cs() == "textcompwordmark") { context.check_layout(os); os << "\\SpecialChar \\textcompwordmark{}\n"; - skip_braces(p); + skip_spaces_braces(p); } else if (LYX_FORMAT >= 307 && t.cs() == "slash") { context.check_layout(os); os << "\\SpecialChar \\slash{}\n"; - skip_braces(p); + skip_spaces_braces(p); } else if (LYX_FORMAT >= 307 && t.cs() == "nobreakdash") { @@ -2370,19 +2396,19 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cs() == "textasciitilde") { context.check_layout(os); os << '~'; - skip_braces(p); + skip_spaces_braces(p); } else if (t.cs() == "textasciicircum") { context.check_layout(os); os << '^'; - skip_braces(p); + skip_spaces_braces(p); } else if (t.cs() == "textbackslash") { context.check_layout(os); os << "\n\\backslash\n"; - skip_braces(p); + skip_spaces_braces(p); } else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" @@ -2461,7 +2487,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cs() == "newline") { context.check_layout(os); os << "\n\\" << t.cs() << "\n"; - skip_braces(p); // eat {} + skip_spaces_braces(p); } else if (t.cs() == "input" || t.cs() == "include" @@ -2577,7 +2603,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, begin_inset(os, "VSpace "); os << t.cs(); end_inset(os); - skip_braces(p); + skip_spaces_braces(p); 
} else if (is_known(t.cs(), known_spaces)) { @@ -2604,7 +2630,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, t.cs() == "cleardoublepage") { context.check_layout(os); os << "\n\\" << t.cs() << "\n"; - skip_braces(p); // eat {} + skip_spaces_braces(p); } else if (t.cs() == "newcommand" || @@ -2726,8 +2752,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, << "+" << to_utf8(rem) << endl; context.check_layout(os); os << to_utf8(s); - p.skip_spaces(); - skip_braces(p); // eat {} + skip_spaces_braces(p); } //cerr << "#: " << t << " mode: " << mode << endl; // heuristic: read up to next non-nested space -- 2.39.2