From cb73d69ae46128b33fd6c5394eaafdebec967a86 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Fri, 18 Jun 2004 06:47:19 +0000 Subject: [PATCH] improve tex2lyx paragraph and comment handling git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@8815 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/insets/ChangeLog | 4 ++ src/insets/insetgraphics.C | 2 +- src/insets/insettabular.C | 2 +- src/tex2lyx/ChangeLog | 9 ++++ src/tex2lyx/preamble.C | 2 +- src/tex2lyx/tex2lyx.h | 1 + src/tex2lyx/texparser.C | 53 +++++++++++++-------- src/tex2lyx/texparser.h | 22 +++++---- src/tex2lyx/text.C | 96 +++++++++++++++++++++++++++----------- 9 files changed, 130 insertions(+), 61 deletions(-) diff --git a/src/insets/ChangeLog b/src/insets/ChangeLog index 93eb3d08b1..46845da6b3 100644 --- a/src/insets/ChangeLog +++ b/src/insets/ChangeLog @@ -1,3 +1,7 @@ +2004-06-18 Georg Baum + + * insetgraphics.C, insettabular.C: s/wether/whether/g + 2004-06-10 Georg Baum * insetgraphics.C (stripExtension): new diff --git a/src/insets/insetgraphics.C b/src/insets/insetgraphics.C index 043583f31a..fe73b7ff06 100644 --- a/src/insets/insetgraphics.C +++ b/src/insets/insetgraphics.C @@ -477,7 +477,7 @@ string const InsetGraphics::prepareFile(Buffer const & buf, if (zipped) { if (params().noUnzip) { - // We don't know wether latex can actually handle + // We don't know whether latex can actually handle // this file, but we can't check, because that would // mean to unzip the file and thereby making the // noUnzip parameter meaningless. 
diff --git a/src/insets/insettabular.C b/src/insets/insettabular.C index edbe8a6277..91be3b9d37 100644 --- a/src/insets/insettabular.C +++ b/src/insets/insettabular.C @@ -1324,7 +1324,7 @@ void InsetTabular::tabularFeatures(LCursor & cur, #if 0 // just multicol for one Single Cell if (!hasSelection()) { - // check wether we are completly in a multicol + // check whether we are completly in a multicol if (tabular.isMultiColumn(actcell)) tabular.unsetMultiColumn(actcell); else diff --git a/src/tex2lyx/ChangeLog b/src/tex2lyx/ChangeLog index c91081b401..72dcf2c355 100644 --- a/src/tex2lyx/ChangeLog +++ b/src/tex2lyx/ChangeLog @@ -1,3 +1,12 @@ +2004-06-18 Georg Baum + + * preamble.C, text.C: s/wether/whether/g + * text.C (eat_whitespace): new method + * texparser.C (getArg): use always curr_token().asInput() + * texparser.[Ch] (isParagraph): new method + * texparser.C (skip_spaces): handle "\n +\n" correctly + * texparser.[Ch] (asMode): remove, since it is unused + 2004-05-27 Lars Gullik Bjonnes * Makefile.am (BUILT_SOURCES): move lengthcommon.C from here... 
diff --git a/src/tex2lyx/preamble.C b/src/tex2lyx/preamble.C index 7d61b0aeea..874eb37859 100644 --- a/src/tex2lyx/preamble.C +++ b/src/tex2lyx/preamble.C @@ -184,7 +184,7 @@ LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & force special_columns['D'] = 3; bool is_full_document = false; - // determine wether this is a full document or a fragment for inclusion + // determine whether this is a full document or a fragment for inclusion while (p.good()) { Token const & t = p.get_token(); diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h index 97b6f66dd7..66a3c16f6b 100644 --- a/src/tex2lyx/tex2lyx.h +++ b/src/tex2lyx/tex2lyx.h @@ -34,6 +34,7 @@ void parse_text(Parser & p, std::ostream & os, unsigned flags, bool outer, //std::string parse_text(Parser & p, unsigned flags, const bool outer, // Context & context); +/// parses a subdocument, usually useful in insets (whence the name) void parse_text_in_inset(Parser & p, std::ostream & os, unsigned flags, bool outer, Context & context); diff --git a/src/tex2lyx/texparser.C b/src/tex2lyx/texparser.C index 46bb0d957e..ea69d460d4 100644 --- a/src/tex2lyx/texparser.C +++ b/src/tex2lyx/texparser.C @@ -61,16 +61,6 @@ void catInit() // catcodes // -mode_type asMode(mode_type oldmode, string const & str) -{ - if (str == "mathmode") - return MATH_MODE; - if (str == "textmode" || str == "forcetext") - return TEXT_MODE; - return oldmode; -} - - CatCode catcode(unsigned char c) { return theCatcode[c]; @@ -177,20 +167,45 @@ Token const & Parser::get_token() } +bool Parser::isParagraph() const +{ + // A new paragraph in TeX is started + // - either by a newline, following any amount of whitespace + // characters (including zero), and another newline + // - or the token \par + if (curr_token().cat() == catNewline && + (curr_token().cs().size() > 1 || + (next_token().cat() == catSpace && + pos_ < tokens_.size() - 1 && + tokens_[pos_ + 1].cat() == catNewline))) + return true; + if (curr_token().cat() == 
catEscape && curr_token().cs() == "par") + return true; + return false; +} + + void Parser::skip_spaces(bool skip_comments) { // We just silently return if we have no more tokens. // skip_spaces() should be callable at any time, // the caller must check p::good() anyway. while (good()) { - if ( next_token().cat() == catSpace || - (next_token().cat() == catNewline && next_token().cs().size() == 1) || - next_token().cat() == catComment && next_token().cs().empty()) - get_token(); - else if (skip_comments && next_token().cat() == catComment) - cerr << " Ignoring comment: " << get_token().asInput(); - else + get_token(); + if (isParagraph()) { + putback(); break; + } + if ( curr_token().cat() == catSpace || + curr_token().cat() == catNewline || + (curr_token().cat() == catComment && curr_token().cs().empty())) + continue; + if (skip_comments && curr_token().cat() == catComment) + cerr << " Ignoring comment: " << curr_token().asInput(); + else { + putback(); + break; + } } } @@ -253,10 +268,8 @@ string Parser::getArg(char left, char right) if (!curr_token().cs().empty()) cerr << "Ignoring comment: " << curr_token().asInput(); } - else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline) - result += curr_token().cs(); else - result += c; + result += curr_token().asInput(); } return result; diff --git a/src/tex2lyx/texparser.h b/src/tex2lyx/texparser.h index 9de3600bba..6ab9946bbb 100644 --- a/src/tex2lyx/texparser.h +++ b/src/tex2lyx/texparser.h @@ -79,13 +79,13 @@ public: /// std::string const & cs() const { return cs_; } - /// + /// Returns the catcode of the token CatCode cat() const { return cat_; } /// char character() const { return char_; } - /// + /// Returns the token as string std::string asString() const; - /// + /// Returns the token verbatim std::string asInput() const; private: @@ -130,27 +130,29 @@ public: std::string getArg(char left, char right); /// getArg('[', ']') including the brackets std::string getOpt(); - /// + /// Returns the 
character of the current token and increments the token position. char getChar(); /// void error(std::string const & msg); - /// + /// Parses \p is into tokens void tokenize(std::istream & is); /// void push_back(Token const & t); /// void pop_back(); - /// + /// The previous token. Token const & prev_token() const; - /// + /// The current token. Token const & curr_token() const; - /// + /// The next token. Token const & next_token() const; /// Make the next token current and return that. Token const & get_token(); - /// skips spaces (and comments if \param skip_comments is true) + /// \return whether the current token starts a new paragraph + bool isParagraph() const; + /// skips spaces (and comments if \p skip_comments is true) void skip_spaces(bool skip_comments = false); - /// puts back spaces (and comments if \param skip_comments is true) + /// puts back spaces (and comments if \p skip_comments is true) void unskip_spaces(bool skip_comments = false); /// void lex(std::string const & s); diff --git a/src/tex2lyx/text.C b/src/tex2lyx/text.C index e6f4227e80..93425ec676 100644 --- a/src/tex2lyx/text.C +++ b/src/tex2lyx/text.C @@ -144,8 +144,8 @@ bool splitLatexLength(string const & len, string & value, string & unit) } -// A simple function to translate a latex length to something lyx can -// understand. Not perfect, but rather best-effort. +/// A simple function to translate a latex length to something lyx can +/// understand. Not perfect, but rather best-effort. 
bool translate_len(string const & length, string & valstring, string & unit) { if (!splitLatexLength(length, valstring, unit)) @@ -313,6 +313,9 @@ LyXLayout_ptr findLayout(LyXTextClass const & textclass, } +void eat_whitespace(Parser &, ostream &, Context &, bool); + + void output_command_layout(ostream & os, Parser & p, bool outer, Context & parent_context, LyXLayout_ptr newlayout) @@ -323,13 +326,14 @@ void output_command_layout(ostream & os, Parser & p, bool outer, context.check_deeper(os); context.check_layout(os); if (context.layout->optionalargs > 0) { - p.skip_spaces(); + eat_whitespace(p, os, context, false); if (p.next_token().character() == '[') { p.get_token(); // eat '[' begin_inset(os, "OptArg\n"); os << "status collapsed\n\n"; parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context); end_inset(os); + eat_whitespace(p, os, context, false); } } parse_text_snippet(p, os, FLAG_ITEM, outer, context); @@ -378,7 +382,7 @@ void check_space(Parser const & p, ostream & os, Context & context) /*! - * Check wether \param command is a known command. If yes, + * Check whether \p command is a known command. If yes, * handle the command with all arguments. * \return true if the command was parsed, false otherwise. */ @@ -522,8 +526,8 @@ void parse_environment(Parser & p, ostream & os, bool outer, string const name = p.getArg('{', '}'); const bool is_starred = suffixIs(name, '*'); string const unstarred_name = rtrim(name, "*"); + eat_whitespace(p, os, parent_context, false); active_environments.push_back(name); - p.skip_spaces(); if (is_math_env(name)) { parent_context.check_layout(os); @@ -651,9 +655,52 @@ void parse_environment(Parser & p, ostream & os, bool outer, p.skip_spaces(); } -} // anonymous namespace +/// parses a comment and outputs it to \p os. 
+void parse_comment(Parser & p, ostream & os, Token const & t, Context & context) +{ + BOOST_ASSERT(t.cat() == catComment); + context.check_layout(os); + if (!t.cs().empty()) { + handle_comment(os, '%' + t.cs(), context); + if (p.next_token().cat() == catNewline) { + // A newline after a comment line starts a new + // paragraph + if(!context.atParagraphStart()) { + // Only start a new paragraph if not already + // done (we might get called recursively) + context.new_paragraph(os); + } + eat_whitespace(p, os, context, true); + } + } else { + // "%\n" combination + p.skip_spaces(); + } +} +/*! + * Reads spaces and comments until the first non-space, non-comment token. + * New paragraphs (double newlines or \\par) are handled like simple spaces + * if \p eatParagraph is true. + * Spaces are skipped, but comments are written to \p os. + */ +void eat_whitespace(Parser & p, ostream & os, Context & context, + bool eatParagraph) +{ + while (p.good()) { + Token const & t = p.get_token(); + if (t.cat() == catComment) + parse_comment(p, os, t, context); + else if ((! eatParagraph && p.isParagraph()) || + (t.cat() != catSpace && t.cat() != catNewline)) { + p.putback(); + return; + } + } +} + +} // anonymous namespace void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, @@ -738,7 +785,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, skip_braces(p); } - else if (t.cat() == catSpace || (t.cat() == catNewline && t.cs().size() == 1)) + else if (t.cat() == catSpace || (t.cat() == catNewline && ! 
p.isParagraph())) check_space(p, os, context); else if (t.cat() == catLetter || @@ -749,9 +796,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, os << t.character(); } - else if (t.cat() == catNewline || (t.cat() == catEscape && t.cs() == "par")) { - p.skip_spaces(); + else if (p.isParagraph()) { context.new_paragraph(os); + eat_whitespace(p, os, context, true); } else if (t.cat() == catActive) { @@ -792,20 +839,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, handle_ert(os, "}", context); } - else if (t.cat() == catComment) { - context.check_layout(os); - if (!t.cs().empty()) { - handle_comment(os, '%' + t.cs(), context); - if (p.next_token().cat() == catNewline) { - // A newline after a comment line starts a new paragraph - context.new_paragraph(os); - p.skip_spaces(); - } - } else { - // "%\n" combination - p.skip_spaces(); - } - } + else if (t.cat() == catComment) + parse_comment(p, os, t, context); // // control sequences @@ -865,7 +900,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, } else if (!s.empty()) { // The space is needed to separate the item from the rest of the sentence. os << s << ' '; - p.skip_spaces(); + eat_whitespace(p, os, context, false); } } } @@ -879,8 +914,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, } else if (t.cs() == "def") { - p.skip_spaces(); context.check_layout(os); + eat_whitespace(p, os, context, false); string name = p.get_token().cs(); while (p.next_token().cat() != catBegin) name += p.get_token().asString(); @@ -1010,7 +1045,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, } // TODO: Handle the unknown settings better. // Warn about invalid options. - // Check wether some option was given twice. + // Check whether some option was given twice. 
end_inset(os); } @@ -1046,7 +1081,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cs() == "hfill") { context.check_layout(os); - os << "\n\\hfill\n"; + os << "\n\\hfill \n"; skip_braces(p); p.skip_spaces(); } @@ -1172,6 +1207,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, begin_inset(os, "Quotes "); os << known_coded_quotes[where - known_quotes]; end_inset(os); + // LyX adds {} after the quote, so we have to eat + // spaces here if there are any before a possible + // {} pair. + eat_whitespace(p, os, context, false); skip_braces(p); } @@ -1179,7 +1218,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, char const ** where = is_known(t.cs(), known_sizes); context.check_layout(os); os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n"; - p.skip_spaces(); + eat_whitespace(p, os, context, false); } else if (t.cs() == "LyX" || t.cs() == "TeX" @@ -1374,6 +1413,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, begin_inset(os, "VSpace "); os << t.cs(); end_inset(os); + skip_braces(p); } else if (t.cs() == "vspace") { -- 2.39.2