From: Bo Peng Date: Thu, 26 Apr 2007 04:53:30 +0000 (+0000) Subject: Rename .C ==> .cpp for files in src/tex2lyx, part two X-Git-Tag: 1.6.10~10055 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=04591a6e34a0ac52632f3a309df9ff375a91b713;p=features.git Rename .C ==> .cpp for files in src/tex2lyx, part two git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@18023 a592a061-630c-0410-9148-cb99ea01b6c8 --- diff --git a/src/tex2lyx/Context.cpp b/src/tex2lyx/Context.cpp new file mode 100644 index 0000000000..36d1ec1782 --- /dev/null +++ b/src/tex2lyx/Context.cpp @@ -0,0 +1,241 @@ +/** + * \file Context.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author Jean-Marc Lasgouttes + * + * Full author contact details are available in file CREDITS. + */ + +#include + +#include + +#include "support/lstrings.h" +#include "Context.h" + + +namespace lyx { + +using std::ostream; +using std::endl; +using std::string; + + +namespace { + +void begin_layout(ostream & os, LyXLayout_ptr layout, Font const & font, + Font const & normalfont) +{ + os << "\n\\begin_layout " << layout->name() << "\n"; + // FIXME: This is not enough for things like + // \\Huge par1 \\par par2 + output_font_change(os, normalfont, font); +} + + +void end_layout(ostream & os) +{ + os << "\n\\end_layout\n"; +} + + +void begin_deeper(ostream & os) +{ + os << "\n\\begin_deeper"; +} + + +void end_deeper(ostream & os) +{ + os << "\n\\end_deeper"; +} + +} + + +bool operator==(Font const & f1, Font const & f2) +{ + return + f1.size == f2.size && + f1.family == f2.family && + f1.series == f2.series && + f1.shape == f2.shape; +} + + +void output_font_change(ostream & os, Font const & oldfont, + Font const & newfont) +{ + if (oldfont.family != newfont.family) + os << "\n\\family " << newfont.family << '\n'; + if (oldfont.series != newfont.series) + os << "\n\\series " << newfont.series << '\n'; + if (oldfont.shape != newfont.shape) + os << 
"\n\\shape " << newfont.shape << '\n'; + if (oldfont.size != newfont.size) + os << "\n\\size " << newfont.size << '\n'; +} + + +Font Context::normalfont; +bool Context::empty = true; + + +Context::Context(bool need_layout_, + LyXTextClass const & textclass_, + LyXLayout_ptr layout_, LyXLayout_ptr parent_layout_, + Font font_) + : need_layout(need_layout_), + need_end_layout(false), need_end_deeper(false), + has_item(false), deeper_paragraph(false), + new_layout_allowed(true), textclass(textclass_), + layout(layout_), parent_layout(parent_layout_), + font(font_) +{ + if (!layout.get()) + layout = textclass.defaultLayout(); + if (!parent_layout.get()) + parent_layout = textclass.defaultLayout(); +} + + +Context::~Context() +{ + if (!extra_stuff.empty()) + std::cerr << "Bug: Ignoring extra stuff '" << extra_stuff + << '\'' << std::endl; +} + + +void Context::check_layout(ostream & os) +{ + if (need_layout) { + check_end_layout(os); + + // are we in a list-like environment? + if (layout->isEnvironment() + && layout->latextype != LATEX_ENVIRONMENT) { + // A list-like environment + if (has_item) { + // a new item. If we had a standard + // paragraph before, we have to end it. + if (deeper_paragraph) { + end_deeper(os); + deeper_paragraph = false; + } + begin_layout(os, layout, font, normalfont); + has_item = false; + } else { + // a standard paragraph in an + // enumeration. We have to recognize + // that this may require a begin_deeper. 
+ if (!deeper_paragraph) + begin_deeper(os); + begin_layout(os, textclass.defaultLayout(), + font, normalfont); + deeper_paragraph = true; + } + } else { + // No list-like environment + begin_layout(os, layout, font, normalfont); + } + need_layout = false; + need_end_layout = true; + if (!extra_stuff.empty()) { + os << extra_stuff; + extra_stuff.erase(); + } + os << "\n"; + empty = false; + } +} + + +void Context::check_end_layout(ostream & os) +{ + if (need_end_layout) { + end_layout(os); + need_end_layout = false; + } +} + + +void Context::check_deeper(ostream & os) +{ + if (parent_layout->isEnvironment()) { + // We start a nested environment. + // We need to increase the depth. + if (need_end_deeper) { + // no need to have \end_deeper \begin_deeper + need_end_deeper = false; + } else { + begin_deeper(os); + need_end_deeper = true; + } + } else + check_end_deeper(os); +} + + +void Context::check_end_deeper(ostream & os) +{ + if (need_end_deeper) { + end_deeper(os); + need_end_deeper = false; + } + if (deeper_paragraph) { + end_deeper(os); + deeper_paragraph = false; + } +} + + +void Context::set_item() +{ + need_layout = true; + has_item = true; +} + + +void Context::new_paragraph(ostream & os) +{ + check_end_layout(os); + need_layout = true; +} + + +void Context::add_extra_stuff(std::string const & stuff) +{ + if (!lyx::support::contains(extra_stuff, stuff)) + extra_stuff += stuff; +} + + +void Context::dump(ostream & os, string const & desc) const +{ + os << "\n" << desc <<" ["; + if (need_layout) + os << "need_layout "; + if (need_end_layout) + os << "need_end_layout "; + if (need_end_deeper) + os << "need_end_deeper "; + if (has_item) + os << "has_item "; + if (deeper_paragraph) + os << "deeper_paragraph "; + if (new_layout_allowed) + os << "new_layout_allowed "; + if (!extra_stuff.empty()) + os << "extrastuff=[" << extra_stuff << "] "; + os << "textclass=" << textclass.name() + << " layout=" << layout->name() + << " parent_layout=" << parent_layout->name() 
<< "] font=[" + << font.size << ' ' << font.family << ' ' << font.series << ' ' + << font.shape << ']' << endl; +} + + +} // namespace lyx diff --git a/src/tex2lyx/Context.h b/src/tex2lyx/Context.h new file mode 100644 index 0000000000..307e0d65e6 --- /dev/null +++ b/src/tex2lyx/Context.h @@ -0,0 +1,157 @@ +// -*- C++ -*- +/** + * \file Context.h + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author Jean-Marc Lasgouttes + * + * Full author contact details are available in file CREDITS. + */ + +#ifndef CONTEXT_H +#define CONTEXT_H + +#include "LyXTextClass.h" + +#include + + +namespace lyx { + + +/*! + * Small helper struct that holds font properties. + * The names are in LyX language, not LaTeX. + * We don't use LyXFont, because it pulls in a lot of dependencies and has + * more strings than needed (e.g. font family error1 etc.). + * If more font related stuff is needed, it might be good to change to + * LyXFont. + */ +class Font { +public: + Font() + { + init(); + } + void init() + { + size = "normal"; + family = "default"; + series = "default"; + shape = "default"; + } + std::string size; + std::string family; + std::string series; + std::string shape; +}; + + +bool operator==(Font const &, Font const &); + + +inline bool operator!=(Font const & f1, Font const & f2) +{ + return !operator==(f1, f2); +} + + +/// Output changed font parameters if \p oldfont and \p newfont differ +void output_font_change(std::ostream & os, Font const & oldfont, + Font const & newfont); + + +/*! + * A helper struct. + * + * Every bit of text has a corresponding context. + * Usage: Parsing begins with a global context. A new context is opened for + * every new LaTeX group, e.g. at the beginning of a new environment. + * The old context is used again after the group is closed. 
+ * + * Since not all paragraph parameters in LyX have the same scoping as their + * LaTeX counterpart we may have to transfer context properties (e. g. the + * font) from and to the parent context. + */ +class Context { +public: + Context(bool need_layout_, + LyXTextClass const & textclass_, + LyXLayout_ptr layout_ = LyXLayout_ptr(), + LyXLayout_ptr parent_layout_= LyXLayout_ptr(), + Font font_ = Font()); + ~Context(); + + /// Output a \\begin_layout if requested + void check_layout(std::ostream & os); + + /// Output a \\end_layout if needed + void check_end_layout(std::ostream & os); + + /// Output a \\begin_deeper if needed + void check_deeper(std::ostream & os); + + /// Output a \\end_deeper if needed + void check_end_deeper(std::ostream & os); + + /// dump content on stream (for debugging purpose), with + /// description \c desc. + void dump(std::ostream &, std::string const & desc = "context") const; + + /// Are we just beginning a new paragraph? + bool atParagraphStart() const { return need_layout; } + + /// Begin an item in a list environment + void set_item(); + + /// Start a new paragraph + void new_paragraph(std::ostream & os); + + /// Add extra stuff if not already there + void add_extra_stuff(std::string const &); + + /// Do we need to output some \\begin_layout command before the + /// next characters? + bool need_layout; + /// Do we need to output some \\end_layout command + bool need_end_layout; + /// We may need to add something after this \\begin_layout command + std::string extra_stuff; + /// If there has been an \\begin_deeper, we'll need a matching + /// \\end_deeper + bool need_end_deeper; + /// If we are in an itemize-like environment, we need an \item + /// for each paragraph, otherwise this has to be a deeper + /// paragraph. + bool has_item; + /// we are handling a standard paragraph in an itemize-like + /// environment + bool deeper_paragraph; + /*! + * Inside of unknown environments we may not allow font and layout + * changes. 
+ * Otherwise things like + * \\large\\begin{foo}\\huge bar\\end{foo} + * would not work. + */ + bool new_layout_allowed; + /// Did we output anything yet in any context? + static bool empty; + + /// The textclass of the document. Could actually be a global variable + LyXTextClass const & textclass; + /// The layout of the current paragraph + LyXLayout_ptr layout; + /// The layout of the outer paragraph (for environment layouts) + LyXLayout_ptr parent_layout; + /// font attributes of this context + Font font; + /// font attributes of normal text + static Font normalfont; +}; + + +} // namespace lyx + +#endif diff --git a/src/tex2lyx/LyXFont.cpp b/src/tex2lyx/LyXFont.cpp new file mode 100644 index 0000000000..34bb319a98 --- /dev/null +++ b/src/tex2lyx/LyXFont.cpp @@ -0,0 +1,58 @@ +/** + * \file tex2lyx/LyXFont.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author Angus Leeming + * + * Full author contact details are available in file CREDITS. 
+ */ + +#include + +#include "LyXFont.h" +#include "LyXLex.h" +#include "support/lstrings.h" + + +namespace lyx { + +using lyx::support::ascii_lowercase; + +using std::string; + + +LyXFont & LyXFont::lyxRead(LyXLex & lex) +{ + bool error = false; + bool finished = false; + while (!finished && lex.isOK() && !error) { + lex.next(); + string const tok = ascii_lowercase(lex.getString()); + + if (tok.empty()) { + continue; + } else if (tok == "endfont") { + finished = true; + } else if (tok == "family") { + lex.next(); + } else if (tok == "series") { + lex.next(); + } else if (tok == "shape") { + lex.next(); + } else if (tok == "size") { + lex.next(); + } else if (tok == "misc") { + lex.next(); + } else if (tok == "color") { + lex.next(); + } else { + lex.printError("Unknown tag `$$Token'"); + error = true; + } + } + return *this; +} + + +} // namespace lyx diff --git a/src/tex2lyx/LyXFont.h b/src/tex2lyx/LyXFont.h new file mode 100644 index 0000000000..d9127e43c2 --- /dev/null +++ b/src/tex2lyx/LyXFont.h @@ -0,0 +1,43 @@ +// -*- C++ -*- +/** + * \file tex2lyx/LyXFont.h + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author Angus Leeming + * + * Full author contact details are available in file CREDITS. + * + * This class is just a dummy version of that in the main LyX source tree + * to enable tex2lyx to use LyX's textclass classes and not have to + * re-invent the wheel. 
+ * + */ + +#ifndef LYXFONT_H +#define LYXFONT_H + +namespace lyx { + +class LyXLex; + +class LyXFont { +public: + /// Trick to overload constructor and make it megafast + enum FONT_INIT1 { ALL_INHERIT }; + enum FONT_INIT3 { ALL_SANE }; + + LyXFont() {} + explicit LyXFont(LyXFont::FONT_INIT1) {} + explicit LyXFont(LyXFont::FONT_INIT3) {} + + LyXFont & lyxRead(LyXLex &); + + LyXFont & realize(LyXFont const &) { return *this; } + bool resolved() const { return true; } +}; + + +} // namespace lyx + +#endif // NOT LYXFONT_H diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp new file mode 100644 index 0000000000..cdd5c39921 --- /dev/null +++ b/src/tex2lyx/Parser.cpp @@ -0,0 +1,520 @@ +/** + * \file Parser.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author André Pönitz + * + * Full author contact details are available in file CREDITS. + */ + +#include + +#include "Parser.h" + +#include +#include + + +namespace lyx { + +using std::cerr; +using std::endl; +using std::fill; +using std::istream; +using std::istringstream; +using std::ostringstream; +using std::ostream; +using std::string; + + +namespace { + +CatCode theCatcode[256]; + +void catInit() +{ + fill(theCatcode, theCatcode + 256, catOther); + fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter); + fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter); + + theCatcode[int('\\')] = catEscape; + theCatcode[int('{')] = catBegin; + theCatcode[int('}')] = catEnd; + theCatcode[int('$')] = catMath; + theCatcode[int('&')] = catAlign; + theCatcode[int('\n')] = catNewline; + theCatcode[int('#')] = catParameter; + theCatcode[int('^')] = catSuper; + theCatcode[int('_')] = catSub; + theCatcode[0x7f] = catIgnore; + theCatcode[int(' ')] = catSpace; + theCatcode[int('\t')] = catSpace; + theCatcode[int('\r')] = catNewline; + theCatcode[int('~')] = catActive; + theCatcode[int('%')] = catComment; + + // This is wrong! 
+ theCatcode[int('@')] = catLetter; +} + + +/*! + * Translate a line ending to '\n'. + * \p c must have catcode catNewline, and it must be the last character read + * from \p is. + */ +char getNewline(istream & is, char c) +{ + // we have to handle 3 different line endings: + // - UNIX (\n) + // - MAC (\r) + // - DOS (\r\n) + if (c == '\r') { + // MAC or DOS + if (is.get(c) && c != '\n') { + // MAC + is.putback(c); + } + return '\n'; + } + // UNIX + return c; +} + +} + + +// +// catcodes +// + +CatCode catcode(unsigned char c) +{ + return theCatcode[c]; +} + + + +// +// Token +// + +ostream & operator<<(ostream & os, Token const & t) +{ + if (t.cat() == catComment) + os << '%' << t.cs() << '\n'; + else if (t.cat() == catSpace) + os << t.cs(); + else if (t.cat() == catEscape) + os << '\\' << t.cs() << ' '; + else if (t.cat() == catLetter) + os << t.character(); + else if (t.cat() == catNewline) + os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n"; + else + os << '[' << t.character() << ',' << t.cat() << ']'; + return os; +} + + +string Token::asString() const +{ + return cs_.size() ? cs_ : string(1, char_); +} + + +string Token::asInput() const +{ + if (cat_ == catComment) + return '%' + cs_ + '\n'; + if (cat_ == catSpace || cat_ == catNewline) + return cs_; + return char_ ? string(1, char_) : '\\' + cs_; +} + + +// +// Parser +// + + +Parser::Parser(istream & is) + : lineno_(0), pos_(0) +{ + tokenize(is); +} + + +Parser::Parser(string const & s) + : lineno_(0), pos_(0) +{ + istringstream is(s); + tokenize(is); +} + + +void Parser::push_back(Token const & t) +{ + tokens_.push_back(t); +} + + +void Parser::pop_back() +{ + tokens_.pop_back(); +} + + +Token const & Parser::prev_token() const +{ + static const Token dummy; + return pos_ > 1 ? tokens_[pos_ - 2] : dummy; +} + + +Token const & Parser::curr_token() const +{ + static const Token dummy; + return pos_ > 0 ? 
tokens_[pos_ - 1] : dummy; +} + + +Token const & Parser::next_token() const +{ + static const Token dummy; + return good() ? tokens_[pos_] : dummy; +} + + +Token const & Parser::get_token() +{ + static const Token dummy; + //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n'; + return good() ? tokens_[pos_++] : dummy; +} + + +bool Parser::isParagraph() const +{ + // A new paragraph in TeX ist started + // - either by a newline, following any amount of whitespace + // characters (including zero), and another newline + // - or the token \par + if (curr_token().cat() == catNewline && + (curr_token().cs().size() > 1 || + (next_token().cat() == catSpace && + pos_ < tokens_.size() - 1 && + tokens_[pos_ + 1].cat() == catNewline))) + return true; + if (curr_token().cat() == catEscape && curr_token().cs() == "par") + return true; + return false; +} + + +void Parser::skip_spaces(bool skip_comments) +{ + // We just silently return if we have no more tokens. + // skip_spaces() should be callable at any time, + // the caller must check p::good() anyway. 
+ while (good()) { + get_token(); + if (isParagraph()) { + putback(); + break; + } + if ( curr_token().cat() == catSpace || + curr_token().cat() == catNewline || + (curr_token().cat() == catComment && curr_token().cs().empty())) + continue; + if (skip_comments && curr_token().cat() == catComment) + cerr << " Ignoring comment: " << curr_token().asInput(); + else { + putback(); + break; + } + } +} + + +void Parser::unskip_spaces(bool skip_comments) +{ + while (pos_ > 0) { + if ( curr_token().cat() == catSpace || + (curr_token().cat() == catNewline && curr_token().cs().size() == 1)) + putback(); + else if (skip_comments && curr_token().cat() == catComment) { + // TODO: Get rid of this + cerr << "Unignoring comment: " << curr_token().asInput(); + putback(); + } + else + break; + } +} + + +void Parser::putback() +{ + --pos_; +} + + +bool Parser::good() const +{ + return pos_ < tokens_.size(); +} + + +char Parser::getChar() +{ + if (!good()) + error("The input stream is not well..."); + return tokens_[pos_++].character(); +} + + +Parser::Arg Parser::getFullArg(char left, char right) +{ + skip_spaces(true); + + // This is needed if a partial file ends with a command without arguments, + // e. g. \medskip + if (! 
good()) + return std::make_pair(false, string()); + + string result; + char c = getChar(); + + if (c != left) { + putback(); + return std::make_pair(false, string()); + } else + while ((c = getChar()) != right && good()) { + // Ignore comments + if (curr_token().cat() == catComment) { + if (!curr_token().cs().empty()) + cerr << "Ignoring comment: " << curr_token().asInput(); + } + else + result += curr_token().asInput(); + } + + return std::make_pair(true, result); +} + + +string Parser::getArg(char left, char right) +{ + return getFullArg(left, right).second; +} + + +string Parser::getFullOpt() +{ + Arg arg = getFullArg('[', ']'); + if (arg.first) + return '[' + arg.second + ']'; + return arg.second; +} + + +string Parser::getOpt() +{ + string const res = getArg('[', ']'); + return res.empty() ? string() : '[' + res + ']'; +} + + +string const Parser::verbatimEnvironment(string const & name) +{ + if (!good()) + return string(); + + ostringstream os; + for (Token t = get_token(); good(); t = get_token()) { + if (t.cat() == catBegin) { + putback(); + os << '{' << verbatim_item() << '}'; + } else if (t.asInput() == "\\begin") { + string const env = getArg('{', '}'); + os << "\\begin{" << env << '}' + << verbatimEnvironment(env) + << "\\end{" << env << '}'; + } else if (t.asInput() == "\\end") { + string const end = getArg('{', '}'); + if (end != name) + cerr << "\\end{" << end + << "} does not match \\begin{" << name + << "}." 
<< endl; + return os.str(); + } else + os << t.asInput(); + } + cerr << "unexpected end of input" << endl; + return os.str(); +} + + +void Parser::tokenize(istream & is) +{ + static bool init_done = false; + + if (!init_done) { + catInit(); + init_done = true; + } + + char c; + while (is.get(c)) { + //cerr << "reading c: " << c << "\n"; + + switch (catcode(c)) { + case catSpace: { + string s(1, c); + while (is.get(c) && catcode(c) == catSpace) + s += c; + if (catcode(c) != catSpace) + is.putback(c); + push_back(Token(s, catSpace)); + break; + } + + case catNewline: { + ++lineno_; + string s(1, getNewline(is, c)); + while (is.get(c) && catcode(c) == catNewline) { + ++lineno_; + s += getNewline(is, c); + } + if (catcode(c) != catNewline) + is.putback(c); + push_back(Token(s, catNewline)); + break; + } + + case catComment: { + // We don't treat "%\n" combinations here specially because + // we want to preserve them in the preamble + string s; + while (is.get(c) && catcode(c) != catNewline) + s += c; + // handle possible DOS line ending + if (catcode(c) == catNewline) + c = getNewline(is, c); + // Note: The '%' at the beginning and the '\n' at the end + // of the comment are not stored. 
+ ++lineno_; + push_back(Token(s, catComment)); + break; + } + + case catEscape: { + is.get(c); + if (!is) { + error("unexpected end of input"); + } else { + string s(1, c); + if (catcode(c) == catLetter) { + // collect letters + while (is.get(c) && catcode(c) == catLetter) + s += c; + if (catcode(c) != catLetter) + is.putback(c); + } + push_back(Token(s, catEscape)); + } + break; + } + + case catIgnore: { + cerr << "ignoring a char: " << int(c) << "\n"; + break; + } + + default: + push_back(Token(c, catcode(c))); + } + } +} + + +void Parser::dump() const +{ + cerr << "\nTokens: "; + for (unsigned i = 0; i < tokens_.size(); ++i) { + if (i == pos_) + cerr << " <#> "; + cerr << tokens_[i]; + } + cerr << " pos: " << pos_ << "\n"; +} + + +void Parser::error(string const & msg) +{ + cerr << "Line ~" << lineno_ << ": parse error: " << msg << endl; + dump(); + //exit(1); +} + + +string Parser::verbatimOption() +{ + string res; + if (next_token().character() == '[') { + Token t = get_token(); + for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) { + if (t.cat() == catBegin) { + putback(); + res += '{' + verbatim_item() + '}'; + } else + res += t.asString(); + } + } + return res; +} + + +string Parser::verbatim_item() +{ + if (!good()) + error("stream bad"); + skip_spaces(); + if (next_token().cat() == catBegin) { + Token t = get_token(); // skip brace + string res; + for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) { + if (t.cat() == catBegin) { + putback(); + res += '{' + verbatim_item() + '}'; + } + else + res += t.asInput(); + } + return res; + } + return get_token().asInput(); +} + + +void Parser::reset() +{ + pos_ = 0; +} + + +void Parser::setCatCode(char c, CatCode cat) +{ + theCatcode[(unsigned char)c] = cat; +} + + +CatCode Parser::getCatCode(char c) const +{ + return theCatcode[(unsigned char)c]; +} + + +} // namespace lyx diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h new file mode 100644 index 
0000000000..5c84898210 --- /dev/null +++ b/src/tex2lyx/Parser.h @@ -0,0 +1,214 @@ +// -*- C++ -*- +/** + * \file Parser.h + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author André Pönitz + * + * Full author contact details are available in file CREDITS. + */ + +#ifndef PARSER_H +#define PARSER_H + +#include +#include +#include + + +namespace lyx { + + +enum mode_type {UNDECIDED_MODE, TEXT_MODE, MATH_MODE, MATHTEXT_MODE, TABLE_MODE}; + +mode_type asMode(mode_type oldmode, std::string const & str); + + +// These are TeX's catcodes +enum CatCode { + catEscape, // 0 backslash + catBegin, // 1 { + catEnd, // 2 } + catMath, // 3 $ + catAlign, // 4 & + catNewline, // 5 ^^M + catParameter, // 6 # + catSuper, // 7 ^ + catSub, // 8 _ + catIgnore, // 9 + catSpace, // 10 space + catLetter, // 11 a-zA-Z + catOther, // 12 none of the above + catActive, // 13 ~ + catComment, // 14 % + catInvalid // 15 +}; + + +CatCode catcode(unsigned char c); + + +enum { + FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing + FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process + FLAG_END = 1 << 3, // next \\end ends the parsing process + FLAG_BRACK_LAST = 1 << 4, // next closing bracket ends the parsing + FLAG_TEXTMODE = 1 << 5, // we are in a box + FLAG_ITEM = 1 << 6, // read a (possibly braced token) + FLAG_LEAVE = 1 << 7, // leave the loop at the end + FLAG_SIMPLE = 1 << 8, // next $ leaves the loop + FLAG_EQUATION = 1 << 9, // next \] leaves the loop + FLAG_SIMPLE2 = 1 << 10, // next \) leaves the loop + FLAG_OPTION = 1 << 11, // read [...] 
style option + FLAG_BRACED = 1 << 12, // read {...} style argument + FLAG_CELL = 1 << 13, // read table cell + FLAG_TABBING = 1 << 14 // We are inside a tabbing environment +}; + + + +// +// Helper class for parsing +// + +class Token { +public: + /// + Token() : cs_(), char_(0), cat_(catIgnore) {} + /// + Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {} + /// + Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {} + + /// + std::string const & cs() const { return cs_; } + /// Returns the catcode of the token + CatCode cat() const { return cat_; } + /// + char character() const { return char_; } + /// Returns the token as string + std::string asString() const; + /// Returns the token verbatim + std::string asInput() const; + +private: + /// + std::string cs_; + /// + char char_; + /// + CatCode cat_; +}; + +std::ostream & operator<<(std::ostream & os, Token const & t); + + +/*! + * Actual parser class + * + * The parser parses every character of the inputstream into a token + * and classifies the token. + * The following transformations are done: + * - Consecutive spaces are combined into one single token with CatCode catSpace + * - Consecutive newlines are combined into one single token with CatCode catNewline + * - Comments and %\n combinations are parsed into one token with CatCode catComment + */ + +class Parser { + +public: + /// + Parser(std::istream & is); + /// + Parser(std::string const & s); + + /// + int lineno() const { return lineno_; } + /// + void putback(); + /// dump contents to screen + void dump() const; + + /// + typedef std::pair Arg; + /*! + * Get an argument enclosed by \p left and \p right. + * \returns wether an argument was found in \p Arg.first and the + * argument in \p Arg.second. \see getArg(). + */ + Arg getFullArg(char left, char right); + /*! + * Get an argument enclosed by \p left and \p right. 
+ * \returns the argument (without \p left and \p right) or the empty + * string if the next non-space token is not \p left. Use + * getFullArg() if you need to know wether there was an empty + * argument or no argument at all. + */ + std::string getArg(char left, char right); + /*! + * \returns getFullArg('[', ']') including the brackets or the + * empty string if no argument was found. + */ + std::string getFullOpt(); + /// \returns getArg('[', ']') including the brackets + std::string getOpt(); + /*! + * \returns the contents of the environment \p name. + * \begin{name} must be parsed already, \end{name} + * is parsed but not returned. + */ + std::string const verbatimEnvironment(std::string const & name); + /// Returns the character of the current token and increments the token position. + char getChar(); + /// + void error(std::string const & msg); + /// Parses \p is into tokens + void tokenize(std::istream & is); + /// + void push_back(Token const & t); + /// + void pop_back(); + /// The previous token. + Token const & prev_token() const; + /// The current token. + Token const & curr_token() const; + /// The next token. + Token const & next_token() const; + /// Make the next token current and return that. 
+ Token const & get_token(); + /// \return whether the current token starts a new paragraph + bool isParagraph() const; + /// skips spaces (and comments if \p skip_comments is true) + void skip_spaces(bool skip_comments = false); + /// puts back spaces (and comments if \p skip_comments is true) + void unskip_spaces(bool skip_comments = false); + /// + void lex(std::string const & s); + /// + bool good() const; + /// + std::string verbatim_item(); + /// + std::string verbatimOption(); + /// resets the parser to initial state + void reset(); + /// + void setCatCode(char c, CatCode cat); + /// + CatCode getCatCode(char c) const; + +private: + /// + int lineno_; + /// + std::vector tokens_; + /// + unsigned pos_; +}; + + + +} // namespace lyx + +#endif diff --git a/src/tex2lyx/boost.C b/src/tex2lyx/boost.C deleted file mode 100644 index d66af3f2b4..0000000000 --- a/src/tex2lyx/boost.C +++ /dev/null @@ -1,42 +0,0 @@ -/** - * \file boost.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author Lars Gullik Bjønnes - * - * Full author contact details are available in file CREDITS. - */ - -#include - -#include "debug.h" - -#include - -#include -#include - - -using std::endl; - -namespace boost { - -void throw_exception(std::exception const & e) -{ - lyx::lyxerr << "Exception caught:\n" - << e.what() << endl; - BOOST_ASSERT(false); -} - - -void assertion_failed(char const * expr, char const * function, - char const * file, long line) -{ - lyx::lyxerr << "Assertion triggered in " << function - << " by failing check \"" << expr << "\"" - << " in file " << file << ":" << line << endl; - ::abort(); -} - -} // namespace boost diff --git a/src/tex2lyx/boost.cpp b/src/tex2lyx/boost.cpp new file mode 100644 index 0000000000..d66af3f2b4 --- /dev/null +++ b/src/tex2lyx/boost.cpp @@ -0,0 +1,42 @@ +/** + * \file boost.cpp + * This file is part of LyX, the document processor. 
+ * Licence details can be found in the file COPYING. + * + * \author Lars Gullik Bjønnes + * + * Full author contact details are available in file CREDITS. + */ + +#include + +#include "debug.h" + +#include + +#include +#include + + +using std::endl; + +namespace boost { + +void throw_exception(std::exception const & e) +{ + lyx::lyxerr << "Exception caught:\n" + << e.what() << endl; + BOOST_ASSERT(false); +} + + +void assertion_failed(char const * expr, char const * function, + char const * file, long line) +{ + lyx::lyxerr << "Assertion triggered in " << function + << " by failing check \"" << expr << "\"" + << " in file " << file << ":" << line << endl; + ::abort(); +} + +} // namespace boost diff --git a/src/tex2lyx/context.C b/src/tex2lyx/context.C deleted file mode 100644 index 36d1ec1782..0000000000 --- a/src/tex2lyx/context.C +++ /dev/null @@ -1,241 +0,0 @@ -/** - * \file Context.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author Jean-Marc Lasgouttes - * - * Full author contact details are available in file CREDITS. 
- */ - -#include - -#include - -#include "support/lstrings.h" -#include "Context.h" - - -namespace lyx { - -using std::ostream; -using std::endl; -using std::string; - - -namespace { - -void begin_layout(ostream & os, LyXLayout_ptr layout, Font const & font, - Font const & normalfont) -{ - os << "\n\\begin_layout " << layout->name() << "\n"; - // FIXME: This is not enough for things like - // \\Huge par1 \\par par2 - output_font_change(os, normalfont, font); -} - - -void end_layout(ostream & os) -{ - os << "\n\\end_layout\n"; -} - - -void begin_deeper(ostream & os) -{ - os << "\n\\begin_deeper"; -} - - -void end_deeper(ostream & os) -{ - os << "\n\\end_deeper"; -} - -} - - -bool operator==(Font const & f1, Font const & f2) -{ - return - f1.size == f2.size && - f1.family == f2.family && - f1.series == f2.series && - f1.shape == f2.shape; -} - - -void output_font_change(ostream & os, Font const & oldfont, - Font const & newfont) -{ - if (oldfont.family != newfont.family) - os << "\n\\family " << newfont.family << '\n'; - if (oldfont.series != newfont.series) - os << "\n\\series " << newfont.series << '\n'; - if (oldfont.shape != newfont.shape) - os << "\n\\shape " << newfont.shape << '\n'; - if (oldfont.size != newfont.size) - os << "\n\\size " << newfont.size << '\n'; -} - - -Font Context::normalfont; -bool Context::empty = true; - - -Context::Context(bool need_layout_, - LyXTextClass const & textclass_, - LyXLayout_ptr layout_, LyXLayout_ptr parent_layout_, - Font font_) - : need_layout(need_layout_), - need_end_layout(false), need_end_deeper(false), - has_item(false), deeper_paragraph(false), - new_layout_allowed(true), textclass(textclass_), - layout(layout_), parent_layout(parent_layout_), - font(font_) -{ - if (!layout.get()) - layout = textclass.defaultLayout(); - if (!parent_layout.get()) - parent_layout = textclass.defaultLayout(); -} - - -Context::~Context() -{ - if (!extra_stuff.empty()) - std::cerr << "Bug: Ignoring extra stuff '" << extra_stuff - << '\'' 
<< std::endl; -} - - -void Context::check_layout(ostream & os) -{ - if (need_layout) { - check_end_layout(os); - - // are we in a list-like environment? - if (layout->isEnvironment() - && layout->latextype != LATEX_ENVIRONMENT) { - // A list-like environment - if (has_item) { - // a new item. If we had a standard - // paragraph before, we have to end it. - if (deeper_paragraph) { - end_deeper(os); - deeper_paragraph = false; - } - begin_layout(os, layout, font, normalfont); - has_item = false; - } else { - // a standard paragraph in an - // enumeration. We have to recognize - // that this may require a begin_deeper. - if (!deeper_paragraph) - begin_deeper(os); - begin_layout(os, textclass.defaultLayout(), - font, normalfont); - deeper_paragraph = true; - } - } else { - // No list-like environment - begin_layout(os, layout, font, normalfont); - } - need_layout = false; - need_end_layout = true; - if (!extra_stuff.empty()) { - os << extra_stuff; - extra_stuff.erase(); - } - os << "\n"; - empty = false; - } -} - - -void Context::check_end_layout(ostream & os) -{ - if (need_end_layout) { - end_layout(os); - need_end_layout = false; - } -} - - -void Context::check_deeper(ostream & os) -{ - if (parent_layout->isEnvironment()) { - // We start a nested environment. - // We need to increase the depth. 
- if (need_end_deeper) { - // no need to have \end_deeper \begin_deeper - need_end_deeper = false; - } else { - begin_deeper(os); - need_end_deeper = true; - } - } else - check_end_deeper(os); -} - - -void Context::check_end_deeper(ostream & os) -{ - if (need_end_deeper) { - end_deeper(os); - need_end_deeper = false; - } - if (deeper_paragraph) { - end_deeper(os); - deeper_paragraph = false; - } -} - - -void Context::set_item() -{ - need_layout = true; - has_item = true; -} - - -void Context::new_paragraph(ostream & os) -{ - check_end_layout(os); - need_layout = true; -} - - -void Context::add_extra_stuff(std::string const & stuff) -{ - if (!lyx::support::contains(extra_stuff, stuff)) - extra_stuff += stuff; -} - - -void Context::dump(ostream & os, string const & desc) const -{ - os << "\n" << desc <<" ["; - if (need_layout) - os << "need_layout "; - if (need_end_layout) - os << "need_end_layout "; - if (need_end_deeper) - os << "need_end_deeper "; - if (has_item) - os << "has_item "; - if (deeper_paragraph) - os << "deeper_paragraph "; - if (new_layout_allowed) - os << "new_layout_allowed "; - if (!extra_stuff.empty()) - os << "extrastuff=[" << extra_stuff << "] "; - os << "textclass=" << textclass.name() - << " layout=" << layout->name() - << " parent_layout=" << parent_layout->name() << "] font=[" - << font.size << ' ' << font.family << ' ' << font.series << ' ' - << font.shape << ']' << endl; -} - - -} // namespace lyx diff --git a/src/tex2lyx/context.h b/src/tex2lyx/context.h deleted file mode 100644 index 307e0d65e6..0000000000 --- a/src/tex2lyx/context.h +++ /dev/null @@ -1,157 +0,0 @@ -// -*- C++ -*- -/** - * \file Context.h - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author Jean-Marc Lasgouttes - * - * Full author contact details are available in file CREDITS. - */ - -#ifndef CONTEXT_H -#define CONTEXT_H - -#include "LyXTextClass.h" - -#include - - -namespace lyx { - - -/*! 
- * Small helper struct that holds font properties. - * The names are in LyX language, not LaTeX. - * We don't use LyXFont, because it pulls in a lot of dependencies and has - * more strings than needed (e.g. font family error1 etc.). - * If more font related stuff is needed, it might be good to change to - * LyXFont. - */ -class Font { -public: - Font() - { - init(); - } - void init() - { - size = "normal"; - family = "default"; - series = "default"; - shape = "default"; - } - std::string size; - std::string family; - std::string series; - std::string shape; -}; - - -bool operator==(Font const &, Font const &); - - -inline bool operator!=(Font const & f1, Font const & f2) -{ - return !operator==(f1, f2); -} - - -/// Output changed font parameters if \p oldfont and \p newfont differ -void output_font_change(std::ostream & os, Font const & oldfont, - Font const & newfont); - - -/*! - * A helper struct. - * - * Every bit of text has a corresponding context. - * Usage: Parsing begins with a global context. A new context is opened for - * every new LaTeX group, e.g. at the beginning of a new environment. - * The old context is used again after the group is closed. - * - * Since not all paragraph parameters in LyX have the same scoping as their - * LaTeX counterpart we may have to transfer context properties (e. g. the - * font) from and to the parent context. 
- */ -class Context { -public: - Context(bool need_layout_, - LyXTextClass const & textclass_, - LyXLayout_ptr layout_ = LyXLayout_ptr(), - LyXLayout_ptr parent_layout_= LyXLayout_ptr(), - Font font_ = Font()); - ~Context(); - - /// Output a \\begin_layout if requested - void check_layout(std::ostream & os); - - /// Output a \\end_layout if needed - void check_end_layout(std::ostream & os); - - /// Output a \\begin_deeper if needed - void check_deeper(std::ostream & os); - - /// Output a \\end_deeper if needed - void check_end_deeper(std::ostream & os); - - /// dump content on stream (for debugging purpose), with - /// description \c desc. - void dump(std::ostream &, std::string const & desc = "context") const; - - /// Are we just beginning a new paragraph? - bool atParagraphStart() const { return need_layout; } - - /// Begin an item in a list environment - void set_item(); - - /// Start a new paragraph - void new_paragraph(std::ostream & os); - - /// Add extra stuff if not already there - void add_extra_stuff(std::string const &); - - /// Do we need to output some \\begin_layout command before the - /// next characters? - bool need_layout; - /// Do we need to output some \\end_layout command - bool need_end_layout; - /// We may need to add something after this \\begin_layout command - std::string extra_stuff; - /// If there has been an \\begin_deeper, we'll need a matching - /// \\end_deeper - bool need_end_deeper; - /// If we are in an itemize-like environment, we need an \item - /// for each paragraph, otherwise this has to be a deeper - /// paragraph. - bool has_item; - /// we are handling a standard paragraph in an itemize-like - /// environment - bool deeper_paragraph; - /*! - * Inside of unknown environments we may not allow font and layout - * changes. - * Otherwise things like - * \\large\\begin{foo}\\huge bar\\end{foo} - * would not work. - */ - bool new_layout_allowed; - /// Did we output anything yet in any context? 
- static bool empty; - - /// The textclass of the document. Could actually be a global variable - LyXTextClass const & textclass; - /// The layout of the current paragraph - LyXLayout_ptr layout; - /// The layout of the outer paragraph (for environment layouts) - LyXLayout_ptr parent_layout; - /// font attributes of this context - Font font; - /// font attributes of normal text - static Font normalfont; -}; - - -} // namespace lyx - -#endif diff --git a/src/tex2lyx/gettext.C b/src/tex2lyx/gettext.C deleted file mode 100644 index 43fc55d278..0000000000 --- a/src/tex2lyx/gettext.C +++ /dev/null @@ -1,30 +0,0 @@ -/** - * \file tex2lyx/gettext.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author Lars Gullik Bjønnes - * \author Jean-Marc Lasgouttes - * - * Full author contact details are available in file CREDITS. - */ - -#include - -#include "gettext.h" - - -namespace lyx { - - -docstring const _(std::string const & str) -{ - return from_ascii(str); -} - - -void locale_init() -{} - - -} // namespace lyx diff --git a/src/tex2lyx/gettext.cpp b/src/tex2lyx/gettext.cpp new file mode 100644 index 0000000000..43fc55d278 --- /dev/null +++ b/src/tex2lyx/gettext.cpp @@ -0,0 +1,30 @@ +/** + * \file tex2lyx/gettext.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author Lars Gullik Bjønnes + * \author Jean-Marc Lasgouttes + * + * Full author contact details are available in file CREDITS. 
+ */ + +#include + +#include "gettext.h" + + +namespace lyx { + + +docstring const _(std::string const & str) +{ + return from_ascii(str); +} + + +void locale_init() +{} + + +} // namespace lyx diff --git a/src/tex2lyx/lengthcommon.C b/src/tex2lyx/lengthcommon.C deleted file mode 100644 index 038621438b..0000000000 --- a/src/tex2lyx/lengthcommon.C +++ /dev/null @@ -1,13 +0,0 @@ -// This file is here sine the master lengthcommon.cpp contains gettext -// markers, and also that this file does not always exist (in the linked -// case). So it is possible for po/POTFILES.in to get out of sync. -// The compile will then fail. So to make sure that this file -//(tex2lyx/lengthcommon.cpp) is not present in POTFILES.in we do a -// include trick. (Lgb) -#include "../lengthcommon.cpp" - - -namespace lyx { - - -} // namespace lyx diff --git a/src/tex2lyx/lengthcommon.cpp b/src/tex2lyx/lengthcommon.cpp new file mode 100644 index 0000000000..038621438b --- /dev/null +++ b/src/tex2lyx/lengthcommon.cpp @@ -0,0 +1,13 @@ +// This file is here sine the master lengthcommon.cpp contains gettext +// markers, and also that this file does not always exist (in the linked +// case). So it is possible for po/POTFILES.in to get out of sync. +// The compile will then fail. So to make sure that this file +//(tex2lyx/lengthcommon.cpp) is not present in POTFILES.in we do a +// include trick. (Lgb) +#include "../lengthcommon.cpp" + + +namespace lyx { + + +} // namespace lyx diff --git a/src/tex2lyx/lyxfont.C b/src/tex2lyx/lyxfont.C deleted file mode 100644 index 34bb319a98..0000000000 --- a/src/tex2lyx/lyxfont.C +++ /dev/null @@ -1,58 +0,0 @@ -/** - * \file tex2lyx/LyXFont.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author Angus Leeming - * - * Full author contact details are available in file CREDITS. 
- */ - -#include - -#include "LyXFont.h" -#include "LyXLex.h" -#include "support/lstrings.h" - - -namespace lyx { - -using lyx::support::ascii_lowercase; - -using std::string; - - -LyXFont & LyXFont::lyxRead(LyXLex & lex) -{ - bool error = false; - bool finished = false; - while (!finished && lex.isOK() && !error) { - lex.next(); - string const tok = ascii_lowercase(lex.getString()); - - if (tok.empty()) { - continue; - } else if (tok == "endfont") { - finished = true; - } else if (tok == "family") { - lex.next(); - } else if (tok == "series") { - lex.next(); - } else if (tok == "shape") { - lex.next(); - } else if (tok == "size") { - lex.next(); - } else if (tok == "misc") { - lex.next(); - } else if (tok == "color") { - lex.next(); - } else { - lex.printError("Unknown tag `$$Token'"); - error = true; - } - } - return *this; -} - - -} // namespace lyx diff --git a/src/tex2lyx/lyxfont.h b/src/tex2lyx/lyxfont.h deleted file mode 100644 index d9127e43c2..0000000000 --- a/src/tex2lyx/lyxfont.h +++ /dev/null @@ -1,43 +0,0 @@ -// -*- C++ -*- -/** - * \file tex2lyx/LyXFont.h - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author Angus Leeming - * - * Full author contact details are available in file CREDITS. - * - * This class is just a dummy version of that in the main LyX source tree - * to enable tex2lyx to use LyX's textclass classes and not have to - * re-invent the wheel. 
- * - */ - -#ifndef LYXFONT_H -#define LYXFONT_H - -namespace lyx { - -class LyXLex; - -class LyXFont { -public: - /// Trick to overload constructor and make it megafast - enum FONT_INIT1 { ALL_INHERIT }; - enum FONT_INIT3 { ALL_SANE }; - - LyXFont() {} - explicit LyXFont(LyXFont::FONT_INIT1) {} - explicit LyXFont(LyXFont::FONT_INIT3) {} - - LyXFont & lyxRead(LyXLex &); - - LyXFont & realize(LyXFont const &) { return *this; } - bool resolved() const { return true; } -}; - - -} // namespace lyx - -#endif // NOT LYXFONT_H diff --git a/src/tex2lyx/math.C b/src/tex2lyx/math.C deleted file mode 100644 index c2af18f009..0000000000 --- a/src/tex2lyx/math.C +++ /dev/null @@ -1,245 +0,0 @@ -/** - * \file math.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author André Pönitz - * - * Full author contact details are available in file CREDITS. - */ - -// {[( - -#include - -#include "tex2lyx.h" - -#include - - -namespace lyx { - -using std::cerr; -using std::endl; - -using std::ostream; -using std::string; - - -bool is_math_env(string const & name) -{ - return known_math_environments.find(name) != known_math_environments.end(); -} - - -void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) -{ - while (p.good()) { - Token const & t = p.get_token(); - -#ifdef FILEDEBUG - cerr << "t: " << t << " flags: " << flags << "\n"; -#endif - - if (flags & FLAG_ITEM) { - if (t.cat() == catSpace) - continue; - - flags &= ~FLAG_ITEM; - if (t.cat() == catBegin) { - // skip the brace and collect everything to the next matching - // closing brace - flags |= FLAG_BRACE_LAST; - continue; - } - - // handle only this single token, leave the loop if done - flags |= FLAG_LEAVE; - } - - - // - // cat codes - // - if (t.cat() == catMath) { - if (mode == MATHTEXT_MODE) { - // we are inside some text mode thingy, so opening new math is allowed - Token const & n = p.get_token(); - if (n.cat() == catMath) { - 
// TeX's $$...$$ syntax for displayed math - os << "\\["; - parse_math(p, os, FLAG_SIMPLE, MATH_MODE); - os << "\\]"; - p.get_token(); // skip the second '$' token - } else { - // simple $...$ stuff - p.putback(); - os << '$'; - parse_math(p, os, FLAG_SIMPLE, MATH_MODE); - os << '$'; - } - } - - else if (flags & FLAG_SIMPLE) { - // this is the end of the formula - return; - } - - else { - cerr << "\nmode: " << mode << endl; - p.error("something strange in the parser\n"); - break; - } - } - - else if (t.cat() == catLetter || - t.cat() == catSuper || - t.cat() == catSub || - t.cat() == catOther || - t.cat() == catAlign || - t.cat() == catActive || - t.cat() == catParameter) - os << t.character(); - - else if (t.cat() == catBegin) { - os << '{'; - parse_math(p, os, FLAG_BRACE_LAST, mode); - os << '}'; - } - - else if (t.cat() == catEnd) { - if (flags & FLAG_BRACE_LAST) - return; - os << "unexpected '}' in math\n"; - } - - else if (t.cat() == catComment) { - if (!t.cs().empty()) - cerr << "Ignoring comment: " << t.asInput(); - else - // "%\n" combination - p.skip_spaces(); - } - - // - // control sequences - // - - else if (t.cs() == "(") { - os << "\\("; - parse_math(p, os, FLAG_SIMPLE2, MATH_MODE); - os << "\\)"; - } - - else if (t.cs() == "[") { - // special handling of a few common SW user quirks - p.skip_spaces(); - //if (p.next_token().cs() == - os << "\\["; - parse_math(p, os, FLAG_EQUATION, MATH_MODE); - os << "\\]"; - } - - else if (t.cs() == "protect") - // ignore \\protect, will hopefully be re-added during output - ; - - else if (t.cs() == "begin") { - string const name = p.getArg('{', '}'); - active_environments.push_back(name); - os << "\\begin{" << name << "}"; - if (name == "tabular") - parse_math(p, os, FLAG_END, MATHTEXT_MODE); - else - parse_math(p, os, FLAG_END, mode); - os << "\\end{" << name << "}"; - active_environments.pop_back(); - } - - else if (t.cs() == "end") { - if (flags & FLAG_END) { - // eat environment name - string const name = 
p.getArg('{', '}'); - if (name != active_environment()) - p.error("\\end{" + name + "} does not match \\begin{" - + active_environment() + "}"); - return; - } - p.error("found 'end' unexpectedly"); - } - - else if (t.cs() == ")") { - if (flags & FLAG_SIMPLE2) - return; - p.error("found '\\)' unexpectedly"); - } - - else if (t.cs() == "]") { - if (flags & FLAG_EQUATION) - return; - p.error("found '\\]' unexpectedly"); - } - - else if (t.cs() == "textrm" || t.cs() == "textsf" || t.cs() == "textbf" - || t.cs() == "texttt" || t.cs() == "textsc" - || t.cs() == "text" || t.cs() == "intertext") { - os << '\\' << t.cs() << '{'; - parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE); - os << '}'; - } - - else if (t.cs() == "tag") { - os << '\\' << t.cs(); - if (p.next_token().asInput() == "*") { - p.get_token(); - os << '*'; - } - os << '{'; - parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE); - os << '}'; - } - - else if (t.cs() == "mbox" || t.cs() == "fbox") { - os << "\\" << t.cs() << '{'; - parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE); - os << '}'; - } - - else if (t.cs() == "\"") { - string const name = p.verbatim_item(); - if (name == "a") os << 'ä'; - else if (name == "o") os << 'ö'; - else if (name == "u") os << 'ü'; - else if (name == "A") os << 'Ä'; - else if (name == "O") os << 'Ö'; - else if (name == "U") os << 'Ü'; - else os << "\"{" << name << "}"; - } - - else if (t.cs() == "ss") - os << "ß"; - - else if (t.cs() == "cr") { - // lyx can't handle \\cr - cerr << "Warning: Converting TeX '\\cr' to LaTeX '\\\\'." - << endl; - os << "\\\\"; - } - - else - os << t.asInput(); - - if (flags & FLAG_LEAVE) { - flags &= ~FLAG_LEAVE; - break; - } - } -} - - - - -// }]) - - -} // namespace lyx diff --git a/src/tex2lyx/math.cpp b/src/tex2lyx/math.cpp new file mode 100644 index 0000000000..c2af18f009 --- /dev/null +++ b/src/tex2lyx/math.cpp @@ -0,0 +1,245 @@ +/** + * \file math.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. 
+ * + * \author André Pönitz + * + * Full author contact details are available in file CREDITS. + */ + +// {[( + +#include + +#include "tex2lyx.h" + +#include + + +namespace lyx { + +using std::cerr; +using std::endl; + +using std::ostream; +using std::string; + + +bool is_math_env(string const & name) +{ + return known_math_environments.find(name) != known_math_environments.end(); +} + + +void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) +{ + while (p.good()) { + Token const & t = p.get_token(); + +#ifdef FILEDEBUG + cerr << "t: " << t << " flags: " << flags << "\n"; +#endif + + if (flags & FLAG_ITEM) { + if (t.cat() == catSpace) + continue; + + flags &= ~FLAG_ITEM; + if (t.cat() == catBegin) { + // skip the brace and collect everything to the next matching + // closing brace + flags |= FLAG_BRACE_LAST; + continue; + } + + // handle only this single token, leave the loop if done + flags |= FLAG_LEAVE; + } + + + // + // cat codes + // + if (t.cat() == catMath) { + if (mode == MATHTEXT_MODE) { + // we are inside some text mode thingy, so opening new math is allowed + Token const & n = p.get_token(); + if (n.cat() == catMath) { + // TeX's $$...$$ syntax for displayed math + os << "\\["; + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << "\\]"; + p.get_token(); // skip the second '$' token + } else { + // simple $...$ stuff + p.putback(); + os << '$'; + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << '$'; + } + } + + else if (flags & FLAG_SIMPLE) { + // this is the end of the formula + return; + } + + else { + cerr << "\nmode: " << mode << endl; + p.error("something strange in the parser\n"); + break; + } + } + + else if (t.cat() == catLetter || + t.cat() == catSuper || + t.cat() == catSub || + t.cat() == catOther || + t.cat() == catAlign || + t.cat() == catActive || + t.cat() == catParameter) + os << t.character(); + + else if (t.cat() == catBegin) { + os << '{'; + parse_math(p, os, FLAG_BRACE_LAST, mode); + os << '}'; + } + + else 
if (t.cat() == catEnd) { + if (flags & FLAG_BRACE_LAST) + return; + os << "unexpected '}' in math\n"; + } + + else if (t.cat() == catComment) { + if (!t.cs().empty()) + cerr << "Ignoring comment: " << t.asInput(); + else + // "%\n" combination + p.skip_spaces(); + } + + // + // control sequences + // + + else if (t.cs() == "(") { + os << "\\("; + parse_math(p, os, FLAG_SIMPLE2, MATH_MODE); + os << "\\)"; + } + + else if (t.cs() == "[") { + // special handling of a few common SW user quirks + p.skip_spaces(); + //if (p.next_token().cs() == + os << "\\["; + parse_math(p, os, FLAG_EQUATION, MATH_MODE); + os << "\\]"; + } + + else if (t.cs() == "protect") + // ignore \\protect, will hopefully be re-added during output + ; + + else if (t.cs() == "begin") { + string const name = p.getArg('{', '}'); + active_environments.push_back(name); + os << "\\begin{" << name << "}"; + if (name == "tabular") + parse_math(p, os, FLAG_END, MATHTEXT_MODE); + else + parse_math(p, os, FLAG_END, mode); + os << "\\end{" << name << "}"; + active_environments.pop_back(); + } + + else if (t.cs() == "end") { + if (flags & FLAG_END) { + // eat environment name + string const name = p.getArg('{', '}'); + if (name != active_environment()) + p.error("\\end{" + name + "} does not match \\begin{" + + active_environment() + "}"); + return; + } + p.error("found 'end' unexpectedly"); + } + + else if (t.cs() == ")") { + if (flags & FLAG_SIMPLE2) + return; + p.error("found '\\)' unexpectedly"); + } + + else if (t.cs() == "]") { + if (flags & FLAG_EQUATION) + return; + p.error("found '\\]' unexpectedly"); + } + + else if (t.cs() == "textrm" || t.cs() == "textsf" || t.cs() == "textbf" + || t.cs() == "texttt" || t.cs() == "textsc" + || t.cs() == "text" || t.cs() == "intertext") { + os << '\\' << t.cs() << '{'; + parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE); + os << '}'; + } + + else if (t.cs() == "tag") { + os << '\\' << t.cs(); + if (p.next_token().asInput() == "*") { + p.get_token(); + os << '*'; + } + os 
<< '{'; + parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE); + os << '}'; + } + + else if (t.cs() == "mbox" || t.cs() == "fbox") { + os << "\\" << t.cs() << '{'; + parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE); + os << '}'; + } + + else if (t.cs() == "\"") { + string const name = p.verbatim_item(); + if (name == "a") os << 'ä'; + else if (name == "o") os << 'ö'; + else if (name == "u") os << 'ü'; + else if (name == "A") os << 'Ä'; + else if (name == "O") os << 'Ö'; + else if (name == "U") os << 'Ü'; + else os << "\"{" << name << "}"; + } + + else if (t.cs() == "ss") + os << "ß"; + + else if (t.cs() == "cr") { + // lyx can't handle \\cr + cerr << "Warning: Converting TeX '\\cr' to LaTeX '\\\\'." + << endl; + os << "\\\\"; + } + + else + os << t.asInput(); + + if (flags & FLAG_LEAVE) { + flags &= ~FLAG_LEAVE; + break; + } + } +} + + + + +// }]) + + +} // namespace lyx diff --git a/src/tex2lyx/preamble.C b/src/tex2lyx/preamble.C deleted file mode 100644 index db16af2081..0000000000 --- a/src/tex2lyx/preamble.C +++ /dev/null @@ -1,514 +0,0 @@ -/** - * \file preamble.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author André Pönitz - * - * Full author contact details are available in file CREDITS. 
- */ - -// {[( - -#include - -#include "tex2lyx.h" - -#include "layout.h" -#include "LyXTextClass.h" -#include "LyXLex.h" -#include "support/filetools.h" -#include "support/lstrings.h" - -#include -#include -#include -#include -#include -#include - - -namespace lyx { - -using std::istringstream; -using std::ostream; -using std::ostringstream; -using std::string; -using std::vector; -using std::cerr; -using std::endl; - -using support::FileName; -using support::libFileSearch; - -// special columntypes -extern std::map special_columns; - -std::map > used_packages; - -namespace { - -const char * const known_languages[] = { "austrian", "babel", "bahasa", -"basque", "breton", "british", "bulgarian", "catalan", "croatian", "czech", -"danish", "dutch", "english", "esperanto", "estonian", "finnish", -"francais", "french", "frenchb", "frenchle", "frenchpro", -"galician", "german", "germanb", "greek", "hebcal", "hebfont", -"hebrew", "hebrew_newcode", "hebrew_oldcode", "hebrew_p", "hyphen", -"icelandic", "irish", "italian", "latin", "lgrcmr", "lgrcmro", "lgrcmss", -"lgrcmtt", "lgrenc", "lgrlcmss", "lgrlcmtt", "lheclas", "lhecmr", -"lhecmss", "lhecmtt", "lhecrml", "lheenc", "lhefr", "lheredis", "lheshold", -"lheshscr", "lheshstk", "lsorbian", "magyar", "naustrian", "ngermanb", -"ngerman", "norsk", "polish", "portuges", "rlbabel", "romanian", -"russianb", "samin", "scottish", "serbian", "slovak", "slovene", "spanish", -"swedish", "turkish", "ukraineb", "usorbian", "welsh", 0}; - -const char * const known_french_languages[] = {"french", "frenchb", "francais", - "frenchle", "frenchpro", 0}; -char const * const known_fontsizes[] = { "10pt", "11pt", "12pt", 0 }; - -// some ugly stuff -ostringstream h_preamble; -string h_textclass = "article"; -string h_options = string(); -string h_language = "english"; -string h_inputencoding = "latin1"; -string h_fontscheme = "default"; -string h_graphics = "default"; -string h_paperfontsize = "default"; -string h_spacing = "single"; -string 
h_papersize = "default"; -string h_use_geometry = "false"; -string h_use_amsmath = "0"; -string h_cite_engine = "basic"; -string h_use_bibtopic = "false"; -string h_paperorientation = "portrait"; -string h_secnumdepth = "3"; -string h_tocdepth = "3"; -string h_paragraph_separation = "indent"; -string h_defskip = "medskip"; -string h_quotes_language = "english"; -string h_papercolumns = "1"; -string h_papersides = string(); -string h_paperpagestyle = "default"; -string h_tracking_changes = "false"; -string h_output_changes = "false"; - - -void handle_opt(vector & opts, char const * const * what, string & target) -{ - if (opts.empty()) - return; - - for ( ; *what; ++what) { - vector::iterator it = find(opts.begin(), opts.end(), *what); - if (it != opts.end()) { - //cerr << "### found option '" << *what << "'\n"; - target = *what; - opts.erase(it); - return; - } - } -} - - -/*! - * Split a package options string (keyval format) into a vector. - * Example input: - * authorformat=smallcaps, - * commabeforerest, - * titleformat=colonsep, - * bibformat={tabular,ibidem,numbered} - */ -vector split_options(string const & input) -{ - vector options; - string option; - Parser p(input); - while (p.good()) { - Token const & t = p.get_token(); - if (t.asInput() == ",") { - options.push_back(option); - option.erase(); - } else if (t.asInput() == "=") { - option += '='; - p.skip_spaces(true); - if (p.next_token().asInput() == "{") - option += '{' + p.getArg('{', '}') + '}'; - } else if (t.cat() != catSpace) - option += t.asInput(); - } - - if (!option.empty()) - options.push_back(option); - - return options; -} - - -/*! - * Add package \p name with options \p options to used_packages. - * Remove options from \p options that we don't want to output. 
- */ -void add_package(string const & name, vector & options) -{ - // every package inherits the global options - if (used_packages.find(name) == used_packages.end()) - used_packages[name] = split_options(h_options); - - vector & v = used_packages[name]; - v.insert(v.end(), options.begin(), options.end()); - if (name == "jurabib") { - // Don't output the order argument (see the cite command - // handling code in text.cpp). - vector::iterator end = - remove(options.begin(), options.end(), "natbiborder"); - end = remove(options.begin(), end, "jurabiborder"); - options.erase(end, options.end()); - } -} - - -void handle_package(string const & name, string const & opts) -{ - vector options = split_options(opts); - add_package(name, options); - - //cerr << "handle_package: '" << name << "'\n"; - if (name == "ae") - h_fontscheme = "ae"; - else if (name == "aecompl") - h_fontscheme = "ae"; - else if (name == "amsmath") - h_use_amsmath = "1"; - else if (name == "amssymb") - h_use_amsmath = "1"; - else if (name == "babel") - ; // ignore this - else if (name == "fontenc") - ; // ignore this - else if (name == "inputenc") { - h_inputencoding = opts; - options.clear(); - } else if (name == "makeidx") - ; // ignore this - else if (name == "verbatim") - ; // ignore this - else if (name == "graphicx") - ; // ignore this - else if (is_known(name, known_languages)) { - if (is_known(name, known_french_languages)) { - h_language = "french"; - h_quotes_language = "french"; - } else { - h_language = name; - h_quotes_language = name; - } - - } else if (name == "natbib") { - h_cite_engine = "natbib_authoryear"; - vector::iterator it = - find(options.begin(), options.end(), "authoryear"); - if (it != options.end()) - options.erase(it); - else { - it = find(options.begin(), options.end(), "numbers"); - if (it != options.end()) { - h_cite_engine = "natbib_numerical"; - options.erase(it); - } - } - } else if (name == "jurabib") { - h_cite_engine = "jurabib"; - } else if (options.empty()) - 
h_preamble << "\\usepackage{" << name << "}\n"; - else { - h_preamble << "\\usepackage[" << opts << "]{" << name << "}\n"; - options.clear(); - } - - // We need to do something with the options... - if (!options.empty()) - cerr << "Ignoring options '" << join(options, ",") - << "' of package " << name << '.' << endl; -} - - - -void end_preamble(ostream & os, LyXTextClass const & /*textclass*/) -{ - os << "#LyX file created by tex2lyx 0.1.2\n" - << "\\lyxformat 245\n" - << "\\begin_document\n" - << "\\begin_header\n" - << "\\textclass " << h_textclass << "\n" - << "\\begin_preamble\n" << h_preamble.str() << "\n\\end_preamble\n"; - if (!h_options.empty()) - os << "\\options " << h_options << "\n"; - os << "\\language " << h_language << "\n" - << "\\inputencoding " << h_inputencoding << "\n" - << "\\fontscheme " << h_fontscheme << "\n" - << "\\graphics " << h_graphics << "\n" - << "\\paperfontsize " << h_paperfontsize << "\n" - << "\\spacing " << h_spacing << "\n" - << "\\papersize " << h_papersize << "\n" - << "\\use_geometry " << h_use_geometry << "\n" - << "\\use_amsmath " << h_use_amsmath << "\n" - << "\\cite_engine " << h_cite_engine << "\n" - << "\\use_bibtopic " << h_use_bibtopic << "\n" - << "\\paperorientation " << h_paperorientation << "\n" - << "\\secnumdepth " << h_secnumdepth << "\n" - << "\\tocdepth " << h_tocdepth << "\n" - << "\\paragraph_separation " << h_paragraph_separation << "\n" - << "\\defskip " << h_defskip << "\n" - << "\\quotes_language " << h_quotes_language << "\n" - << "\\papercolumns " << h_papercolumns << "\n" - << "\\papersides " << h_papersides << "\n" - << "\\paperpagestyle " << h_paperpagestyle << "\n" - << "\\tracking_changes " << h_tracking_changes << "\n" - << "\\output_changes " << h_output_changes << "\n" - << "\\end_header\n\n" - << "\\begin_body\n"; - // clear preamble for subdocuments - h_preamble.str(""); -} - -} // anonymous namespace - -LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & forceclass) 
-{ - // initialize fixed types - special_columns['D'] = 3; - bool is_full_document = false; - - // determine whether this is a full document or a fragment for inclusion - while (p.good()) { - Token const & t = p.get_token(); - - if (t.cat() == catEscape && t.cs() == "documentclass") { - is_full_document = true; - break; - } - } - p.reset(); - - while (is_full_document && p.good()) { - Token const & t = p.get_token(); - -#ifdef FILEDEBUG - cerr << "t: " << t << "\n"; -#endif - - // - // cat codes - // - if (t.cat() == catLetter || - t.cat() == catSuper || - t.cat() == catSub || - t.cat() == catOther || - t.cat() == catMath || - t.cat() == catActive || - t.cat() == catBegin || - t.cat() == catEnd || - t.cat() == catAlign || - t.cat() == catParameter) - h_preamble << t.character(); - - else if (t.cat() == catSpace || t.cat() == catNewline) - h_preamble << t.asInput(); - - else if (t.cat() == catComment) - h_preamble << t.asInput(); - - else if (t.cs() == "pagestyle") - h_paperpagestyle = p.verbatim_item(); - - else if (t.cs() == "makeatletter") { - p.setCatCode('@', catLetter); - h_preamble << "\\makeatletter"; - } - - else if (t.cs() == "makeatother") { - p.setCatCode('@', catOther); - h_preamble << "\\makeatother"; - } - - else if (t.cs() == "newcommand" || t.cs() == "renewcommand" - || t.cs() == "providecommand") { - bool star = false; - if (p.next_token().character() == '*') { - p.get_token(); - star = true; - } - string const name = p.verbatim_item(); - string const opt1 = p.getOpt(); - string const opt2 = p.getFullOpt(); - string const body = p.verbatim_item(); - // only non-lyxspecific stuff - if ( name != "\\noun" - && name != "\\tabularnewline" - && name != "\\LyX" - && name != "\\lyxline" - && name != "\\lyxaddress" - && name != "\\lyxrightaddress" - && name != "\\lyxdot" - && name != "\\boldsymbol" - && name != "\\lyxarrow") { - ostringstream ss; - ss << '\\' << t.cs(); - if (star) - ss << '*'; - ss << '{' << name << '}' << opt1 << opt2 - << '{' << body << 
"}"; - h_preamble << ss.str(); - - // Add the command to the known commands - add_known_command(name, opt1, !opt2.empty()); -/* - ostream & out = in_preamble ? h_preamble : os; - out << "\\" << t.cs() << "{" << name << "}" - << opts << "{" << body << "}"; -*/ - } - } - - else if (t.cs() == "documentclass") { - vector opts; - split(p.getArg('[', ']'), opts, ','); - handle_opt(opts, known_languages, h_language); - if (is_known(h_language, known_french_languages)) - h_language = "french"; - handle_opt(opts, known_fontsizes, h_paperfontsize); - // delete "pt" at the end - string::size_type i = h_paperfontsize.find("pt"); - if (i != string::npos) - h_paperfontsize.erase(i); - h_quotes_language = h_language; - h_options = join(opts, ","); - h_textclass = p.getArg('{', '}'); - } - - else if (t.cs() == "usepackage") { - string const options = p.getArg('[', ']'); - string const name = p.getArg('{', '}'); - if (options.empty() && name.find(',')) { - vector vecnames; - split(name, vecnames, ','); - vector::const_iterator it = vecnames.begin(); - vector::const_iterator end = vecnames.end(); - for (; it != end; ++it) - handle_package(trim(*it), string()); - } else { - handle_package(name, options); - } - } - - else if (t.cs() == "newenvironment") { - string const name = p.getArg('{', '}'); - ostringstream ss; - ss << "\\newenvironment{" << name << "}"; - ss << p.getOpt(); - ss << p.getOpt(); - ss << '{' << p.verbatim_item() << '}'; - ss << '{' << p.verbatim_item() << '}'; - if (name != "lyxcode" && name != "lyxlist" && - name != "lyxrightadress" && - name != "lyxaddress" && name != "lyxgreyedout") - h_preamble << ss.str(); - } - - else if (t.cs() == "def") { - string name = p.get_token().cs(); - while (p.next_token().cat() != catBegin) - name += p.get_token().asString(); - h_preamble << "\\def\\" << name << '{' - << p.verbatim_item() << "}"; - } - - else if (t.cs() == "newcolumntype") { - string const name = p.getArg('{', '}'); - trim(name); - int nargs = 0; - string opts = 
p.getOpt(); - if (!opts.empty()) { - istringstream is(string(opts, 1)); - //cerr << "opt: " << is.str() << "\n"; - is >> nargs; - } - special_columns[name[0]] = nargs; - h_preamble << "\\newcolumntype{" << name << "}"; - if (nargs) - h_preamble << "[" << nargs << "]"; - h_preamble << "{" << p.verbatim_item() << "}"; - } - - else if (t.cs() == "setcounter") { - string const name = p.getArg('{', '}'); - string const content = p.getArg('{', '}'); - if (name == "secnumdepth") - h_secnumdepth = content; - else if (name == "tocdepth") - h_tocdepth = content; - else - h_preamble << "\\setcounter{" << name << "}{" << content << "}"; - } - - else if (t.cs() == "setlength") { - string const name = p.verbatim_item(); - string const content = p.verbatim_item(); - // Is this correct? - if (name == "parskip") - h_paragraph_separation = "skip"; - else if (name == "parindent") - h_paragraph_separation = "skip"; - else - h_preamble << "\\setlength{" << name << "}{" << content << "}"; - } - - else if (t.cs() == "begin") { - string const name = p.getArg('{', '}'); - if (name == "document") - break; - h_preamble << "\\begin{" << name << "}"; - } - - else if (t.cs() == "jurabibsetup") { - vector jurabibsetup = - split_options(p.getArg('{', '}')); - // add jurabibsetup to the jurabib package options - add_package("jurabib", jurabibsetup); - if (!jurabibsetup.empty()) { - h_preamble << "\\jurabibsetup{" - << join(jurabibsetup, ",") << '}'; - } - } - - else if (!t.cs().empty()) - h_preamble << '\\' << t.cs(); - } - p.skip_spaces(); - - // Force textclass if the user wanted it - if (!forceclass.empty()) - h_textclass = forceclass; - if (noweb_mode && !lyx::support::prefixIs(h_textclass, "literate-")) - h_textclass.insert(0, "literate-"); - FileName layoutfilename = libFileSearch("layouts", h_textclass, "layout"); - if (layoutfilename.empty()) { - cerr << "Error: Could not find layout file for textclass \"" << h_textclass << "\"." 
<< endl; - exit(1); - } - LyXTextClass textclass; - textclass.read(layoutfilename); - if (h_papersides.empty()) { - ostringstream ss; - ss << textclass.sides(); - h_papersides = ss.str(); - } - end_preamble(os, textclass); - return textclass; -} - -// }]) - - -} // namespace lyx diff --git a/src/tex2lyx/preamble.cpp b/src/tex2lyx/preamble.cpp new file mode 100644 index 0000000000..db16af2081 --- /dev/null +++ b/src/tex2lyx/preamble.cpp @@ -0,0 +1,514 @@ +/** + * \file preamble.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author André Pönitz + * + * Full author contact details are available in file CREDITS. + */ + +// {[( + +#include + +#include "tex2lyx.h" + +#include "layout.h" +#include "LyXTextClass.h" +#include "LyXLex.h" +#include "support/filetools.h" +#include "support/lstrings.h" + +#include +#include +#include +#include +#include +#include + + +namespace lyx { + +using std::istringstream; +using std::ostream; +using std::ostringstream; +using std::string; +using std::vector; +using std::cerr; +using std::endl; + +using support::FileName; +using support::libFileSearch; + +// special columntypes +extern std::map special_columns; + +std::map > used_packages; + +namespace { + +const char * const known_languages[] = { "austrian", "babel", "bahasa", +"basque", "breton", "british", "bulgarian", "catalan", "croatian", "czech", +"danish", "dutch", "english", "esperanto", "estonian", "finnish", +"francais", "french", "frenchb", "frenchle", "frenchpro", +"galician", "german", "germanb", "greek", "hebcal", "hebfont", +"hebrew", "hebrew_newcode", "hebrew_oldcode", "hebrew_p", "hyphen", +"icelandic", "irish", "italian", "latin", "lgrcmr", "lgrcmro", "lgrcmss", +"lgrcmtt", "lgrenc", "lgrlcmss", "lgrlcmtt", "lheclas", "lhecmr", +"lhecmss", "lhecmtt", "lhecrml", "lheenc", "lhefr", "lheredis", "lheshold", +"lheshscr", "lheshstk", "lsorbian", "magyar", "naustrian", "ngermanb", +"ngerman", "norsk", 
"polish", "portuges", "rlbabel", "romanian", +"russianb", "samin", "scottish", "serbian", "slovak", "slovene", "spanish", +"swedish", "turkish", "ukraineb", "usorbian", "welsh", 0}; + +const char * const known_french_languages[] = {"french", "frenchb", "francais", + "frenchle", "frenchpro", 0}; +char const * const known_fontsizes[] = { "10pt", "11pt", "12pt", 0 }; + +// header settings gathered while parsing; end_preamble() writes them out +ostringstream h_preamble; +string h_textclass = "article"; +string h_options = string(); +string h_language = "english"; +string h_inputencoding = "latin1"; +string h_fontscheme = "default"; +string h_graphics = "default"; +string h_paperfontsize = "default"; +string h_spacing = "single"; +string h_papersize = "default"; +string h_use_geometry = "false"; +string h_use_amsmath = "0"; +string h_cite_engine = "basic"; +string h_use_bibtopic = "false"; +string h_paperorientation = "portrait"; +string h_secnumdepth = "3"; +string h_tocdepth = "3"; +string h_paragraph_separation = "indent"; +string h_defskip = "medskip"; +string h_quotes_language = "english"; +string h_papercolumns = "1"; +string h_papersides = string(); +string h_paperpagestyle = "default"; +string h_tracking_changes = "false"; +string h_output_changes = "false"; + + +/*! + * Search \p opts for the entries of the null-terminated list \p what. + * The first entry of \p what that occurs in \p opts is stored in \p target + * and erased from \p opts; without a match both are left unchanged. + */ +void handle_opt(vector & opts, char const * const * what, string & target) +{ + if (opts.empty()) + return; + + for ( ; *what; ++what) { + vector::iterator it = find(opts.begin(), opts.end(), *what); + if (it != opts.end()) { + //cerr << "### found option '" << *what << "'\n"; + target = *what; + opts.erase(it); + return; + } + } +} + + +/*! + * Split a package options string (keyval format) into a vector. 
+ * Example input: + * authorformat=smallcaps, + * commabeforerest, + * titleformat=colonsep, + * bibformat={tabular,ibidem,numbered} + */ +vector split_options(string const & input) +{ + vector options; + string option; + Parser p(input); + while (p.good()) { + Token const & t = p.get_token(); + if (t.asInput() == ",") { + options.push_back(option); + option.erase(); + } else if (t.asInput() == "=") { + option += '='; + p.skip_spaces(true); + /* a braced value is copied as one unit, so commas inside it do not split the option */ + if (p.next_token().asInput() == "{") + option += '{' + p.getArg('{', '}') + '}'; + } else if (t.cat() != catSpace) + option += t.asInput(); + } + + if (!option.empty()) + options.push_back(option); + + return options; +} + + +/*! + * Add package \p name with options \p options to used_packages. + * Remove options from \p options that we don't want to output. + */ +void add_package(string const & name, vector & options) +{ + // every package inherits the global options + if (used_packages.find(name) == used_packages.end()) + used_packages[name] = split_options(h_options); + + vector & v = used_packages[name]; + /* append, so options recorded by earlier calls for this package are kept */ + v.insert(v.end(), options.begin(), options.end()); + if (name == "jurabib") { + // Don't output the order argument (see the cite command + // handling code in text.cpp). 
+ vector::iterator end = + remove(options.begin(), options.end(), "natbiborder"); + end = remove(options.begin(), end, "jurabiborder"); + options.erase(end, options.end()); + } +} + + +/*! + * Register package \p name (option string \p opts) via add_package() and + * map it to the LyX header settings for which a case exists below. + * Any other package is copied to the preamble as a usepackage line. + * Options that are still left at the end are reported on cerr as ignored. + */ +void handle_package(string const & name, string const & opts) +{ + vector options = split_options(opts); + add_package(name, options); + + //cerr << "handle_package: '" << name << "'\n"; + if (name == "ae") + h_fontscheme = "ae"; + else if (name == "aecompl") + h_fontscheme = "ae"; + else if (name == "amsmath") + h_use_amsmath = "1"; + else if (name == "amssymb") + h_use_amsmath = "1"; + else if (name == "babel") + ; // ignore this + else if (name == "fontenc") + ; // ignore this + else if (name == "inputenc") { + h_inputencoding = opts; + options.clear(); + } else if (name == "makeidx") + ; // ignore this + else if (name == "verbatim") + ; // ignore this + else if (name == "graphicx") + ; // ignore this + else if (is_known(name, known_languages)) { + if (is_known(name, known_french_languages)) { + h_language = "french"; + h_quotes_language = "french"; + } else { + h_language = name; + h_quotes_language = name; + } + + } else if (name == "natbib") { + h_cite_engine = "natbib_authoryear"; + vector::iterator it = + find(options.begin(), options.end(), "authoryear"); + if (it != options.end()) + options.erase(it); + else { + it = find(options.begin(), options.end(), "numbers"); + if (it != options.end()) { + h_cite_engine = "natbib_numerical"; + options.erase(it); + } + } + } else if (name == "jurabib") { + h_cite_engine = "jurabib"; + } else if (options.empty()) + h_preamble << "\\usepackage{" << name << "}\n"; + else { + h_preamble << "\\usepackage[" << opts << "]{" << name << "}\n"; + options.clear(); + } + + // We need to do something with the options... + if (!options.empty()) + cerr << "Ignoring options '" << join(options, ",") + << "' of package " << name << '.' 
<< endl; +} + + + +/*! + * Write the LyX file header to \p os: format line, text class, the + * collected preamble and the h_ settings, ending with the begin_body line. + * h_preamble is emptied afterwards. The text class argument is unused. + */ +void end_preamble(ostream & os, LyXTextClass const & /*textclass*/) +{ + os << "#LyX file created by tex2lyx 0.1.2\n" + << "\\lyxformat 245\n" + << "\\begin_document\n" + << "\\begin_header\n" + << "\\textclass " << h_textclass << "\n" + << "\\begin_preamble\n" << h_preamble.str() << "\n\\end_preamble\n"; + if (!h_options.empty()) + os << "\\options " << h_options << "\n"; + os << "\\language " << h_language << "\n" + << "\\inputencoding " << h_inputencoding << "\n" + << "\\fontscheme " << h_fontscheme << "\n" + << "\\graphics " << h_graphics << "\n" + << "\\paperfontsize " << h_paperfontsize << "\n" + << "\\spacing " << h_spacing << "\n" + << "\\papersize " << h_papersize << "\n" + << "\\use_geometry " << h_use_geometry << "\n" + << "\\use_amsmath " << h_use_amsmath << "\n" + << "\\cite_engine " << h_cite_engine << "\n" + << "\\use_bibtopic " << h_use_bibtopic << "\n" + << "\\paperorientation " << h_paperorientation << "\n" + << "\\secnumdepth " << h_secnumdepth << "\n" + << "\\tocdepth " << h_tocdepth << "\n" + << "\\paragraph_separation " << h_paragraph_separation << "\n" + << "\\defskip " << h_defskip << "\n" + << "\\quotes_language " << h_quotes_language << "\n" + << "\\papercolumns " << h_papercolumns << "\n" + << "\\papersides " << h_papersides << "\n" + << "\\paperpagestyle " << h_paperpagestyle << "\n" + << "\\tracking_changes " << h_tracking_changes << "\n" + << "\\output_changes " << h_output_changes << "\n" + << "\\end_header\n\n" + << "\\begin_body\n"; + // clear preamble for subdocuments + h_preamble.str(""); +} + +} // anonymous namespace + +/*! + * Parse the LaTeX preamble read from \p p, up to begin{document}, and write + * the resulting LyX header to \p os. A non-empty \p forceclass replaces the + * document class found in the input. The program exits if no layout file + * exists for the class. + * \returns the text class read from that layout file + */ +LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & forceclass) +{ + // initialize fixed types + special_columns['D'] = 3; + bool is_full_document = false; + + // determine whether this is a full document or a fragment for inclusion + while (p.good()) { + Token const & t = p.get_token(); + + if (t.cat() == catEscape && t.cs() == "documentclass") { + is_full_document = 
true; + break; + } + } + /* start over for the real pass (assumes reset() rewinds the parser - confirm) */ + p.reset(); + + /* the main loop runs only for full documents; a fragment falls straight through to the class lookup after the loop */ + while (is_full_document && p.good()) { + Token const & t = p.get_token(); + +#ifdef FILEDEBUG + cerr << "t: " << t << "\n"; +#endif + + // + // cat codes + // + if (t.cat() == catLetter || + t.cat() == catSuper || + t.cat() == catSub || + t.cat() == catOther || + t.cat() == catMath || + t.cat() == catActive || + t.cat() == catBegin || + t.cat() == catEnd || + t.cat() == catAlign || + t.cat() == catParameter) + h_preamble << t.character(); + + else if (t.cat() == catSpace || t.cat() == catNewline) + h_preamble << t.asInput(); + + else if (t.cat() == catComment) + h_preamble << t.asInput(); + + else if (t.cs() == "pagestyle") + h_paperpagestyle = p.verbatim_item(); + + else if (t.cs() == "makeatletter") { + p.setCatCode('@', catLetter); + h_preamble << "\\makeatletter"; + } + + else if (t.cs() == "makeatother") { + p.setCatCode('@', catOther); + h_preamble << "\\makeatother"; + } + + else if (t.cs() == "newcommand" || t.cs() == "renewcommand" + || t.cs() == "providecommand") { + bool star = false; + if (p.next_token().character() == '*') { + p.get_token(); + star = true; + } + string const name = p.verbatim_item(); + string const opt1 = p.getOpt(); + string const opt2 = p.getFullOpt(); + string const body = p.verbatim_item(); + // only non-lyxspecific stuff + if ( name != "\\noun" + && name != "\\tabularnewline" + && name != "\\LyX" + && name != "\\lyxline" + && name != "\\lyxaddress" + && name != "\\lyxrightaddress" + && name != "\\lyxdot" + && name != "\\boldsymbol" + && name != "\\lyxarrow") { + ostringstream ss; + ss << '\\' << t.cs(); + if (star) + ss << '*'; + ss << '{' << name << '}' << opt1 << opt2 + << '{' << body << "}"; + h_preamble << ss.str(); + + // Add the command to the known commands + add_known_command(name, opt1, !opt2.empty()); +/* + ostream & out = in_preamble ? 
h_preamble : os; + out << "\\" << t.cs() << "{" << name << "}" + << opts << "{" << body << "}"; +*/ + } + } + + else if (t.cs() == "documentclass") { + vector opts; + split(p.getArg('[', ']'), opts, ','); + handle_opt(opts, known_languages, h_language); + if (is_known(h_language, known_french_languages)) + h_language = "french"; + handle_opt(opts, known_fontsizes, h_paperfontsize); + // delete "pt" at the end + string::size_type i = h_paperfontsize.find("pt"); + if (i != string::npos) + h_paperfontsize.erase(i); + h_quotes_language = h_language; + h_options = join(opts, ","); + h_textclass = p.getArg('{', '}'); + } + + else if (t.cs() == "usepackage") { + string const options = p.getArg('[', ']'); + string const name = p.getArg('{', '}'); + /* find() returns npos, which is non-zero, when there is no comma, so compare explicitly */ + if (options.empty() && name.find(',') != string::npos) { + vector vecnames; + split(name, vecnames, ','); + vector::const_iterator it = vecnames.begin(); + vector::const_iterator end = vecnames.end(); + for (; it != end; ++it) + handle_package(trim(*it), string()); + } else { + /* a single name used to pass through the trimming branch above, so keep trimming it */ + handle_package(trim(name), options); + } + } + + else if (t.cs() == "newenvironment") { + string const name = p.getArg('{', '}'); + ostringstream ss; + ss << "\\newenvironment{" << name << "}"; + ss << p.getOpt(); + ss << p.getOpt(); + ss << '{' << p.verbatim_item() << '}'; + ss << '{' << p.verbatim_item() << '}'; + if (name != "lyxcode" && name != "lyxlist" && + name != "lyxrightadress" && + name != "lyxaddress" && name != "lyxgreyedout") + h_preamble << ss.str(); + } + + else if (t.cs() == "def") { + string name = p.get_token().cs(); + while (p.next_token().cat() != catBegin) + name += p.get_token().asString(); + h_preamble << "\\def\\" << name << '{' + << p.verbatim_item() << "}"; + } + + else if (t.cs() == "newcolumntype") { + /* trim() returns the trimmed copy, so the result has to be stored */ + string const name = trim(p.getArg('{', '}')); + int nargs = 0; + string opts = p.getOpt(); + if (!opts.empty()) { + istringstream is(string(opts, 1)); + //cerr << "opt: " << is.str() << "\n"; + is >> nargs; + } + special_columns[name[0]] = 
nargs; + h_preamble << "\\newcolumntype{" << name << "}"; + if (nargs) + h_preamble << "[" << nargs << "]"; + h_preamble << "{" << p.verbatim_item() << "}"; + } + + else if (t.cs() == "setcounter") { + string const name = p.getArg('{', '}'); + string const content = p.getArg('{', '}'); + if (name == "secnumdepth") + h_secnumdepth = content; + else if (name == "tocdepth") + h_tocdepth = content; + else + h_preamble << "\\setcounter{" << name << "}{" << content << "}"; + } + + else if (t.cs() == "setlength") { + string const name = p.verbatim_item(); + string const content = p.verbatim_item(); + // Is this correct? + /* NOTE(review): for newcommand the verbatim_item() result is compared with a leading backslash; if that also holds here these two names never match - confirm */ + if (name == "parskip") + h_paragraph_separation = "skip"; + else if (name == "parindent") + h_paragraph_separation = "skip"; + else + h_preamble << "\\setlength{" << name << "}{" << content << "}"; + } + + else if (t.cs() == "begin") { + string const name = p.getArg('{', '}'); + /* begin{document} ends the preamble; any other environment is copied through */ + if (name == "document") + break; + h_preamble << "\\begin{" << name << "}"; + } + + else if (t.cs() == "jurabibsetup") { + vector jurabibsetup = + split_options(p.getArg('{', '}')); + // add jurabibsetup to the jurabib package options + add_package("jurabib", jurabibsetup); + if (!jurabibsetup.empty()) { + h_preamble << "\\jurabibsetup{" + << join(jurabibsetup, ",") << '}'; + } + } + + else if (!t.cs().empty()) + h_preamble << '\\' << t.cs(); + } + p.skip_spaces(); + + // Force textclass if the user wanted it + if (!forceclass.empty()) + h_textclass = forceclass; + if (noweb_mode && !lyx::support::prefixIs(h_textclass, "literate-")) + h_textclass.insert(0, "literate-"); + FileName layoutfilename = libFileSearch("layouts", h_textclass, "layout"); + if (layoutfilename.empty()) { + cerr << "Error: Could not find layout file for textclass \"" << h_textclass << "\"." 
<< endl; + exit(1); + } + LyXTextClass textclass; + textclass.read(layoutfilename); + /* no papersides value yet: use the one from the text class */ + if (h_papersides.empty()) { + ostringstream ss; + ss << textclass.sides(); + h_papersides = ss.str(); + } + end_preamble(os, textclass); + return textclass; +} + +// }]) + + +} // namespace lyx diff --git a/src/tex2lyx/table.C b/src/tex2lyx/table.C deleted file mode 100644 index 35de97af6a..0000000000 --- a/src/tex2lyx/table.C +++ /dev/null @@ -1,1124 +0,0 @@ -/** - * \file table.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author André Pönitz - * \author Jean-Marc Lasgouttes - * \author Georg Baum - * - * Full author contact details are available in file CREDITS. - */ - -// {[( - -#include - -#include "tex2lyx.h" - -#include "support/convert.h" -#include "support/lstrings.h" - -#include -#include -#include -#include - - -namespace lyx { - -using std::cerr; -using std::endl; -using std::istringstream; -using std::ostream; -using std::ostringstream; -using std::string; -using std::vector; - - -// filled in preamble.cpp -std::map special_columns; - - -namespace { - -class ColInfo { -public: - ColInfo() : align('n'), valign('n'), rightlines(0), leftlines(0) {} - /// column alignment - char align; - /// vertical alignment - char valign; - /// column width - string width; - /// special column alignment - string special; - /// number of lines on the right - int rightlines; - /// number of lines on the left - int leftlines; -}; - - -/// row type for longtables -enum LTRowType -{ - /// normal row - LT_NORMAL, - /// part of head - LT_HEAD, - /// part of head on first page - LT_FIRSTHEAD, - /// part of foot - LT_FOOT, - /// part of foot on last page - LT_LASTFOOT -}; - - -class RowInfo { -public: - RowInfo() : topline(false), bottomline(false), type(LT_NORMAL), - newpage(false) {} - /// horizontal line above - bool topline; - /// horizontal line below - bool bottomline; - /// These are for longtabulars only - /// 
row type (head, foot, firsthead etc.) - LTRowType type; - /// row for a pagebreak - bool newpage; -}; - - -enum Multicolumn { - /// A normal cell - CELL_NORMAL = 0, - /// A multicolumn cell. The number of columns is 1 + number - /// of CELL_PART_OF_MULTICOLUMN cells that follow directly - CELL_BEGIN_OF_MULTICOLUMN, - /// This is a dummy cell (part of a multicolumn cell) - CELL_PART_OF_MULTICOLUMN -}; - - -class CellInfo { -public: - CellInfo() : multi(CELL_NORMAL), align('n'), valign('n'), - leftlines(0), rightlines(0), topline(false), - bottomline(false), rotate(false) {} - /// cell content - string content; - /// multicolumn flag - Multicolumn multi; - /// cell alignment - char align; - /// vertical cell alignment - char valign; - /// number of lines on the left - int leftlines; - /// number of lines on the right - int rightlines; - /// do we have a line above? - bool topline; - /// do we have a line below? - bool bottomline; - /// is the cell rotated? - bool rotate; - /// width for multicolumn cells - string width; - /// special formatting for multicolumn cells - string special; -}; - - -/// translate a horizontal alignment (as stored in ColInfo and CellInfo) to LyX -inline char const * verbose_align(char c) -{ - switch (c) { - case 'c': - return "center"; - case 'r': - return "right"; - case 'l': - return "left"; - default: - return "none"; - } -} - - -/// translate a vertical alignment (as stored in ColInfo and CellInfo) to LyX -inline char const * verbose_valign(char c) -{ - // The default value for no special alignment is "top". - switch (c) { - case 'm': - return "middle"; - case 'b': - return "bottom"; - case 'p': - default: - return "top"; - } -} - - -// stripped down from tabluar.C. We use it currently only for bools and -// strings -string const write_attribute(string const & name, bool const & b) -{ - // we write only true attribute values so we remove a bit of the - // file format bloat for tabulars. - return b ? 
' ' + name + "=\"true\"" : string(); -} - - -string const write_attribute(string const & name, string const & s) -{ - return s.empty() ? string() : ' ' + name + "=\"" + s + '"'; -} - - -/*! rather brutish way to code table structure in a string: - -\verbatim - \begin{tabular}{ccc} - 1 & 2 & 3\\ \hline - \multicolumn{2}{c}{4} & 5 // - 6 & 7 \\ - 8 \endhead - \end{tabular} -\endverbatim - - gets "translated" to: - -\verbatim - HLINE 1 TAB 2 TAB 3 HLINE HLINE LINE - \hline HLINE \multicolumn{2}{c}{4} TAB 5 HLINE HLINE LINE - HLINE 6 TAB 7 HLINE HLINE LINE - HLINE 8 HLINE \endhead HLINE LINE -\endverbatim - */ - -char const TAB = '\001'; -char const LINE = '\002'; -char const HLINE = '\004'; - - -/*! - * Move the information in leftlines, rightlines, align and valign to the - * special field. This is necessary if the special field is not empty, - * because LyX ignores leftlines, rightlines, align and valign in this case. - */ -void ci2special(ColInfo & ci) -{ - if (ci.width.empty() && ci.align == 'n') - // The alignment setting is already in special, since - // handle_colalign() never stores ci with these settings - // and ensures that leftlines == 0 and rightlines == 0 in - // this case. - return; - - if (!ci.width.empty()) { - switch (ci.align) { - case 'l': - ci.special += ">{\\raggedright}"; - break; - case 'r': - ci.special += ">{\\raggedleft}"; - break; - case 'c': - ci.special += ">{\\centering}"; - break; - } - if (ci.valign == 'n') - ci.special += 'p'; - else - ci.special += ci.valign; - ci.special += '{' + ci.width + '}'; - ci.width.erase(); - } else - ci.special += ci.align; - - for (int i = 0; i < ci.leftlines; ++i) - ci.special.insert(0, "|"); - for (int i = 0; i < ci.rightlines; ++i) - ci.special += '|'; - ci.leftlines = 0; - ci.rightlines = 0; - ci.align = 'n'; - ci.valign = 'n'; -} - - -/*! - * Handle column specifications for tabulars and multicolumns. 
- * The next token of the parser \p p must be an opening brace, and we read - * everything until the matching closing brace. - * The resulting column specifications are filled into \p colinfo. This is - * in an intermediate form. fix_colalign() makes it suitable for LyX output. - */ -void handle_colalign(Parser & p, vector & colinfo, - ColInfo const & start) -{ - if (p.get_token().cat() != catBegin) - cerr << "Wrong syntax for table column alignment.\n" - "Expected '{', got '" << p.curr_token().asInput() - << "'.\n"; - - ColInfo next = start; - for (Token t = p.get_token(); p.good() && t.cat() != catEnd; - t = p.get_token()) { -#ifdef FILEDEBUG - cerr << "t: " << t << " c: '" << t.character() << "'\n"; -#endif - - // We cannot handle comments here - if (t.cat() == catComment) { - if (t.cs().empty()) { - // "%\n" combination - p.skip_spaces(); - } else - cerr << "Ignoring comment: " << t.asInput(); - continue; - } - - switch (t.character()) { - case 'c': - case 'l': - case 'r': - // new column, horizontal aligned - next.align = t.character(); - if (!next.special.empty()) - ci2special(next); - colinfo.push_back(next); - next = ColInfo(); - break; - case 'p': - case 'b': - case 'm': - // new column, vertical aligned box - next.valign = t.character(); - next.width = p.verbatim_item(); - if (!next.special.empty()) - ci2special(next); - colinfo.push_back(next); - next = ColInfo(); - break; - case '|': - // vertical rule - if (colinfo.empty()) { - if (next.special.empty()) - ++next.leftlines; - else - next.special += '|'; - } else if (colinfo.back().special.empty()) - ++colinfo.back().rightlines; - else if (next.special.empty()) - ++next.leftlines; - else - colinfo.back().special += '|'; - break; - case '>': { - // text before the next column - string const s = trim(p.verbatim_item()); - if (next.special.empty() && - next.align == 'n') { - // Maybe this can be converted to a - // horizontal alignment setting for - // fixed width columns - if (s == "\\raggedleft") - 
next.align = 'r'; - else if (s == "\\raggedright") - next.align = 'l'; - else if (s == "\\centering") - next.align = 'c'; - else - next.special = ">{" + s + '}'; - } else - next.special += ">{" + s + '}'; - break; - } - case '<': { - // text after the last column - string const s = trim(p.verbatim_item()); - if (colinfo.empty()) - // This is not possible in LaTeX. - cerr << "Ignoring separator '<{" - << s << "}'." << endl; - else { - ColInfo & ci = colinfo.back(); - ci2special(ci); - ci.special += "<{" + s + '}'; - } - break; - } - case '*': { - // *{n}{arg} means 'n' columns of type 'arg' - string const num = p.verbatim_item(); - string const arg = p.verbatim_item(); - size_t const n = convert(num); - if (!arg.empty() && n > 0) { - string s("{"); - for (size_t i = 0; i < n; ++i) - s += arg; - s += '}'; - Parser p2(s); - handle_colalign(p2, colinfo, next); - next = ColInfo(); - } else { - cerr << "Ignoring column specification" - " '*{" << num << "}{" - << arg << "}'." << endl; - } - break; - } - case '@': - // text instead of the column spacing - case '!': - // text in addition to the column spacing - next.special += t.character(); - next.special += '{' + p.verbatim_item() + '}'; - break; - default: - // try user defined column types - if (special_columns.find(t.character()) != - special_columns.end()) { - ci2special(next); - next.special += t.character(); - int const nargs = - special_columns[t.character()]; - for (int i = 0; i < nargs; ++i) - next.special += '{' + - p.verbatim_item() + - '}'; - colinfo.push_back(next); - next = ColInfo(); - } else - cerr << "Ignoring column specification" - " '" << t << "'." << endl; - break; - } - } - - // Maybe we have some column separators that need to be added to the - // last column? - ci2special(next); - if (!next.special.empty()) { - ColInfo & ci = colinfo.back(); - ci2special(ci); - ci.special += next.special; - next.special.erase(); - } -} - - -/*! 
- * Move the left and right lines and alignment settings of the column \p ci - * to the special field if necessary. - */ -void fix_colalign(ColInfo & ci) -{ - if (ci.leftlines > 1 || ci.rightlines > 1) - ci2special(ci); -} - - -/*! - * LyX can't handle more than one vertical line at the left or right side - * of a column. - * This function moves the left and right lines and alignment settings of all - * columns in \p colinfo to the special field if necessary. - */ -void fix_colalign(vector & colinfo) -{ - // Try to move extra leftlines to the previous column. - // We do this only if both special fields are empty, otherwise we - // can't tell wether the result will be the same. - for (size_t col = 0; col < colinfo.size(); ++col) { - if (colinfo[col].leftlines > 1 && - colinfo[col].special.empty() && col > 0 && - colinfo[col - 1].rightlines == 0 && - colinfo[col - 1].special.empty()) { - ++colinfo[col - 1].rightlines; - --colinfo[col].leftlines; - } - } - // Try to move extra rightlines to the next column - for (size_t col = 0; col < colinfo.size(); ++col) { - if (colinfo[col].rightlines > 1 && - colinfo[col].special.empty() && - col < colinfo.size() - 1 && - colinfo[col + 1].leftlines == 0 && - colinfo[col + 1].special.empty()) { - ++colinfo[col + 1].leftlines; - --colinfo[col].rightlines; - } - } - // Move the lines and alignment settings to the special field if - // necessary - for (size_t col = 0; col < colinfo.size(); ++col) - fix_colalign(colinfo[col]); -} - - -/*! - * Parse hlines and similar stuff. 
- * \returns wether the token \p t was parsed - */ -bool parse_hlines(Parser & p, Token const & t, string & hlines, - bool is_long_tabular) -{ - BOOST_ASSERT(t.cat() == catEscape); - - if (t.cs() == "hline") - hlines += "\\hline"; - - else if (t.cs() == "cline") - hlines += "\\cline{" + p.verbatim_item() + '}'; - - else if (is_long_tabular && t.cs() == "newpage") - hlines += "\\newpage"; - - else - return false; - - return true; -} - - -/// Position in a row -enum RowPosition { - /// At the very beginning, before the first token - ROW_START, - /// After the first token and before any column token - IN_HLINES_START, - /// After the first column token. Comments and whitespace are only - /// treated as tokens in this position - IN_COLUMNS, - /// After the first non-column token at the end - IN_HLINES_END -}; - - -/*! - * Parse table structure. - * We parse tables in a two-pass process: This function extracts the table - * structure (rows, columns, hlines etc.), but does not change the cell - * content. The cell content is parsed in a second step in handle_tabular(). 
- */ -void parse_table(Parser & p, ostream & os, bool is_long_tabular, - RowPosition & pos, unsigned flags) -{ - // table structure commands such as \hline - string hlines; - - // comments that occur at places where we can't handle them - string comments; - - while (p.good()) { - Token const & t = p.get_token(); - -#ifdef FILEDEBUG - cerr << "t: " << t << " flags: " << flags << "\n"; -#endif - - // comments and whitespace in hlines - switch (pos) { - case ROW_START: - case IN_HLINES_START: - case IN_HLINES_END: - if (t.cat() == catComment) { - if (t.cs().empty()) - // line continuation - p.skip_spaces(); - else - // We can't handle comments here, - // store them for later use - comments += t.asInput(); - continue; - } else if (t.cat() == catSpace || - t.cat() == catNewline) { - // whitespace is irrelevant here, we - // need to recognize hline stuff - p.skip_spaces(); - continue; - } - break; - case IN_COLUMNS: - break; - } - - // We need to handle structure stuff first in order to - // determine wether we need to output a HLINE separator - // before the row or not. - if (t.cat() == catEscape) { - if (parse_hlines(p, t, hlines, is_long_tabular)) { - switch (pos) { - case ROW_START: - pos = IN_HLINES_START; - break; - case IN_COLUMNS: - pos = IN_HLINES_END; - break; - case IN_HLINES_START: - case IN_HLINES_END: - break; - } - continue; - } - - else if (t.cs() == "tabularnewline" || - t.cs() == "\\" || - t.cs() == "cr") { - if (t.cs() == "cr") - cerr << "Warning: Converting TeX " - "'\\cr' to LaTeX '\\\\'." 
- << endl; - // stuff before the line break - os << comments << HLINE << hlines << HLINE - << LINE; - //cerr << "hlines: " << hlines << endl; - hlines.erase(); - comments.erase(); - pos = ROW_START; - continue; - } - - else if (is_long_tabular && - (t.cs() == "endhead" || - t.cs() == "endfirsthead" || - t.cs() == "endfoot" || - t.cs() == "endlastfoot")) { - hlines += t.asInput(); - switch (pos) { - case IN_COLUMNS: - case IN_HLINES_END: - // these commands are implicit line - // breaks - os << comments << HLINE << hlines - << HLINE << LINE; - hlines.erase(); - comments.erase(); - pos = ROW_START; - break; - case ROW_START: - pos = IN_HLINES_START; - break; - case IN_HLINES_START: - break; - } - continue; - } - - } - - // We need a HLINE separator if we either have no hline - // stuff at all and are just starting a row or if we just - // got the first non-hline token. - switch (pos) { - case ROW_START: - // no hline tokens exist, first token at row start - case IN_HLINES_START: - // hline tokens exist, first non-hline token at row - // start - os << hlines << HLINE << comments; - hlines.erase(); - comments.erase(); - pos = IN_COLUMNS; - break; - case IN_HLINES_END: - // Oops, there is still cell content after hline - // stuff. This does not work in LaTeX, so we ignore - // the hlines. 
- cerr << "Ignoring '" << hlines << "' in a cell" - << endl; - os << comments; - hlines.erase(); - comments.erase(); - pos = IN_COLUMNS; - break; - case IN_COLUMNS: - break; - } - - // If we come here we have normal cell content - // - // cat codes - // - if (t.cat() == catMath) { - // we are inside some text mode thingy, so opening new math is allowed - Token const & n = p.get_token(); - if (n.cat() == catMath) { - // TeX's $$...$$ syntax for displayed math - os << "\\["; - // This does only work because parse_math outputs TeX - parse_math(p, os, FLAG_SIMPLE, MATH_MODE); - os << "\\]"; - p.get_token(); // skip the second '$' token - } else { - // simple $...$ stuff - p.putback(); - os << '$'; - // This does only work because parse_math outputs TeX - parse_math(p, os, FLAG_SIMPLE, MATH_MODE); - os << '$'; - } - } - - else if (t.cat() == catSpace || t.cat() == catNewline) - os << t.cs(); - - else if (t.cat() == catLetter || - t.cat() == catSuper || - t.cat() == catSub || - t.cat() == catOther || - t.cat() == catActive || - t.cat() == catParameter) - os << t.character(); - - else if (t.cat() == catBegin) { - os << '{'; - parse_table(p, os, is_long_tabular, pos, - FLAG_BRACE_LAST); - os << '}'; - } - - else if (t.cat() == catEnd) { - if (flags & FLAG_BRACE_LAST) - return; - cerr << "unexpected '}'\n"; - } - - else if (t.cat() == catAlign) { - os << TAB; - p.skip_spaces(); - } - - else if (t.cat() == catComment) - os << t.asInput(); - - else if (t.cs() == "(") { - os << "\\("; - // This does only work because parse_math outputs TeX - parse_math(p, os, FLAG_SIMPLE2, MATH_MODE); - os << "\\)"; - } - - else if (t.cs() == "[") { - os << "\\["; - // This does only work because parse_math outputs TeX - parse_math(p, os, FLAG_EQUATION, MATH_MODE); - os << "\\]"; - } - - else if (t.cs() == "begin") { - string const name = p.getArg('{', '}'); - active_environments.push_back(name); - os << "\\begin{" << name << '}'; - // treat the nested environment as a block, don't - // parse 
&, \\ etc, because they don't belong to our - // table if they appear. - os << p.verbatimEnvironment(name); - os << "\\end{" << name << '}'; - active_environments.pop_back(); - } - - else if (t.cs() == "end") { - if (flags & FLAG_END) { - // eat environment name - string const name = p.getArg('{', '}'); - if (name != active_environment()) - p.error("\\end{" + name + "} does not match \\begin{" - + active_environment() + "}"); - return; - } - p.error("found 'end' unexpectedly"); - } - - else - os << t.asInput(); - } - - // We can have comments if the last line is incomplete - os << comments; - - // We can have hline stuff if the last line is incomplete - if (!hlines.empty()) { - // this does not work in LaTeX, so we ignore it - cerr << "Ignoring '" << hlines << "' at end of tabular" - << endl; - } -} - - -void handle_hline_above(RowInfo & ri, vector & ci) -{ - ri.topline = true; - for (size_t col = 0; col < ci.size(); ++col) - ci[col].topline = true; -} - - -void handle_hline_below(RowInfo & ri, vector & ci) -{ - ri.bottomline = true; - for (size_t col = 0; col < ci.size(); ++col) - ci[col].bottomline = true; -} - - -} // anonymous namespace - - -void handle_tabular(Parser & p, ostream & os, bool is_long_tabular, - Context & context) -{ - string posopts = p.getOpt(); - if (!posopts.empty()) { - // FIXME: Convert this to ERT - if (is_long_tabular) - cerr << "horizontal longtable"; - else - cerr << "vertical tabular"; - cerr << " positioning '" << posopts << "' ignored\n"; - } - - vector colinfo; - - // handle column formatting - handle_colalign(p, colinfo, ColInfo()); - fix_colalign(colinfo); - - // first scan of cells - // use table mode to keep it minimal-invasive - // not exactly what's TeX doing... 
- vector lines; - ostringstream ss; - RowPosition rowpos = ROW_START; - parse_table(p, ss, is_long_tabular, rowpos, FLAG_END); - split(ss.str(), lines, LINE); - - vector< vector > cellinfo(lines.size()); - vector rowinfo(lines.size()); - - // split into rows - //cerr << "// split into rows\n"; - for (size_t row = 0; row < rowinfo.size(); ++row) { - - // init row - cellinfo[row].resize(colinfo.size()); - - // split row - vector dummy; - //cerr << "\n########### LINE: " << lines[row] << "########\n"; - split(lines[row], dummy, HLINE); - - // handle horizontal line fragments - // we do only expect this for a last line without '\\' - if (dummy.size() != 3) { - if ((dummy.size() != 1 && dummy.size() != 2) || - row != rowinfo.size() - 1) - cerr << "unexpected dummy size: " << dummy.size() - << " content: " << lines[row] << "\n"; - dummy.resize(3); - } - lines[row] = dummy[1]; - - //cerr << "line: " << row << " above 0: " << dummy[0] << "\n"; - //cerr << "line: " << row << " below 2: " << dummy[2] << "\n"; - //cerr << "line: " << row << " cells 1: " << dummy[1] << "\n"; - - for (int i = 0; i <= 2; i += 2) { - //cerr << " reading from line string '" << dummy[i] << "'\n"; - Parser p1(dummy[i]); - while (p1.good()) { - Token t = p1.get_token(); - //cerr << "read token: " << t << "\n"; - if (t.cs() == "hline") { - if (i == 0) { - if (rowinfo[row].topline) { - if (row > 0) // extra bottomline above - handle_hline_below(rowinfo[row - 1], cellinfo[row - 1]); - else - cerr << "dropping extra hline\n"; - //cerr << "below row: " << row-1 << endl; - } else { - handle_hline_above(rowinfo[row], cellinfo[row]); - //cerr << "above row: " << row << endl; - } - } else { - //cerr << "below row: " << row << endl; - handle_hline_below(rowinfo[row], cellinfo[row]); - } - } else if (t.cs() == "cline") { - string arg = p1.verbatim_item(); - //cerr << "read cline arg: '" << arg << "'\n"; - vector t; - split(arg, t, '-'); - t.resize(2); - size_t from = convert(t[0]); - if (from == 0) - cerr << 
"Could not parse " - "cline start column." - << endl; - else - // 1 based index -> 0 based - --from; - if (from >= colinfo.size()) { - cerr << "cline starts at non " - "existing column " - << (from + 1) << endl; - from = colinfo.size() - 1; - } - size_t to = convert(t[1]); - if (to == 0) - cerr << "Could not parse " - "cline end column." - << endl; - else - // 1 based index -> 0 based - --to; - if (to >= colinfo.size()) { - cerr << "cline ends at non " - "existing column " - << (to + 1) << endl; - to = colinfo.size() - 1; - } - for (size_t col = from; col <= to; ++col) { - //cerr << "row: " << row << " col: " << col << " i: " << i << endl; - if (i == 0) { - rowinfo[row].topline = true; - cellinfo[row][col].topline = true; - } else { - rowinfo[row].bottomline = true; - cellinfo[row][col].bottomline = true; - } - } - } else if (t.cs() == "endhead") { - if (i > 0) - rowinfo[row].type = LT_HEAD; - for (int r = row - 1; r >= 0; --r) { - if (rowinfo[r].type != LT_NORMAL) - break; - rowinfo[r].type = LT_HEAD; - } - } else if (t.cs() == "endfirsthead") { - if (i > 0) - rowinfo[row].type = LT_FIRSTHEAD; - for (int r = row - 1; r >= 0; --r) { - if (rowinfo[r].type != LT_NORMAL) - break; - rowinfo[r].type = LT_FIRSTHEAD; - } - } else if (t.cs() == "endfoot") { - if (i > 0) - rowinfo[row].type = LT_FOOT; - for (int r = row - 1; r >= 0; --r) { - if (rowinfo[r].type != LT_NORMAL) - break; - rowinfo[r].type = LT_FOOT; - } - } else if (t.cs() == "endlastfoot") { - if (i > 0) - rowinfo[row].type = LT_LASTFOOT; - for (int r = row - 1; r >= 0; --r) { - if (rowinfo[r].type != LT_NORMAL) - break; - rowinfo[r].type = LT_LASTFOOT; - } - } else if (t.cs() == "newpage") { - if (i == 0) { - if (row > 0) - rowinfo[row - 1].newpage = true; - else - // This does not work in LaTeX - cerr << "Ignoring " - "'\\newpage' " - "before rows." 
- << endl; - } else - rowinfo[row].newpage = true; - } else { - cerr << "unexpected line token: " << t << endl; - } - } - } - - // split into cells - vector cells; - split(lines[row], cells, TAB); - for (size_t col = 0, cell = 0; cell < cells.size(); - ++col, ++cell) { - //cerr << "cell content: '" << cells[cell] << "'\n"; - if (col >= colinfo.size()) { - // This does not work in LaTeX - cerr << "Ignoring extra cell '" - << cells[cell] << "'." << endl; - continue; - } - Parser p(cells[cell]); - p.skip_spaces(); - //cells[cell] << "'\n"; - if (p.next_token().cs() == "multicolumn") { - // how many cells? - p.get_token(); - size_t const ncells = - convert(p.verbatim_item()); - - // special cell properties alignment - vector t; - handle_colalign(p, t, ColInfo()); - ColInfo & ci = t.front(); - - // The logic of LyX for multicolumn vertical - // lines is too complicated to reproduce it - // here (see LyXTabular::TeXCellPreamble()). - // Therefore we simply put everything in the - // special field. - ci2special(ci); - - cellinfo[row][col].multi = CELL_BEGIN_OF_MULTICOLUMN; - cellinfo[row][col].align = ci.align; - cellinfo[row][col].special = ci.special; - cellinfo[row][col].leftlines = ci.leftlines; - cellinfo[row][col].rightlines = ci.rightlines; - ostringstream os; - parse_text_in_inset(p, os, FLAG_ITEM, false, context); - if (!cellinfo[row][col].content.empty()) { - // This may or may not work in LaTeX, - // but it does not work in LyX. - // FIXME: Handle it correctly! - cerr << "Moving cell content '" - << cells[cell] - << "' into a multicolumn cell. " - "This will probably not work." 
- << endl; - } - cellinfo[row][col].content += os.str(); - - // add dummy cells for multicol - for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) { - ++col; - cellinfo[row][col].multi = CELL_PART_OF_MULTICOLUMN; - cellinfo[row][col].align = 'c'; - } - - } else { - cellinfo[row][col].leftlines = colinfo[col].leftlines; - cellinfo[row][col].rightlines = colinfo[col].rightlines; - cellinfo[row][col].align = colinfo[col].align; - ostringstream os; - parse_text_in_inset(p, os, FLAG_CELL, false, context); - cellinfo[row][col].content += os.str(); - } - } - - //cerr << "// handle almost empty last row what we have\n"; - // handle almost empty last row - if (row && lines[row].empty() && row + 1 == rowinfo.size()) { - //cerr << "remove empty last line\n"; - if (rowinfo[row].topline) - rowinfo[row - 1].bottomline = true; - for (size_t col = 0; col < colinfo.size(); ++col) - if (cellinfo[row][col].topline) - cellinfo[row - 1][col].bottomline = true; - rowinfo.pop_back(); - } - } - - // Now we have the table structure and content in rowinfo, colinfo - // and cellinfo. - // Unfortunately LyX has some limitations that we need to work around. - - // Convert cells with special content to multicolumn cells - // (LyX ignores the special field for non-multicolumn cells). 
- for (size_t row = 0; row < rowinfo.size(); ++row) { - for (size_t col = 0; col < cellinfo[row].size(); ++col) { - if (cellinfo[row][col].multi == CELL_NORMAL && - !cellinfo[row][col].special.empty()) - cellinfo[row][col].multi = CELL_BEGIN_OF_MULTICOLUMN; - } - } - - //cerr << "// output what we have\n"; - // output what we have - os << "\n\n"; - os << "\n"; - - //cerr << "// after header\n"; - for (size_t col = 0; col < colinfo.size(); ++col) { - os << " 0) - << write_attribute("rightline", colinfo[col].rightlines > 0) - << write_attribute("width", translate_len(colinfo[col].width)) - << write_attribute("special", colinfo[col].special) - << ">\n"; - } - //cerr << "// after cols\n"; - - for (size_t row = 0; row < rowinfo.size(); ++row) { - os << "\n"; - for (size_t col = 0; col < colinfo.size(); ++col) { - CellInfo const & cell = cellinfo[row][col]; - os << " 0) - << write_attribute("rightline", cell.rightlines > 0) - << write_attribute("rotate", cell.rotate); - //cerr << "\nrow: " << row << " col: " << col; - //if (cell.topline) - // cerr << " topline=\"true\""; - //if (cell.bottomline) - // cerr << " bottomline=\"true\""; - os << " usebox=\"none\"" - << write_attribute("width", translate_len(cell.width)); - if (cell.multi != CELL_NORMAL) - os << write_attribute("special", cell.special); - os << ">" - << "\n\\begin_inset Text\n" - << cell.content - << "\n\\end_inset\n" - << "\n"; - } - os << "\n"; - } - - os << "\n"; -} - - - - -// }]) - - -} // namespace lyx diff --git a/src/tex2lyx/table.cpp b/src/tex2lyx/table.cpp new file mode 100644 index 0000000000..35de97af6a --- /dev/null +++ b/src/tex2lyx/table.cpp @@ -0,0 +1,1124 @@ +/** + * \file table.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author André Pönitz + * \author Jean-Marc Lasgouttes + * \author Georg Baum + * + * Full author contact details are available in file CREDITS. 
+ */ + +// {[( + +#include + +#include "tex2lyx.h" + +#include "support/convert.h" +#include "support/lstrings.h" + +#include +#include +#include +#include + + +namespace lyx { + +using std::cerr; +using std::endl; +using std::istringstream; +using std::ostream; +using std::ostringstream; +using std::string; +using std::vector; + + +// filled in preamble.cpp +std::map special_columns; + + +namespace { + +class ColInfo { +public: + ColInfo() : align('n'), valign('n'), rightlines(0), leftlines(0) {} + /// column alignment + char align; + /// vertical alignment + char valign; + /// column width + string width; + /// special column alignment + string special; + /// number of lines on the right + int rightlines; + /// number of lines on the left + int leftlines; +}; + + +/// row type for longtables +enum LTRowType +{ + /// normal row + LT_NORMAL, + /// part of head + LT_HEAD, + /// part of head on first page + LT_FIRSTHEAD, + /// part of foot + LT_FOOT, + /// part of foot on last page + LT_LASTFOOT +}; + + +class RowInfo { +public: + RowInfo() : topline(false), bottomline(false), type(LT_NORMAL), + newpage(false) {} + /// horizontal line above + bool topline; + /// horizontal line below + bool bottomline; + /// These are for longtabulars only + /// row type (head, foot, firsthead etc.) + LTRowType type; + /// row for a pagebreak + bool newpage; +}; + + +enum Multicolumn { + /// A normal cell + CELL_NORMAL = 0, + /// A multicolumn cell. 
The number of columns is 1 + number + /// of CELL_PART_OF_MULTICOLUMN cells that follow directly + CELL_BEGIN_OF_MULTICOLUMN, + /// This is a dummy cell (part of a multicolumn cell) + CELL_PART_OF_MULTICOLUMN +}; + + +class CellInfo { +public: + CellInfo() : multi(CELL_NORMAL), align('n'), valign('n'), + leftlines(0), rightlines(0), topline(false), + bottomline(false), rotate(false) {} + /// cell content + string content; + /// multicolumn flag + Multicolumn multi; + /// cell alignment + char align; + /// vertical cell alignment + char valign; + /// number of lines on the left + int leftlines; + /// number of lines on the right + int rightlines; + /// do we have a line above? + bool topline; + /// do we have a line below? + bool bottomline; + /// is the cell rotated? + bool rotate; + /// width for multicolumn cells + string width; + /// special formatting for multicolumn cells + string special; +}; + + +/// translate a horizontal alignment (as stored in ColInfo and CellInfo) to LyX +inline char const * verbose_align(char c) +{ + switch (c) { + case 'c': + return "center"; + case 'r': + return "right"; + case 'l': + return "left"; + default: + return "none"; + } +} + + +/// translate a vertical alignment (as stored in ColInfo and CellInfo) to LyX +inline char const * verbose_valign(char c) +{ + // The default value for no special alignment is "top". + switch (c) { + case 'm': + return "middle"; + case 'b': + return "bottom"; + case 'p': + default: + return "top"; + } +} + + +// stripped down from tabluar.C. We use it currently only for bools and +// strings +string const write_attribute(string const & name, bool const & b) +{ + // we write only true attribute values so we remove a bit of the + // file format bloat for tabulars. + return b ? ' ' + name + "=\"true\"" : string(); +} + + +string const write_attribute(string const & name, string const & s) +{ + return s.empty() ? string() : ' ' + name + "=\"" + s + '"'; +} + + +/*! 
rather brutish way to code table structure in a string: + +\verbatim + \begin{tabular}{ccc} + 1 & 2 & 3\\ \hline + \multicolumn{2}{c}{4} & 5 // + 6 & 7 \\ + 8 \endhead + \end{tabular} +\endverbatim + + gets "translated" to: + +\verbatim + HLINE 1 TAB 2 TAB 3 HLINE HLINE LINE + \hline HLINE \multicolumn{2}{c}{4} TAB 5 HLINE HLINE LINE + HLINE 6 TAB 7 HLINE HLINE LINE + HLINE 8 HLINE \endhead HLINE LINE +\endverbatim + */ + +char const TAB = '\001'; +char const LINE = '\002'; +char const HLINE = '\004'; + + +/*! + * Move the information in leftlines, rightlines, align and valign to the + * special field. This is necessary if the special field is not empty, + * because LyX ignores leftlines, rightlines, align and valign in this case. + */ +void ci2special(ColInfo & ci) +{ + if (ci.width.empty() && ci.align == 'n') + // The alignment setting is already in special, since + // handle_colalign() never stores ci with these settings + // and ensures that leftlines == 0 and rightlines == 0 in + // this case. + return; + + if (!ci.width.empty()) { + switch (ci.align) { + case 'l': + ci.special += ">{\\raggedright}"; + break; + case 'r': + ci.special += ">{\\raggedleft}"; + break; + case 'c': + ci.special += ">{\\centering}"; + break; + } + if (ci.valign == 'n') + ci.special += 'p'; + else + ci.special += ci.valign; + ci.special += '{' + ci.width + '}'; + ci.width.erase(); + } else + ci.special += ci.align; + + for (int i = 0; i < ci.leftlines; ++i) + ci.special.insert(0, "|"); + for (int i = 0; i < ci.rightlines; ++i) + ci.special += '|'; + ci.leftlines = 0; + ci.rightlines = 0; + ci.align = 'n'; + ci.valign = 'n'; +} + + +/*! + * Handle column specifications for tabulars and multicolumns. + * The next token of the parser \p p must be an opening brace, and we read + * everything until the matching closing brace. + * The resulting column specifications are filled into \p colinfo. This is + * in an intermediate form. fix_colalign() makes it suitable for LyX output. 
+ */ +void handle_colalign(Parser & p, vector & colinfo, + ColInfo const & start) +{ + if (p.get_token().cat() != catBegin) + cerr << "Wrong syntax for table column alignment.\n" + "Expected '{', got '" << p.curr_token().asInput() + << "'.\n"; + + ColInfo next = start; + for (Token t = p.get_token(); p.good() && t.cat() != catEnd; + t = p.get_token()) { +#ifdef FILEDEBUG + cerr << "t: " << t << " c: '" << t.character() << "'\n"; +#endif + + // We cannot handle comments here + if (t.cat() == catComment) { + if (t.cs().empty()) { + // "%\n" combination + p.skip_spaces(); + } else + cerr << "Ignoring comment: " << t.asInput(); + continue; + } + + switch (t.character()) { + case 'c': + case 'l': + case 'r': + // new column, horizontal aligned + next.align = t.character(); + if (!next.special.empty()) + ci2special(next); + colinfo.push_back(next); + next = ColInfo(); + break; + case 'p': + case 'b': + case 'm': + // new column, vertical aligned box + next.valign = t.character(); + next.width = p.verbatim_item(); + if (!next.special.empty()) + ci2special(next); + colinfo.push_back(next); + next = ColInfo(); + break; + case '|': + // vertical rule + if (colinfo.empty()) { + if (next.special.empty()) + ++next.leftlines; + else + next.special += '|'; + } else if (colinfo.back().special.empty()) + ++colinfo.back().rightlines; + else if (next.special.empty()) + ++next.leftlines; + else + colinfo.back().special += '|'; + break; + case '>': { + // text before the next column + string const s = trim(p.verbatim_item()); + if (next.special.empty() && + next.align == 'n') { + // Maybe this can be converted to a + // horizontal alignment setting for + // fixed width columns + if (s == "\\raggedleft") + next.align = 'r'; + else if (s == "\\raggedright") + next.align = 'l'; + else if (s == "\\centering") + next.align = 'c'; + else + next.special = ">{" + s + '}'; + } else + next.special += ">{" + s + '}'; + break; + } + case '<': { + // text after the last column + string const s = 
trim(p.verbatim_item()); + if (colinfo.empty()) + // This is not possible in LaTeX. + cerr << "Ignoring separator '<{" + << s << "}'." << endl; + else { + ColInfo & ci = colinfo.back(); + ci2special(ci); + ci.special += "<{" + s + '}'; + } + break; + } + case '*': { + // *{n}{arg} means 'n' columns of type 'arg' + string const num = p.verbatim_item(); + string const arg = p.verbatim_item(); + size_t const n = convert(num); + if (!arg.empty() && n > 0) { + string s("{"); + for (size_t i = 0; i < n; ++i) + s += arg; + s += '}'; + Parser p2(s); + handle_colalign(p2, colinfo, next); + next = ColInfo(); + } else { + cerr << "Ignoring column specification" + " '*{" << num << "}{" + << arg << "}'." << endl; + } + break; + } + case '@': + // text instead of the column spacing + case '!': + // text in addition to the column spacing + next.special += t.character(); + next.special += '{' + p.verbatim_item() + '}'; + break; + default: + // try user defined column types + if (special_columns.find(t.character()) != + special_columns.end()) { + ci2special(next); + next.special += t.character(); + int const nargs = + special_columns[t.character()]; + for (int i = 0; i < nargs; ++i) + next.special += '{' + + p.verbatim_item() + + '}'; + colinfo.push_back(next); + next = ColInfo(); + } else + cerr << "Ignoring column specification" + " '" << t << "'." << endl; + break; + } + } + + // Maybe we have some column separators that need to be added to the + // last column? + ci2special(next); + if (!next.special.empty()) { + ColInfo & ci = colinfo.back(); + ci2special(ci); + ci.special += next.special; + next.special.erase(); + } +} + + +/*! + * Move the left and right lines and alignment settings of the column \p ci + * to the special field if necessary. + */ +void fix_colalign(ColInfo & ci) +{ + if (ci.leftlines > 1 || ci.rightlines > 1) + ci2special(ci); +} + + +/*! + * LyX can't handle more than one vertical line at the left or right side + * of a column. 
+ * This function moves the left and right lines and alignment settings of all + * columns in \p colinfo to the special field if necessary. + */ +void fix_colalign(vector & colinfo) +{ + // Try to move extra leftlines to the previous column. + // We do this only if both special fields are empty, otherwise we + // can't tell wether the result will be the same. + for (size_t col = 0; col < colinfo.size(); ++col) { + if (colinfo[col].leftlines > 1 && + colinfo[col].special.empty() && col > 0 && + colinfo[col - 1].rightlines == 0 && + colinfo[col - 1].special.empty()) { + ++colinfo[col - 1].rightlines; + --colinfo[col].leftlines; + } + } + // Try to move extra rightlines to the next column + for (size_t col = 0; col < colinfo.size(); ++col) { + if (colinfo[col].rightlines > 1 && + colinfo[col].special.empty() && + col < colinfo.size() - 1 && + colinfo[col + 1].leftlines == 0 && + colinfo[col + 1].special.empty()) { + ++colinfo[col + 1].leftlines; + --colinfo[col].rightlines; + } + } + // Move the lines and alignment settings to the special field if + // necessary + for (size_t col = 0; col < colinfo.size(); ++col) + fix_colalign(colinfo[col]); +} + + +/*! + * Parse hlines and similar stuff. + * \returns wether the token \p t was parsed + */ +bool parse_hlines(Parser & p, Token const & t, string & hlines, + bool is_long_tabular) +{ + BOOST_ASSERT(t.cat() == catEscape); + + if (t.cs() == "hline") + hlines += "\\hline"; + + else if (t.cs() == "cline") + hlines += "\\cline{" + p.verbatim_item() + '}'; + + else if (is_long_tabular && t.cs() == "newpage") + hlines += "\\newpage"; + + else + return false; + + return true; +} + + +/// Position in a row +enum RowPosition { + /// At the very beginning, before the first token + ROW_START, + /// After the first token and before any column token + IN_HLINES_START, + /// After the first column token. 
Comments and whitespace are only + /// treated as tokens in this position + IN_COLUMNS, + /// After the first non-column token at the end + IN_HLINES_END +}; + + +/*! + * Parse table structure. + * We parse tables in a two-pass process: This function extracts the table + * structure (rows, columns, hlines etc.), but does not change the cell + * content. The cell content is parsed in a second step in handle_tabular(). + */ +void parse_table(Parser & p, ostream & os, bool is_long_tabular, + RowPosition & pos, unsigned flags) +{ + // table structure commands such as \hline + string hlines; + + // comments that occur at places where we can't handle them + string comments; + + while (p.good()) { + Token const & t = p.get_token(); + +#ifdef FILEDEBUG + cerr << "t: " << t << " flags: " << flags << "\n"; +#endif + + // comments and whitespace in hlines + switch (pos) { + case ROW_START: + case IN_HLINES_START: + case IN_HLINES_END: + if (t.cat() == catComment) { + if (t.cs().empty()) + // line continuation + p.skip_spaces(); + else + // We can't handle comments here, + // store them for later use + comments += t.asInput(); + continue; + } else if (t.cat() == catSpace || + t.cat() == catNewline) { + // whitespace is irrelevant here, we + // need to recognize hline stuff + p.skip_spaces(); + continue; + } + break; + case IN_COLUMNS: + break; + } + + // We need to handle structure stuff first in order to + // determine wether we need to output a HLINE separator + // before the row or not. + if (t.cat() == catEscape) { + if (parse_hlines(p, t, hlines, is_long_tabular)) { + switch (pos) { + case ROW_START: + pos = IN_HLINES_START; + break; + case IN_COLUMNS: + pos = IN_HLINES_END; + break; + case IN_HLINES_START: + case IN_HLINES_END: + break; + } + continue; + } + + else if (t.cs() == "tabularnewline" || + t.cs() == "\\" || + t.cs() == "cr") { + if (t.cs() == "cr") + cerr << "Warning: Converting TeX " + "'\\cr' to LaTeX '\\\\'." 
+ << endl; + // stuff before the line break + os << comments << HLINE << hlines << HLINE + << LINE; + //cerr << "hlines: " << hlines << endl; + hlines.erase(); + comments.erase(); + pos = ROW_START; + continue; + } + + else if (is_long_tabular && + (t.cs() == "endhead" || + t.cs() == "endfirsthead" || + t.cs() == "endfoot" || + t.cs() == "endlastfoot")) { + hlines += t.asInput(); + switch (pos) { + case IN_COLUMNS: + case IN_HLINES_END: + // these commands are implicit line + // breaks + os << comments << HLINE << hlines + << HLINE << LINE; + hlines.erase(); + comments.erase(); + pos = ROW_START; + break; + case ROW_START: + pos = IN_HLINES_START; + break; + case IN_HLINES_START: + break; + } + continue; + } + + } + + // We need a HLINE separator if we either have no hline + // stuff at all and are just starting a row or if we just + // got the first non-hline token. + switch (pos) { + case ROW_START: + // no hline tokens exist, first token at row start + case IN_HLINES_START: + // hline tokens exist, first non-hline token at row + // start + os << hlines << HLINE << comments; + hlines.erase(); + comments.erase(); + pos = IN_COLUMNS; + break; + case IN_HLINES_END: + // Oops, there is still cell content after hline + // stuff. This does not work in LaTeX, so we ignore + // the hlines. 
+ cerr << "Ignoring '" << hlines << "' in a cell" + << endl; + os << comments; + hlines.erase(); + comments.erase(); + pos = IN_COLUMNS; + break; + case IN_COLUMNS: + break; + } + + // If we come here we have normal cell content + // + // cat codes + // + if (t.cat() == catMath) { + // we are inside some text mode thingy, so opening new math is allowed + Token const & n = p.get_token(); + if (n.cat() == catMath) { + // TeX's $$...$$ syntax for displayed math + os << "\\["; + // This does only work because parse_math outputs TeX + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << "\\]"; + p.get_token(); // skip the second '$' token + } else { + // simple $...$ stuff + p.putback(); + os << '$'; + // This does only work because parse_math outputs TeX + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << '$'; + } + } + + else if (t.cat() == catSpace || t.cat() == catNewline) + os << t.cs(); + + else if (t.cat() == catLetter || + t.cat() == catSuper || + t.cat() == catSub || + t.cat() == catOther || + t.cat() == catActive || + t.cat() == catParameter) + os << t.character(); + + else if (t.cat() == catBegin) { + os << '{'; + parse_table(p, os, is_long_tabular, pos, + FLAG_BRACE_LAST); + os << '}'; + } + + else if (t.cat() == catEnd) { + if (flags & FLAG_BRACE_LAST) + return; + cerr << "unexpected '}'\n"; + } + + else if (t.cat() == catAlign) { + os << TAB; + p.skip_spaces(); + } + + else if (t.cat() == catComment) + os << t.asInput(); + + else if (t.cs() == "(") { + os << "\\("; + // This does only work because parse_math outputs TeX + parse_math(p, os, FLAG_SIMPLE2, MATH_MODE); + os << "\\)"; + } + + else if (t.cs() == "[") { + os << "\\["; + // This does only work because parse_math outputs TeX + parse_math(p, os, FLAG_EQUATION, MATH_MODE); + os << "\\]"; + } + + else if (t.cs() == "begin") { + string const name = p.getArg('{', '}'); + active_environments.push_back(name); + os << "\\begin{" << name << '}'; + // treat the nested environment as a block, don't + // parse 
&, \\ etc, because they don't belong to our + // table if they appear. + os << p.verbatimEnvironment(name); + os << "\\end{" << name << '}'; + active_environments.pop_back(); + } + + else if (t.cs() == "end") { + if (flags & FLAG_END) { + // eat environment name + string const name = p.getArg('{', '}'); + if (name != active_environment()) + p.error("\\end{" + name + "} does not match \\begin{" + + active_environment() + "}"); + return; + } + p.error("found 'end' unexpectedly"); + } + + else + os << t.asInput(); + } + + // We can have comments if the last line is incomplete + os << comments; + + // We can have hline stuff if the last line is incomplete + if (!hlines.empty()) { + // this does not work in LaTeX, so we ignore it + cerr << "Ignoring '" << hlines << "' at end of tabular" + << endl; + } +} + + +void handle_hline_above(RowInfo & ri, vector & ci) +{ + ri.topline = true; + for (size_t col = 0; col < ci.size(); ++col) + ci[col].topline = true; +} + + +void handle_hline_below(RowInfo & ri, vector & ci) +{ + ri.bottomline = true; + for (size_t col = 0; col < ci.size(); ++col) + ci[col].bottomline = true; +} + + +} // anonymous namespace + + +void handle_tabular(Parser & p, ostream & os, bool is_long_tabular, + Context & context) +{ + string posopts = p.getOpt(); + if (!posopts.empty()) { + // FIXME: Convert this to ERT + if (is_long_tabular) + cerr << "horizontal longtable"; + else + cerr << "vertical tabular"; + cerr << " positioning '" << posopts << "' ignored\n"; + } + + vector colinfo; + + // handle column formatting + handle_colalign(p, colinfo, ColInfo()); + fix_colalign(colinfo); + + // first scan of cells + // use table mode to keep it minimal-invasive + // not exactly what's TeX doing... 
+ vector lines; + ostringstream ss; + RowPosition rowpos = ROW_START; + parse_table(p, ss, is_long_tabular, rowpos, FLAG_END); + split(ss.str(), lines, LINE); + + vector< vector > cellinfo(lines.size()); + vector rowinfo(lines.size()); + + // split into rows + //cerr << "// split into rows\n"; + for (size_t row = 0; row < rowinfo.size(); ++row) { + + // init row + cellinfo[row].resize(colinfo.size()); + + // split row + vector dummy; + //cerr << "\n########### LINE: " << lines[row] << "########\n"; + split(lines[row], dummy, HLINE); + + // handle horizontal line fragments + // we do only expect this for a last line without '\\' + if (dummy.size() != 3) { + if ((dummy.size() != 1 && dummy.size() != 2) || + row != rowinfo.size() - 1) + cerr << "unexpected dummy size: " << dummy.size() + << " content: " << lines[row] << "\n"; + dummy.resize(3); + } + lines[row] = dummy[1]; + + //cerr << "line: " << row << " above 0: " << dummy[0] << "\n"; + //cerr << "line: " << row << " below 2: " << dummy[2] << "\n"; + //cerr << "line: " << row << " cells 1: " << dummy[1] << "\n"; + + for (int i = 0; i <= 2; i += 2) { + //cerr << " reading from line string '" << dummy[i] << "'\n"; + Parser p1(dummy[i]); + while (p1.good()) { + Token t = p1.get_token(); + //cerr << "read token: " << t << "\n"; + if (t.cs() == "hline") { + if (i == 0) { + if (rowinfo[row].topline) { + if (row > 0) // extra bottomline above + handle_hline_below(rowinfo[row - 1], cellinfo[row - 1]); + else + cerr << "dropping extra hline\n"; + //cerr << "below row: " << row-1 << endl; + } else { + handle_hline_above(rowinfo[row], cellinfo[row]); + //cerr << "above row: " << row << endl; + } + } else { + //cerr << "below row: " << row << endl; + handle_hline_below(rowinfo[row], cellinfo[row]); + } + } else if (t.cs() == "cline") { + string arg = p1.verbatim_item(); + //cerr << "read cline arg: '" << arg << "'\n"; + vector t; + split(arg, t, '-'); + t.resize(2); + size_t from = convert(t[0]); + if (from == 0) + cerr << 
"Could not parse " + "cline start column." + << endl; + else + // 1 based index -> 0 based + --from; + if (from >= colinfo.size()) { + cerr << "cline starts at non " + "existing column " + << (from + 1) << endl; + from = colinfo.size() - 1; + } + size_t to = convert(t[1]); + if (to == 0) + cerr << "Could not parse " + "cline end column." + << endl; + else + // 1 based index -> 0 based + --to; + if (to >= colinfo.size()) { + cerr << "cline ends at non " + "existing column " + << (to + 1) << endl; + to = colinfo.size() - 1; + } + for (size_t col = from; col <= to; ++col) { + //cerr << "row: " << row << " col: " << col << " i: " << i << endl; + if (i == 0) { + rowinfo[row].topline = true; + cellinfo[row][col].topline = true; + } else { + rowinfo[row].bottomline = true; + cellinfo[row][col].bottomline = true; + } + } + } else if (t.cs() == "endhead") { + if (i > 0) + rowinfo[row].type = LT_HEAD; + for (int r = row - 1; r >= 0; --r) { + if (rowinfo[r].type != LT_NORMAL) + break; + rowinfo[r].type = LT_HEAD; + } + } else if (t.cs() == "endfirsthead") { + if (i > 0) + rowinfo[row].type = LT_FIRSTHEAD; + for (int r = row - 1; r >= 0; --r) { + if (rowinfo[r].type != LT_NORMAL) + break; + rowinfo[r].type = LT_FIRSTHEAD; + } + } else if (t.cs() == "endfoot") { + if (i > 0) + rowinfo[row].type = LT_FOOT; + for (int r = row - 1; r >= 0; --r) { + if (rowinfo[r].type != LT_NORMAL) + break; + rowinfo[r].type = LT_FOOT; + } + } else if (t.cs() == "endlastfoot") { + if (i > 0) + rowinfo[row].type = LT_LASTFOOT; + for (int r = row - 1; r >= 0; --r) { + if (rowinfo[r].type != LT_NORMAL) + break; + rowinfo[r].type = LT_LASTFOOT; + } + } else if (t.cs() == "newpage") { + if (i == 0) { + if (row > 0) + rowinfo[row - 1].newpage = true; + else + // This does not work in LaTeX + cerr << "Ignoring " + "'\\newpage' " + "before rows." 
+ << endl; + } else + rowinfo[row].newpage = true; + } else { + cerr << "unexpected line token: " << t << endl; + } + } + } + + // split into cells + vector cells; + split(lines[row], cells, TAB); + for (size_t col = 0, cell = 0; cell < cells.size(); + ++col, ++cell) { + //cerr << "cell content: '" << cells[cell] << "'\n"; + if (col >= colinfo.size()) { + // This does not work in LaTeX + cerr << "Ignoring extra cell '" + << cells[cell] << "'." << endl; + continue; + } + Parser p(cells[cell]); + p.skip_spaces(); + //cells[cell] << "'\n"; + if (p.next_token().cs() == "multicolumn") { + // how many cells? + p.get_token(); + size_t const ncells = + convert(p.verbatim_item()); + + // special cell properties alignment + vector t; + handle_colalign(p, t, ColInfo()); + ColInfo & ci = t.front(); + + // The logic of LyX for multicolumn vertical + // lines is too complicated to reproduce it + // here (see LyXTabular::TeXCellPreamble()). + // Therefore we simply put everything in the + // special field. + ci2special(ci); + + cellinfo[row][col].multi = CELL_BEGIN_OF_MULTICOLUMN; + cellinfo[row][col].align = ci.align; + cellinfo[row][col].special = ci.special; + cellinfo[row][col].leftlines = ci.leftlines; + cellinfo[row][col].rightlines = ci.rightlines; + ostringstream os; + parse_text_in_inset(p, os, FLAG_ITEM, false, context); + if (!cellinfo[row][col].content.empty()) { + // This may or may not work in LaTeX, + // but it does not work in LyX. + // FIXME: Handle it correctly! + cerr << "Moving cell content '" + << cells[cell] + << "' into a multicolumn cell. " + "This will probably not work." 
+ << endl; + } + cellinfo[row][col].content += os.str(); + + // add dummy cells for multicol + for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) { + ++col; + cellinfo[row][col].multi = CELL_PART_OF_MULTICOLUMN; + cellinfo[row][col].align = 'c'; + } + + } else { + cellinfo[row][col].leftlines = colinfo[col].leftlines; + cellinfo[row][col].rightlines = colinfo[col].rightlines; + cellinfo[row][col].align = colinfo[col].align; + ostringstream os; + parse_text_in_inset(p, os, FLAG_CELL, false, context); + cellinfo[row][col].content += os.str(); + } + } + + //cerr << "// handle almost empty last row what we have\n"; + // handle almost empty last row + if (row && lines[row].empty() && row + 1 == rowinfo.size()) { + //cerr << "remove empty last line\n"; + if (rowinfo[row].topline) + rowinfo[row - 1].bottomline = true; + for (size_t col = 0; col < colinfo.size(); ++col) + if (cellinfo[row][col].topline) + cellinfo[row - 1][col].bottomline = true; + rowinfo.pop_back(); + } + } + + // Now we have the table structure and content in rowinfo, colinfo + // and cellinfo. + // Unfortunately LyX has some limitations that we need to work around. + + // Convert cells with special content to multicolumn cells + // (LyX ignores the special field for non-multicolumn cells). 
+ for (size_t row = 0; row < rowinfo.size(); ++row) { + for (size_t col = 0; col < cellinfo[row].size(); ++col) { + if (cellinfo[row][col].multi == CELL_NORMAL && + !cellinfo[row][col].special.empty()) + cellinfo[row][col].multi = CELL_BEGIN_OF_MULTICOLUMN; + } + } + + //cerr << "// output what we have\n"; + // output what we have + os << "\n\n"; + os << "\n"; + + //cerr << "// after header\n"; + for (size_t col = 0; col < colinfo.size(); ++col) { + os << " 0) + << write_attribute("rightline", colinfo[col].rightlines > 0) + << write_attribute("width", translate_len(colinfo[col].width)) + << write_attribute("special", colinfo[col].special) + << ">\n"; + } + //cerr << "// after cols\n"; + + for (size_t row = 0; row < rowinfo.size(); ++row) { + os << "\n"; + for (size_t col = 0; col < colinfo.size(); ++col) { + CellInfo const & cell = cellinfo[row][col]; + os << " 0) + << write_attribute("rightline", cell.rightlines > 0) + << write_attribute("rotate", cell.rotate); + //cerr << "\nrow: " << row << " col: " << col; + //if (cell.topline) + // cerr << " topline=\"true\""; + //if (cell.bottomline) + // cerr << " bottomline=\"true\""; + os << " usebox=\"none\"" + << write_attribute("width", translate_len(cell.width)); + if (cell.multi != CELL_NORMAL) + os << write_attribute("special", cell.special); + os << ">" + << "\n\\begin_inset Text\n" + << cell.content + << "\n\\end_inset\n" + << "\n"; + } + os << "\n"; + } + + os << "\n"; +} + + + + +// }]) + + +} // namespace lyx diff --git a/src/tex2lyx/tex2lyx.C b/src/tex2lyx/tex2lyx.C deleted file mode 100644 index 81afdaec6f..0000000000 --- a/src/tex2lyx/tex2lyx.C +++ /dev/null @@ -1,561 +0,0 @@ -/** - * \file tex2lyx.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author André Pönitz - * - * Full author contact details are available in file CREDITS. 
- */ - -// {[( - -#include - -#include "tex2lyx.h" -#include "Context.h" - -#include "debug.h" -#include "LyXTextClass.h" - -#include "support/convert.h" -#include "support/filetools.h" -#include "support/fs_extras.h" -#include "support/lstrings.h" -#include "support/lyxlib.h" -#include "support/ExceptionMessage.h" -#include "support/os.h" -#include "support/package.h" -#include "support/unicode.h" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - - -namespace lyx { - -using std::endl; -using std::cout; -using std::cerr; -using std::getline; - -using std::ifstream; -using std::ofstream; -using std::istringstream; -using std::ostringstream; -using std::stringstream; -using std::string; -using std::vector; -using std::map; - -using support::changeExtension; -using support::FileName; -using support::isStrUnsignedInt; -using support::ltrim; -using support::makeAbsPath; -using support::onlyPath; -using support::rtrim; -using support::isFileReadable; - -namespace fs = boost::filesystem; - - -// Hacks to allow the thing to link in the lyxlayout stuff -LyXErr lyxerr(std::cerr.rdbuf()); - - -string const trim(string const & a, char const * p) -{ - // BOOST_ASSERT(p); - - if (a.empty() || !*p) - return a; - - string::size_type r = a.find_last_not_of(p); - string::size_type l = a.find_first_not_of(p); - - // Is this the minimal test? 
(lgb) - if (r == string::npos && l == string::npos) - return string(); - - return a.substr(l, r - l + 1); -} - - -void split(string const & s, vector & result, char delim) -{ - //cerr << "split 1: '" << s << "'\n"; - istringstream is(s); - string t; - while (getline(is, t, delim)) - result.push_back(t); - //cerr << "split 2\n"; -} - - -string join(vector const & input, char const * delim) -{ - ostringstream os; - for (size_t i = 0; i < input.size(); ++i) { - if (i) - os << delim; - os << input[i]; - } - return os.str(); -} - - -char const * const * is_known(string const & str, char const * const * what) -{ - for ( ; *what; ++what) - if (str == *what) - return what; - return 0; -} - - - -// current stack of nested environments -vector active_environments; - - -string active_environment() -{ - return active_environments.empty() ? string() : active_environments.back(); -} - - -CommandMap known_commands; -CommandMap known_environments; -CommandMap known_math_environments; - - -void add_known_command(string const & command, string const & o1, - bool o2) -{ - // We have to handle the following cases: - // definition o1 o2 invocation result - // \newcommand{\foo}{bar} "" false \foo bar - // \newcommand{\foo}[1]{bar #1} "[1]" false \foo{x} bar x - // \newcommand{\foo}[1][]{bar #1} "[1]" true \foo bar - // \newcommand{\foo}[1][]{bar #1} "[1]" true \foo[x] bar x - // \newcommand{\foo}[1][x]{bar #1} "[1]" true \foo[x] bar x - unsigned int nargs = 0; - vector arguments; - string const opt1 = rtrim(ltrim(o1, "["), "]"); - if (isStrUnsignedInt(opt1)) { - // The command has arguments - nargs = convert(opt1); - if (nargs > 0 && o2) { - // The first argument is optional - arguments.push_back(optional); - --nargs; - } - } - for (unsigned int i = 0; i < nargs; ++i) - arguments.push_back(required); - known_commands[command] = arguments; -} - - -bool noweb_mode = false; - - -namespace { - - -/*! 
- * Read one command definition from the syntax file - */ -void read_command(Parser & p, string command, CommandMap & commands) { - if (p.next_token().asInput() == "*") { - p.get_token(); - command += '*'; - } - vector arguments; - while (p.next_token().cat() == catBegin || - p.next_token().asInput() == "[") { - if (p.next_token().cat() == catBegin) { - string const arg = p.getArg('{', '}'); - if (arg == "translate") - arguments.push_back(required); - else - arguments.push_back(verbatim); - } else { - p.getArg('[', ']'); - arguments.push_back(optional); - } - } - commands[command] = arguments; -} - - -/*! - * Read a class of environments from the syntax file - */ -void read_environment(Parser & p, string const & begin, - CommandMap & environments) -{ - string environment; - while (p.good()) { - Token const & t = p.get_token(); - if (t.cat() == catLetter) - environment += t.asInput(); - else if (!environment.empty()) { - p.putback(); - read_command(p, environment, environments); - environment.erase(); - } - if (t.cat() == catEscape && t.asInput() == "\\end") { - string const end = p.getArg('{', '}'); - if (end == begin) - return; - } - } -} - - -/*! - * Read a list of TeX commands from a reLyX compatible syntax file. - * Since this list is used after all commands that have a LyX counterpart - * are handled, it does not matter that the "syntax.default" file - * has almost all of them listed. For the same reason the reLyX-specific - * reLyXre environment is ignored. - */ -void read_syntaxfile(FileName const & file_name) -{ - ifstream is(file_name.toFilesystemEncoding().c_str()); - if (!is.good()) { - cerr << "Could not open syntax file \"" << file_name - << "\" for reading." << endl; - exit(2); - } - // We can use our TeX parser, since the syntax of the layout file is - // modeled after TeX. - // Unknown tokens are just silently ignored, this helps us to skip some - // reLyX specific things. 
- Parser p(is); - while (p.good()) { - Token const & t = p.get_token(); - if (t.cat() == catEscape) { - string const command = t.asInput(); - if (command == "\\begin") { - string const name = p.getArg('{', '}'); - if (name == "environments" || name == "reLyXre") - // We understand "reLyXre", but it is - // not as powerful as "environments". - read_environment(p, name, - known_environments); - else if (name == "mathenvironments") - read_environment(p, name, - known_math_environments); - } else { - read_command(p, command, known_commands); - } - } - } -} - - -string documentclass; -string syntaxfile; -bool overwrite_files = false; - - -/// return the number of arguments consumed -typedef boost::function cmd_helper; - - -int parse_help(string const &, string const &) -{ - cerr << "Usage: tex2lyx [ command line switches ] []\n" - "Command line switches (case sensitive):\n" - "\t-help summarize tex2lyx usage\n" - "\t-f Force creation of .lyx files even if they exist already\n" - "\t-userdir dir try to set user directory to dir\n" - "\t-sysdir dir try to set system directory to dir\n" - "\t-c textclass declare the textclass\n" - "\t-n translate a noweb (aka literate programming) file.\n" - "\t-s syntaxfile read additional syntax file" << endl; - exit(0); -} - - -int parse_class(string const & arg, string const &) -{ - if (arg.empty()) { - cerr << "Missing textclass string after -c switch" << endl; - exit(1); - } - documentclass = arg; - return 1; -} - - -int parse_syntaxfile(string const & arg, string const &) -{ - if (arg.empty()) { - cerr << "Missing syntaxfile string after -s switch" << endl; - exit(1); - } - syntaxfile = arg; - return 1; -} - - -// Filled with the command line arguments "foo" of "-sysdir foo" or -// "-userdir foo". 
-string cl_system_support; -string cl_user_support; - - -int parse_sysdir(string const & arg, string const &) -{ - if (arg.empty()) { - cerr << "Missing directory for -sysdir switch" << endl; - exit(1); - } - cl_system_support = arg; - return 1; -} - - -int parse_userdir(string const & arg, string const &) -{ - if (arg.empty()) { - cerr << "Missing directory for -userdir switch" << endl; - exit(1); - } - cl_user_support = arg; - return 1; -} - - -int parse_force(string const &, string const &) -{ - overwrite_files = true; - return 0; -} - - -int parse_noweb(string const &, string const &) -{ - noweb_mode = true; - return 0; -} - - -void easyParse(int & argc, char * argv[]) -{ - map cmdmap; - - cmdmap["-c"] = parse_class; - cmdmap["-f"] = parse_force; - cmdmap["-s"] = parse_syntaxfile; - cmdmap["-help"] = parse_help; - cmdmap["--help"] = parse_help; - cmdmap["-n"] = parse_noweb; - cmdmap["-sysdir"] = parse_sysdir; - cmdmap["-userdir"] = parse_userdir; - - for (int i = 1; i < argc; ++i) { - std::map::const_iterator it - = cmdmap.find(argv[i]); - - // don't complain if not found - may be parsed later - if (it == cmdmap.end()) - continue; - - string arg(to_utf8(from_local8bit((i + 1 < argc) ? argv[i + 1] : ""))); - string arg2(to_utf8(from_local8bit((i + 2 < argc) ? argv[i + 2] : ""))); - - int const remove = 1 + it->second(arg, arg2); - - // Now, remove used arguments by shifting - // the following ones remove places down. - argc -= remove; - for (int j = i; j < argc; ++j) - argv[j] = argv[j + remove]; - --i; - } -} - - -// path of the first parsed file -string masterFilePath; -// path of the currently parsed file -string parentFilePath; - -} // anonymous namespace - - -string getMasterFilePath() -{ - return masterFilePath; -} - -string getParentFilePath() -{ - return parentFilePath; -} - - -namespace { - -/*! - * Reads tex input from \a is and writes lyx output to \a os. 
- * Uses some common settings for the preamble, so this should only - * be used more than once for included documents. - * Caution: Overwrites the existing preamble settings if the new document - * contains a preamble. - * You must ensure that \p parentFilePath is properly set before calling - * this function! - */ -void tex2lyx(std::istream &is, std::ostream &os) -{ - Parser p(is); - //p.dump(); - - stringstream ss; - LyXTextClass textclass = parse_preamble(p, ss, documentclass); - - active_environments.push_back("document"); - Context context(true, textclass); - parse_text(p, ss, FLAG_END, true, context); - if (Context::empty) - // Empty document body. LyX needs at least one paragraph. - context.check_layout(ss); - context.check_end_layout(ss); - ss << "\n\\end_body\n\\end_document\n"; - active_environments.pop_back(); - ss.seekg(0); - os << ss.str(); -#ifdef TEST_PARSER - p.reset(); - ofstream parsertest("parsertest.tex"); - while (p.good()) - parsertest << p.get_token().asInput(); - // and parsertest.tex should now have identical content -#endif -} - - -/// convert TeX from \p infilename to LyX and write it to \p os -bool tex2lyx(FileName const & infilename, std::ostream &os) -{ - ifstream is(infilename.toFilesystemEncoding().c_str()); - if (!is.good()) { - cerr << "Could not open input file \"" << infilename - << "\" for reading." 
<< endl; - return false; - } - string const oldParentFilePath = parentFilePath; - parentFilePath = onlyPath(infilename.absFilename()); - tex2lyx(is, os); - parentFilePath = oldParentFilePath; - return true; -} - -} // anonymous namespace - - -bool tex2lyx(string const &infilename, FileName const &outfilename) -{ - if (isFileReadable(outfilename)) { - if (overwrite_files) { - cerr << "Overwriting existing file " - << outfilename << endl; - } else { - cerr << "Not overwriting existing file " - << outfilename << endl; - return false; - } - } else { - cerr << "Creating file " << outfilename << endl; - } - ofstream os(outfilename.toFilesystemEncoding().c_str()); - if (!os.good()) { - cerr << "Could not open output file \"" << outfilename - << "\" for writing." << endl; - return false; - } -#ifdef FILEDEBUG - cerr << "Input file: " << infilename << "\n"; - cerr << "Output file: " << outfilename << "\n"; -#endif - return tex2lyx(FileName(infilename), os); -} - -} // namespace lyx - - -int main(int argc, char * argv[]) -{ - using namespace lyx; - fs::path::default_name_check(fs::no_check); - - easyParse(argc, argv); - - if (argc <= 1) { - cerr << "Usage: tex2lyx [ command line switches ] []\n" - "See tex2lyx -help." << endl; - return 2; - } - - lyx::support::os::init(argc, argv); - - try { support::init_package(to_utf8(from_local8bit(argv[0])), - cl_system_support, cl_user_support, - support::top_build_dir_is_two_levels_up); - } catch (support::ExceptionMessage const & message) { - cerr << to_utf8(message.title_) << ":\n" - << to_utf8(message.details_) << endl; - if (message.type_ == support::ErrorException) - exit(1); - } - - // Now every known option is parsed. Look for input and output - // file name (the latter is optional). 
- string const infilename = makeAbsPath(to_utf8(from_local8bit(argv[1]))).absFilename(); - string outfilename; - if (argc > 2) { - outfilename = to_utf8(from_local8bit(argv[2])); - if (outfilename != "-") - outfilename = makeAbsPath(to_utf8(from_local8bit(argv[2]))).absFilename(); - } else - outfilename = changeExtension(infilename, ".lyx"); - - FileName const system_syntaxfile = lyx::support::libFileSearch("", "syntax.default"); - if (system_syntaxfile.empty()) { - cerr << "Error: Could not find syntax file \"syntax.default\"." << endl; - exit(1); - } - read_syntaxfile(system_syntaxfile); - if (!syntaxfile.empty()) - read_syntaxfile(makeAbsPath(syntaxfile)); - - masterFilePath = onlyPath(infilename); - parentFilePath = masterFilePath; - - if (outfilename == "-") { - if (tex2lyx(FileName(infilename), cout)) - return EXIT_SUCCESS; - else - return EXIT_FAILURE; - } else { - if (tex2lyx(infilename, FileName(outfilename))) - return EXIT_SUCCESS; - else - return EXIT_FAILURE; - } -} - -// }]) diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp new file mode 100644 index 0000000000..81afdaec6f --- /dev/null +++ b/src/tex2lyx/tex2lyx.cpp @@ -0,0 +1,561 @@ +/** + * \file tex2lyx.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author André Pönitz + * + * Full author contact details are available in file CREDITS. 
+ */ + +// {[( + +#include + +#include "tex2lyx.h" +#include "Context.h" + +#include "debug.h" +#include "LyXTextClass.h" + +#include "support/convert.h" +#include "support/filetools.h" +#include "support/fs_extras.h" +#include "support/lstrings.h" +#include "support/lyxlib.h" +#include "support/ExceptionMessage.h" +#include "support/os.h" +#include "support/package.h" +#include "support/unicode.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +namespace lyx { + +using std::endl; +using std::cout; +using std::cerr; +using std::getline; + +using std::ifstream; +using std::ofstream; +using std::istringstream; +using std::ostringstream; +using std::stringstream; +using std::string; +using std::vector; +using std::map; + +using support::changeExtension; +using support::FileName; +using support::isStrUnsignedInt; +using support::ltrim; +using support::makeAbsPath; +using support::onlyPath; +using support::rtrim; +using support::isFileReadable; + +namespace fs = boost::filesystem; + + +// Hacks to allow the thing to link in the lyxlayout stuff +LyXErr lyxerr(std::cerr.rdbuf()); + + +string const trim(string const & a, char const * p) +{ + // BOOST_ASSERT(p); + + if (a.empty() || !*p) + return a; + + string::size_type r = a.find_last_not_of(p); + string::size_type l = a.find_first_not_of(p); + + // Is this the minimal test? 
(lgb) + if (r == string::npos && l == string::npos) + return string(); + + return a.substr(l, r - l + 1); +} + + +void split(string const & s, vector & result, char delim) +{ + //cerr << "split 1: '" << s << "'\n"; + istringstream is(s); + string t; + while (getline(is, t, delim)) + result.push_back(t); + //cerr << "split 2\n"; +} + + +string join(vector const & input, char const * delim) +{ + ostringstream os; + for (size_t i = 0; i < input.size(); ++i) { + if (i) + os << delim; + os << input[i]; + } + return os.str(); +} + + +char const * const * is_known(string const & str, char const * const * what) +{ + for ( ; *what; ++what) + if (str == *what) + return what; + return 0; +} + + + +// current stack of nested environments +vector active_environments; + + +string active_environment() +{ + return active_environments.empty() ? string() : active_environments.back(); +} + + +CommandMap known_commands; +CommandMap known_environments; +CommandMap known_math_environments; + + +void add_known_command(string const & command, string const & o1, + bool o2) +{ + // We have to handle the following cases: + // definition o1 o2 invocation result + // \newcommand{\foo}{bar} "" false \foo bar + // \newcommand{\foo}[1]{bar #1} "[1]" false \foo{x} bar x + // \newcommand{\foo}[1][]{bar #1} "[1]" true \foo bar + // \newcommand{\foo}[1][]{bar #1} "[1]" true \foo[x] bar x + // \newcommand{\foo}[1][x]{bar #1} "[1]" true \foo[x] bar x + unsigned int nargs = 0; + vector arguments; + string const opt1 = rtrim(ltrim(o1, "["), "]"); + if (isStrUnsignedInt(opt1)) { + // The command has arguments + nargs = convert(opt1); + if (nargs > 0 && o2) { + // The first argument is optional + arguments.push_back(optional); + --nargs; + } + } + for (unsigned int i = 0; i < nargs; ++i) + arguments.push_back(required); + known_commands[command] = arguments; +} + + +bool noweb_mode = false; + + +namespace { + + +/*! 
+ * Read one command definition from the syntax file + */ +void read_command(Parser & p, string command, CommandMap & commands) { + if (p.next_token().asInput() == "*") { + p.get_token(); + command += '*'; + } + vector arguments; + while (p.next_token().cat() == catBegin || + p.next_token().asInput() == "[") { + if (p.next_token().cat() == catBegin) { + string const arg = p.getArg('{', '}'); + if (arg == "translate") + arguments.push_back(required); + else + arguments.push_back(verbatim); + } else { + p.getArg('[', ']'); + arguments.push_back(optional); + } + } + commands[command] = arguments; +} + + +/*! + * Read a class of environments from the syntax file + */ +void read_environment(Parser & p, string const & begin, + CommandMap & environments) +{ + string environment; + while (p.good()) { + Token const & t = p.get_token(); + if (t.cat() == catLetter) + environment += t.asInput(); + else if (!environment.empty()) { + p.putback(); + read_command(p, environment, environments); + environment.erase(); + } + if (t.cat() == catEscape && t.asInput() == "\\end") { + string const end = p.getArg('{', '}'); + if (end == begin) + return; + } + } +} + + +/*! + * Read a list of TeX commands from a reLyX compatible syntax file. + * Since this list is used after all commands that have a LyX counterpart + * are handled, it does not matter that the "syntax.default" file + * has almost all of them listed. For the same reason the reLyX-specific + * reLyXre environment is ignored. + */ +void read_syntaxfile(FileName const & file_name) +{ + ifstream is(file_name.toFilesystemEncoding().c_str()); + if (!is.good()) { + cerr << "Could not open syntax file \"" << file_name + << "\" for reading." << endl; + exit(2); + } + // We can use our TeX parser, since the syntax of the layout file is + // modeled after TeX. + // Unknown tokens are just silently ignored, this helps us to skip some + // reLyX specific things. 
+ Parser p(is); + while (p.good()) { + Token const & t = p.get_token(); + if (t.cat() == catEscape) { + string const command = t.asInput(); + if (command == "\\begin") { + string const name = p.getArg('{', '}'); + if (name == "environments" || name == "reLyXre") + // We understand "reLyXre", but it is + // not as powerful as "environments". + read_environment(p, name, + known_environments); + else if (name == "mathenvironments") + read_environment(p, name, + known_math_environments); + } else { + read_command(p, command, known_commands); + } + } + } +} + + +string documentclass; +string syntaxfile; +bool overwrite_files = false; + + +/// return the number of arguments consumed +typedef boost::function cmd_helper; + + +int parse_help(string const &, string const &) +{ + cerr << "Usage: tex2lyx [ command line switches ] []\n" + "Command line switches (case sensitive):\n" + "\t-help summarize tex2lyx usage\n" + "\t-f Force creation of .lyx files even if they exist already\n" + "\t-userdir dir try to set user directory to dir\n" + "\t-sysdir dir try to set system directory to dir\n" + "\t-c textclass declare the textclass\n" + "\t-n translate a noweb (aka literate programming) file.\n" + "\t-s syntaxfile read additional syntax file" << endl; + exit(0); +} + + +int parse_class(string const & arg, string const &) +{ + if (arg.empty()) { + cerr << "Missing textclass string after -c switch" << endl; + exit(1); + } + documentclass = arg; + return 1; +} + + +int parse_syntaxfile(string const & arg, string const &) +{ + if (arg.empty()) { + cerr << "Missing syntaxfile string after -s switch" << endl; + exit(1); + } + syntaxfile = arg; + return 1; +} + + +// Filled with the command line arguments "foo" of "-sysdir foo" or +// "-userdir foo". 
+string cl_system_support; +string cl_user_support; + + +int parse_sysdir(string const & arg, string const &) +{ + if (arg.empty()) { + cerr << "Missing directory for -sysdir switch" << endl; + exit(1); + } + cl_system_support = arg; + return 1; +} + + +int parse_userdir(string const & arg, string const &) +{ + if (arg.empty()) { + cerr << "Missing directory for -userdir switch" << endl; + exit(1); + } + cl_user_support = arg; + return 1; +} + + +int parse_force(string const &, string const &) +{ + overwrite_files = true; + return 0; +} + + +int parse_noweb(string const &, string const &) +{ + noweb_mode = true; + return 0; +} + + +void easyParse(int & argc, char * argv[]) +{ + map cmdmap; + + cmdmap["-c"] = parse_class; + cmdmap["-f"] = parse_force; + cmdmap["-s"] = parse_syntaxfile; + cmdmap["-help"] = parse_help; + cmdmap["--help"] = parse_help; + cmdmap["-n"] = parse_noweb; + cmdmap["-sysdir"] = parse_sysdir; + cmdmap["-userdir"] = parse_userdir; + + for (int i = 1; i < argc; ++i) { + std::map::const_iterator it + = cmdmap.find(argv[i]); + + // don't complain if not found - may be parsed later + if (it == cmdmap.end()) + continue; + + string arg(to_utf8(from_local8bit((i + 1 < argc) ? argv[i + 1] : ""))); + string arg2(to_utf8(from_local8bit((i + 2 < argc) ? argv[i + 2] : ""))); + + int const remove = 1 + it->second(arg, arg2); + + // Now, remove used arguments by shifting + // the following ones remove places down. + argc -= remove; + for (int j = i; j < argc; ++j) + argv[j] = argv[j + remove]; + --i; + } +} + + +// path of the first parsed file +string masterFilePath; +// path of the currently parsed file +string parentFilePath; + +} // anonymous namespace + + +string getMasterFilePath() +{ + return masterFilePath; +} + +string getParentFilePath() +{ + return parentFilePath; +} + + +namespace { + +/*! + * Reads tex input from \a is and writes lyx output to \a os. 
+ * Uses some common settings for the preamble, so this should only + * be used more than once for included documents. + * Caution: Overwrites the existing preamble settings if the new document + * contains a preamble. + * You must ensure that \p parentFilePath is properly set before calling + * this function! + */ +void tex2lyx(std::istream &is, std::ostream &os) +{ + Parser p(is); + //p.dump(); + + stringstream ss; + LyXTextClass textclass = parse_preamble(p, ss, documentclass); + + active_environments.push_back("document"); + Context context(true, textclass); + parse_text(p, ss, FLAG_END, true, context); + if (Context::empty) + // Empty document body. LyX needs at least one paragraph. + context.check_layout(ss); + context.check_end_layout(ss); + ss << "\n\\end_body\n\\end_document\n"; + active_environments.pop_back(); + ss.seekg(0); + os << ss.str(); +#ifdef TEST_PARSER + p.reset(); + ofstream parsertest("parsertest.tex"); + while (p.good()) + parsertest << p.get_token().asInput(); + // and parsertest.tex should now have identical content +#endif +} + + +/// convert TeX from \p infilename to LyX and write it to \p os +bool tex2lyx(FileName const & infilename, std::ostream &os) +{ + ifstream is(infilename.toFilesystemEncoding().c_str()); + if (!is.good()) { + cerr << "Could not open input file \"" << infilename + << "\" for reading." 
<< endl; + return false; + } + string const oldParentFilePath = parentFilePath; + parentFilePath = onlyPath(infilename.absFilename()); + tex2lyx(is, os); + parentFilePath = oldParentFilePath; + return true; +} + +} // anonymous namespace + + +bool tex2lyx(string const &infilename, FileName const &outfilename) +{ + if (isFileReadable(outfilename)) { + if (overwrite_files) { + cerr << "Overwriting existing file " + << outfilename << endl; + } else { + cerr << "Not overwriting existing file " + << outfilename << endl; + return false; + } + } else { + cerr << "Creating file " << outfilename << endl; + } + ofstream os(outfilename.toFilesystemEncoding().c_str()); + if (!os.good()) { + cerr << "Could not open output file \"" << outfilename + << "\" for writing." << endl; + return false; + } +#ifdef FILEDEBUG + cerr << "Input file: " << infilename << "\n"; + cerr << "Output file: " << outfilename << "\n"; +#endif + return tex2lyx(FileName(infilename), os); +} + +} // namespace lyx + + +int main(int argc, char * argv[]) +{ + using namespace lyx; + fs::path::default_name_check(fs::no_check); + + easyParse(argc, argv); + + if (argc <= 1) { + cerr << "Usage: tex2lyx [ command line switches ] []\n" + "See tex2lyx -help." << endl; + return 2; + } + + lyx::support::os::init(argc, argv); + + try { support::init_package(to_utf8(from_local8bit(argv[0])), + cl_system_support, cl_user_support, + support::top_build_dir_is_two_levels_up); + } catch (support::ExceptionMessage const & message) { + cerr << to_utf8(message.title_) << ":\n" + << to_utf8(message.details_) << endl; + if (message.type_ == support::ErrorException) + exit(1); + } + + // Now every known option is parsed. Look for input and output + // file name (the latter is optional). 
+ string const infilename = makeAbsPath(to_utf8(from_local8bit(argv[1]))).absFilename(); + string outfilename; + if (argc > 2) { + outfilename = to_utf8(from_local8bit(argv[2])); + if (outfilename != "-") + outfilename = makeAbsPath(to_utf8(from_local8bit(argv[2]))).absFilename(); + } else + outfilename = changeExtension(infilename, ".lyx"); + + FileName const system_syntaxfile = lyx::support::libFileSearch("", "syntax.default"); + if (system_syntaxfile.empty()) { + cerr << "Error: Could not find syntax file \"syntax.default\"." << endl; + exit(1); + } + read_syntaxfile(system_syntaxfile); + if (!syntaxfile.empty()) + read_syntaxfile(makeAbsPath(syntaxfile)); + + masterFilePath = onlyPath(infilename); + parentFilePath = masterFilePath; + + if (outfilename == "-") { + if (tex2lyx(FileName(infilename), cout)) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; + } else { + if (tex2lyx(infilename, FileName(outfilename))) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; + } +} + +// }]) diff --git a/src/tex2lyx/texparser.C b/src/tex2lyx/texparser.C deleted file mode 100644 index cdd5c39921..0000000000 --- a/src/tex2lyx/texparser.C +++ /dev/null @@ -1,520 +0,0 @@ -/** - * \file Parser.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author André Pönitz - * - * Full author contact details are available in file CREDITS. 
- */ - -#include - -#include "Parser.h" - -#include -#include - - -namespace lyx { - -using std::cerr; -using std::endl; -using std::fill; -using std::istream; -using std::istringstream; -using std::ostringstream; -using std::ostream; -using std::string; - - -namespace { - -CatCode theCatcode[256]; - -void catInit() -{ - fill(theCatcode, theCatcode + 256, catOther); - fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter); - fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter); - - theCatcode[int('\\')] = catEscape; - theCatcode[int('{')] = catBegin; - theCatcode[int('}')] = catEnd; - theCatcode[int('$')] = catMath; - theCatcode[int('&')] = catAlign; - theCatcode[int('\n')] = catNewline; - theCatcode[int('#')] = catParameter; - theCatcode[int('^')] = catSuper; - theCatcode[int('_')] = catSub; - theCatcode[0x7f] = catIgnore; - theCatcode[int(' ')] = catSpace; - theCatcode[int('\t')] = catSpace; - theCatcode[int('\r')] = catNewline; - theCatcode[int('~')] = catActive; - theCatcode[int('%')] = catComment; - - // This is wrong! - theCatcode[int('@')] = catLetter; -} - - -/*! - * Translate a line ending to '\n'. - * \p c must have catcode catNewline, and it must be the last character read - * from \p is. 
- */ -char getNewline(istream & is, char c) -{ - // we have to handle 3 different line endings: - // - UNIX (\n) - // - MAC (\r) - // - DOS (\r\n) - if (c == '\r') { - // MAC or DOS - if (is.get(c) && c != '\n') { - // MAC - is.putback(c); - } - return '\n'; - } - // UNIX - return c; -} - -} - - -// -// catcodes -// - -CatCode catcode(unsigned char c) -{ - return theCatcode[c]; -} - - - -// -// Token -// - -ostream & operator<<(ostream & os, Token const & t) -{ - if (t.cat() == catComment) - os << '%' << t.cs() << '\n'; - else if (t.cat() == catSpace) - os << t.cs(); - else if (t.cat() == catEscape) - os << '\\' << t.cs() << ' '; - else if (t.cat() == catLetter) - os << t.character(); - else if (t.cat() == catNewline) - os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n"; - else - os << '[' << t.character() << ',' << t.cat() << ']'; - return os; -} - - -string Token::asString() const -{ - return cs_.size() ? cs_ : string(1, char_); -} - - -string Token::asInput() const -{ - if (cat_ == catComment) - return '%' + cs_ + '\n'; - if (cat_ == catSpace || cat_ == catNewline) - return cs_; - return char_ ? string(1, char_) : '\\' + cs_; -} - - -// -// Parser -// - - -Parser::Parser(istream & is) - : lineno_(0), pos_(0) -{ - tokenize(is); -} - - -Parser::Parser(string const & s) - : lineno_(0), pos_(0) -{ - istringstream is(s); - tokenize(is); -} - - -void Parser::push_back(Token const & t) -{ - tokens_.push_back(t); -} - - -void Parser::pop_back() -{ - tokens_.pop_back(); -} - - -Token const & Parser::prev_token() const -{ - static const Token dummy; - return pos_ > 1 ? tokens_[pos_ - 2] : dummy; -} - - -Token const & Parser::curr_token() const -{ - static const Token dummy; - return pos_ > 0 ? tokens_[pos_ - 1] : dummy; -} - - -Token const & Parser::next_token() const -{ - static const Token dummy; - return good() ? 
tokens_[pos_] : dummy; -} - - -Token const & Parser::get_token() -{ - static const Token dummy; - //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n'; - return good() ? tokens_[pos_++] : dummy; -} - - -bool Parser::isParagraph() const -{ - // A new paragraph in TeX ist started - // - either by a newline, following any amount of whitespace - // characters (including zero), and another newline - // - or the token \par - if (curr_token().cat() == catNewline && - (curr_token().cs().size() > 1 || - (next_token().cat() == catSpace && - pos_ < tokens_.size() - 1 && - tokens_[pos_ + 1].cat() == catNewline))) - return true; - if (curr_token().cat() == catEscape && curr_token().cs() == "par") - return true; - return false; -} - - -void Parser::skip_spaces(bool skip_comments) -{ - // We just silently return if we have no more tokens. - // skip_spaces() should be callable at any time, - // the caller must check p::good() anyway. - while (good()) { - get_token(); - if (isParagraph()) { - putback(); - break; - } - if ( curr_token().cat() == catSpace || - curr_token().cat() == catNewline || - (curr_token().cat() == catComment && curr_token().cs().empty())) - continue; - if (skip_comments && curr_token().cat() == catComment) - cerr << " Ignoring comment: " << curr_token().asInput(); - else { - putback(); - break; - } - } -} - - -void Parser::unskip_spaces(bool skip_comments) -{ - while (pos_ > 0) { - if ( curr_token().cat() == catSpace || - (curr_token().cat() == catNewline && curr_token().cs().size() == 1)) - putback(); - else if (skip_comments && curr_token().cat() == catComment) { - // TODO: Get rid of this - cerr << "Unignoring comment: " << curr_token().asInput(); - putback(); - } - else - break; - } -} - - -void Parser::putback() -{ - --pos_; -} - - -bool Parser::good() const -{ - return pos_ < tokens_.size(); -} - - -char Parser::getChar() -{ - if (!good()) - error("The input stream is not well..."); - return tokens_[pos_++].character(); -} - - 
-Parser::Arg Parser::getFullArg(char left, char right) -{ - skip_spaces(true); - - // This is needed if a partial file ends with a command without arguments, - // e. g. \medskip - if (! good()) - return std::make_pair(false, string()); - - string result; - char c = getChar(); - - if (c != left) { - putback(); - return std::make_pair(false, string()); - } else - while ((c = getChar()) != right && good()) { - // Ignore comments - if (curr_token().cat() == catComment) { - if (!curr_token().cs().empty()) - cerr << "Ignoring comment: " << curr_token().asInput(); - } - else - result += curr_token().asInput(); - } - - return std::make_pair(true, result); -} - - -string Parser::getArg(char left, char right) -{ - return getFullArg(left, right).second; -} - - -string Parser::getFullOpt() -{ - Arg arg = getFullArg('[', ']'); - if (arg.first) - return '[' + arg.second + ']'; - return arg.second; -} - - -string Parser::getOpt() -{ - string const res = getArg('[', ']'); - return res.empty() ? string() : '[' + res + ']'; -} - - -string const Parser::verbatimEnvironment(string const & name) -{ - if (!good()) - return string(); - - ostringstream os; - for (Token t = get_token(); good(); t = get_token()) { - if (t.cat() == catBegin) { - putback(); - os << '{' << verbatim_item() << '}'; - } else if (t.asInput() == "\\begin") { - string const env = getArg('{', '}'); - os << "\\begin{" << env << '}' - << verbatimEnvironment(env) - << "\\end{" << env << '}'; - } else if (t.asInput() == "\\end") { - string const end = getArg('{', '}'); - if (end != name) - cerr << "\\end{" << end - << "} does not match \\begin{" << name - << "}." 
<< endl; - return os.str(); - } else - os << t.asInput(); - } - cerr << "unexpected end of input" << endl; - return os.str(); -} - - -void Parser::tokenize(istream & is) -{ - static bool init_done = false; - - if (!init_done) { - catInit(); - init_done = true; - } - - char c; - while (is.get(c)) { - //cerr << "reading c: " << c << "\n"; - - switch (catcode(c)) { - case catSpace: { - string s(1, c); - while (is.get(c) && catcode(c) == catSpace) - s += c; - if (catcode(c) != catSpace) - is.putback(c); - push_back(Token(s, catSpace)); - break; - } - - case catNewline: { - ++lineno_; - string s(1, getNewline(is, c)); - while (is.get(c) && catcode(c) == catNewline) { - ++lineno_; - s += getNewline(is, c); - } - if (catcode(c) != catNewline) - is.putback(c); - push_back(Token(s, catNewline)); - break; - } - - case catComment: { - // We don't treat "%\n" combinations here specially because - // we want to preserve them in the preamble - string s; - while (is.get(c) && catcode(c) != catNewline) - s += c; - // handle possible DOS line ending - if (catcode(c) == catNewline) - c = getNewline(is, c); - // Note: The '%' at the beginning and the '\n' at the end - // of the comment are not stored. 
- ++lineno_; - push_back(Token(s, catComment)); - break; - } - - case catEscape: { - is.get(c); - if (!is) { - error("unexpected end of input"); - } else { - string s(1, c); - if (catcode(c) == catLetter) { - // collect letters - while (is.get(c) && catcode(c) == catLetter) - s += c; - if (catcode(c) != catLetter) - is.putback(c); - } - push_back(Token(s, catEscape)); - } - break; - } - - case catIgnore: { - cerr << "ignoring a char: " << int(c) << "\n"; - break; - } - - default: - push_back(Token(c, catcode(c))); - } - } -} - - -void Parser::dump() const -{ - cerr << "\nTokens: "; - for (unsigned i = 0; i < tokens_.size(); ++i) { - if (i == pos_) - cerr << " <#> "; - cerr << tokens_[i]; - } - cerr << " pos: " << pos_ << "\n"; -} - - -void Parser::error(string const & msg) -{ - cerr << "Line ~" << lineno_ << ": parse error: " << msg << endl; - dump(); - //exit(1); -} - - -string Parser::verbatimOption() -{ - string res; - if (next_token().character() == '[') { - Token t = get_token(); - for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) { - if (t.cat() == catBegin) { - putback(); - res += '{' + verbatim_item() + '}'; - } else - res += t.asString(); - } - } - return res; -} - - -string Parser::verbatim_item() -{ - if (!good()) - error("stream bad"); - skip_spaces(); - if (next_token().cat() == catBegin) { - Token t = get_token(); // skip brace - string res; - for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) { - if (t.cat() == catBegin) { - putback(); - res += '{' + verbatim_item() + '}'; - } - else - res += t.asInput(); - } - return res; - } - return get_token().asInput(); -} - - -void Parser::reset() -{ - pos_ = 0; -} - - -void Parser::setCatCode(char c, CatCode cat) -{ - theCatcode[(unsigned char)c] = cat; -} - - -CatCode Parser::getCatCode(char c) const -{ - return theCatcode[(unsigned char)c]; -} - - -} // namespace lyx diff --git a/src/tex2lyx/texparser.h b/src/tex2lyx/texparser.h deleted file mode 100644 index 
5c84898210..0000000000 --- a/src/tex2lyx/texparser.h +++ /dev/null @@ -1,214 +0,0 @@ -// -*- C++ -*- -/** - * \file Parser.h - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author André Pönitz - * - * Full author contact details are available in file CREDITS. - */ - -#ifndef PARSER_H -#define PARSER_H - -#include -#include -#include - - -namespace lyx { - - -enum mode_type {UNDECIDED_MODE, TEXT_MODE, MATH_MODE, MATHTEXT_MODE, TABLE_MODE}; - -mode_type asMode(mode_type oldmode, std::string const & str); - - -// These are TeX's catcodes -enum CatCode { - catEscape, // 0 backslash - catBegin, // 1 { - catEnd, // 2 } - catMath, // 3 $ - catAlign, // 4 & - catNewline, // 5 ^^M - catParameter, // 6 # - catSuper, // 7 ^ - catSub, // 8 _ - catIgnore, // 9 - catSpace, // 10 space - catLetter, // 11 a-zA-Z - catOther, // 12 none of the above - catActive, // 13 ~ - catComment, // 14 % - catInvalid // 15 -}; - - -CatCode catcode(unsigned char c); - - -enum { - FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing - FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process - FLAG_END = 1 << 3, // next \\end ends the parsing process - FLAG_BRACK_LAST = 1 << 4, // next closing bracket ends the parsing - FLAG_TEXTMODE = 1 << 5, // we are in a box - FLAG_ITEM = 1 << 6, // read a (possibly braced token) - FLAG_LEAVE = 1 << 7, // leave the loop at the end - FLAG_SIMPLE = 1 << 8, // next $ leaves the loop - FLAG_EQUATION = 1 << 9, // next \] leaves the loop - FLAG_SIMPLE2 = 1 << 10, // next \) leaves the loop - FLAG_OPTION = 1 << 11, // read [...] 
style option - FLAG_BRACED = 1 << 12, // read {...} style argument - FLAG_CELL = 1 << 13, // read table cell - FLAG_TABBING = 1 << 14 // We are inside a tabbing environment -}; - - - -// -// Helper class for parsing -// - -class Token { -public: - /// - Token() : cs_(), char_(0), cat_(catIgnore) {} - /// - Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {} - /// - Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {} - - /// - std::string const & cs() const { return cs_; } - /// Returns the catcode of the token - CatCode cat() const { return cat_; } - /// - char character() const { return char_; } - /// Returns the token as string - std::string asString() const; - /// Returns the token verbatim - std::string asInput() const; - -private: - /// - std::string cs_; - /// - char char_; - /// - CatCode cat_; -}; - -std::ostream & operator<<(std::ostream & os, Token const & t); - - -/*! - * Actual parser class - * - * The parser parses every character of the inputstream into a token - * and classifies the token. - * The following transformations are done: - * - Consecutive spaces are combined into one single token with CatCode catSpace - * - Consecutive newlines are combined into one single token with CatCode catNewline - * - Comments and %\n combinations are parsed into one token with CatCode catComment - */ - -class Parser { - -public: - /// - Parser(std::istream & is); - /// - Parser(std::string const & s); - - /// - int lineno() const { return lineno_; } - /// - void putback(); - /// dump contents to screen - void dump() const; - - /// - typedef std::pair Arg; - /*! - * Get an argument enclosed by \p left and \p right. - * \returns wether an argument was found in \p Arg.first and the - * argument in \p Arg.second. \see getArg(). - */ - Arg getFullArg(char left, char right); - /*! - * Get an argument enclosed by \p left and \p right. 
- * \returns the argument (without \p left and \p right) or the empty - * string if the next non-space token is not \p left. Use - * getFullArg() if you need to know wether there was an empty - * argument or no argument at all. - */ - std::string getArg(char left, char right); - /*! - * \returns getFullArg('[', ']') including the brackets or the - * empty string if no argument was found. - */ - std::string getFullOpt(); - /// \returns getArg('[', ']') including the brackets - std::string getOpt(); - /*! - * \returns the contents of the environment \p name. - * \begin{name} must be parsed already, \end{name} - * is parsed but not returned. - */ - std::string const verbatimEnvironment(std::string const & name); - /// Returns the character of the current token and increments the token position. - char getChar(); - /// - void error(std::string const & msg); - /// Parses \p is into tokens - void tokenize(std::istream & is); - /// - void push_back(Token const & t); - /// - void pop_back(); - /// The previous token. - Token const & prev_token() const; - /// The current token. - Token const & curr_token() const; - /// The next token. - Token const & next_token() const; - /// Make the next token current and return that. 
- Token const & get_token(); - /// \return whether the current token starts a new paragraph - bool isParagraph() const; - /// skips spaces (and comments if \p skip_comments is true) - void skip_spaces(bool skip_comments = false); - /// puts back spaces (and comments if \p skip_comments is true) - void unskip_spaces(bool skip_comments = false); - /// - void lex(std::string const & s); - /// - bool good() const; - /// - std::string verbatim_item(); - /// - std::string verbatimOption(); - /// resets the parser to initial state - void reset(); - /// - void setCatCode(char c, CatCode cat); - /// - CatCode getCatCode(char c) const; - -private: - /// - int lineno_; - /// - std::vector tokens_; - /// - unsigned pos_; -}; - - - -} // namespace lyx - -#endif diff --git a/src/tex2lyx/text.C b/src/tex2lyx/text.C deleted file mode 100644 index 136903a283..0000000000 --- a/src/tex2lyx/text.C +++ /dev/null @@ -1,2389 +0,0 @@ -/** - * \file tex2lyx/text.cpp - * This file is part of LyX, the document processor. - * Licence details can be found in the file COPYING. - * - * \author André Pönitz - * \author Jean-Marc Lasgouttes - * - * Full author contact details are available in file CREDITS. 
- */ - -// {[( - -#include - -#include "tex2lyx.h" -#include "Context.h" -#include "FloatList.h" -#include "lengthcommon.h" -#include "support/lstrings.h" -#include "support/convert.h" -#include "support/filetools.h" - -#include -#include - -#include -#include -#include -#include - - -namespace lyx { - -using support::addExtension; -using support::changeExtension; -using support::FileName; -using support::makeAbsPath; -using support::makeRelPath; -using support::rtrim; -using support::suffixIs; -using support::contains; -using support::subst; - -using std::cerr; -using std::endl; - -using std::map; -using std::ostream; -using std::ostringstream; -using std::istringstream; -using std::string; -using std::vector; - -namespace fs = boost::filesystem; - - -void parse_text_in_inset(Parser & p, ostream & os, unsigned flags, bool outer, - Context const & context) -{ - Context newcontext(true, context.textclass); - newcontext.font = context.font; - parse_text(p, os, flags, outer, newcontext); - newcontext.check_end_layout(os); -} - - -namespace { - -/// parses a paragraph snippet, useful for example for \\emph{...} -void parse_text_snippet(Parser & p, ostream & os, unsigned flags, bool outer, - Context & context) -{ - Context newcontext(context); - // Don't inherit the extra stuff - newcontext.extra_stuff.clear(); - parse_text(p, os, flags, outer, newcontext); - // Make sure that we don't create invalid .lyx files - context.need_layout = newcontext.need_layout; - context.need_end_layout = newcontext.need_end_layout; -} - - -/*! - * Thin wrapper around parse_text_snippet() using a string. - * - * We completely ignore \c context.need_layout and \c context.need_end_layout, - * because our return value is not used directly (otherwise the stream version - * of parse_text_snippet() could be used). That means that the caller needs - * to do layout management manually. - * This is intended to parse text that does not create any layout changes. 
- */ -string parse_text_snippet(Parser & p, unsigned flags, const bool outer, - Context & context) -{ - Context newcontext(context); - newcontext.need_layout = false; - newcontext.need_end_layout = false; - newcontext.new_layout_allowed = false; - // Avoid warning by Context::~Context() - newcontext.extra_stuff.clear(); - ostringstream os; - parse_text_snippet(p, os, flags, outer, newcontext); - return os.str(); -} - - -char const * const known_latex_commands[] = { "ref", "cite", "label", "index", -"printindex", "pageref", "url", "vref", "vpageref", "prettyref", "eqref", 0 }; - -/*! - * natbib commands. - * We can't put these into known_latex_commands because the argument order - * is reversed in lyx if there are 2 arguments. - * The starred forms are also known. - */ -char const * const known_natbib_commands[] = { "cite", "citet", "citep", -"citealt", "citealp", "citeauthor", "citeyear", "citeyearpar", -"citefullauthor", "Citet", "Citep", "Citealt", "Citealp", "Citeauthor", 0 }; - -/*! - * jurabib commands. - * We can't put these into known_latex_commands because the argument order - * is reversed in lyx if there are 2 arguments. - * No starred form other than "cite*" known. 
- */ -char const * const known_jurabib_commands[] = { "cite", "citet", "citep", -"citealt", "citealp", "citeauthor", "citeyear", "citeyearpar", -// jurabib commands not (yet) supported by LyX: -// "fullcite", -// "footcite", "footcitet", "footcitep", "footcitealt", "footcitealp", -// "footciteauthor", "footciteyear", "footciteyearpar", -"citefield", "citetitle", "cite*", 0 }; - -/// LaTeX names for quotes -char const * const known_quotes[] = { "glqq", "grqq", "quotedblbase", -"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0}; - -/// the same as known_quotes with .lyx names -char const * const known_coded_quotes[] = { "gld", "grd", "gld", -"grd", "gls", "fls", "frs", 0}; - -/// LaTeX names for font sizes -char const * const known_sizes[] = { "tiny", "scriptsize", "footnotesize", -"small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0}; - -/// the same as known_sizes with .lyx names -char const * const known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize", -"small", "normal", "large", "larger", "largest", "huge", "giant", 0}; - -/// LaTeX 2.09 names for font families -char const * const known_old_font_families[] = { "rm", "sf", "tt", 0}; - -/// LaTeX names for font families -char const * const known_font_families[] = { "rmfamily", "sffamily", -"ttfamily", 0}; - -/// the same as known_old_font_families and known_font_families with .lyx names -char const * const known_coded_font_families[] = { "roman", "sans", -"typewriter", 0}; - -/// LaTeX 2.09 names for font series -char const * const known_old_font_series[] = { "bf", 0}; - -/// LaTeX names for font series -char const * const known_font_series[] = { "bfseries", "mdseries", 0}; - -/// the same as known_old_font_series and known_font_series with .lyx names -char const * const known_coded_font_series[] = { "bold", "medium", 0}; - -/// LaTeX 2.09 names for font shapes -char const * const known_old_font_shapes[] = { "it", "sl", "sc", 0}; - -/// LaTeX names for font shapes 
-char const * const known_font_shapes[] = { "itshape", "slshape", "scshape", -"upshape", 0}; - -/// the same as known_old_font_shapes and known_font_shapes with .lyx names -char const * const known_coded_font_shapes[] = { "italic", "slanted", -"smallcaps", "up", 0}; - -/*! - * Graphics file extensions known by the dvips driver of the graphics package. - * These extensions are used to complete the filename of an included - * graphics file if it does not contain an extension. - * The order must be the same that latex uses to find a file, because we - * will use the first extension that matches. - * This is only an approximation for the common cases. If we would want to - * do it right in all cases, we would need to know which graphics driver is - * used and know the extensions of every driver of the graphics package. - */ -char const * const known_dvips_graphics_formats[] = {"eps", "ps", "eps.gz", -"ps.gz", "eps.Z", "ps.Z", 0}; - -/*! - * Graphics file extensions known by the pdftex driver of the graphics package. - * \sa known_dvips_graphics_formats - */ -char const * const known_pdftex_graphics_formats[] = {"png", "pdf", "jpg", -"mps", "tif", 0}; - -/*! - * Known file extensions for TeX files as used by \\include. 
- */ -char const * const known_tex_extensions[] = {"tex", 0}; - -/// spaces known by InsetSpace -char const * const known_spaces[] = { " ", "space", ",", "thinspace", "quad", -"qquad", "enspace", "enskip", "negthinspace", 0}; - -/// the same as known_spaces with .lyx names -char const * const known_coded_spaces[] = { "space{}", "space{}", -"thinspace{}", "thinspace{}", "quad{}", "qquad{}", "enspace{}", "enskip{}", -"negthinspace{}", 0}; - - -/// splits "x=z, y=b" into a map -map split_map(string const & s) -{ - map res; - vector v; - split(s, v); - for (size_t i = 0; i < v.size(); ++i) { - size_t const pos = v[i].find('='); - string const index = v[i].substr(0, pos); - string const value = v[i].substr(pos + 1, string::npos); - res[trim(index)] = trim(value); - } - return res; -} - - -/*! - * Split a LaTeX length into value and unit. - * The latter can be a real unit like "pt", or a latex length variable - * like "\textwidth". The unit may contain additional stuff like glue - * lengths, but we don't care, because such lengths are ERT anyway. - * \returns true if \p value and \p unit are valid. - */ -bool splitLatexLength(string const & len, string & value, string & unit) -{ - if (len.empty()) - return false; - const string::size_type i = len.find_first_not_of(" -+0123456789.,"); - //'4,5' is a valid LaTeX length number. Change it to '4.5' - string const length = subst(len, ',', '.'); - if (i == string::npos) - return false; - if (i == 0) { - if (len[0] == '\\') { - // We had something like \textwidth without a factor - value = "1.0"; - } else { - return false; - } - } else { - value = trim(string(length, 0, i)); - } - if (value == "-") - value = "-1.0"; - // 'cM' is a valid LaTeX length unit. Change it to 'cm' - if (contains(len, '\\')) - unit = trim(string(len, i)); - else - unit = support::ascii_lowercase(trim(string(len, i))); - return true; -} - - -/// A simple function to translate a latex length to something lyx can -/// understand. 
Not perfect, but rather best-effort. -bool translate_len(string const & length, string & valstring, string & unit) -{ - if (!splitLatexLength(length, valstring, unit)) - return false; - // LyX uses percent values - double value; - istringstream iss(valstring); - iss >> value; - value *= 100; - ostringstream oss; - oss << value; - string const percentval = oss.str(); - // a normal length - if (unit.empty() || unit[0] != '\\') - return true; - string::size_type const i = unit.find(' '); - string const endlen = (i == string::npos) ? string() : string(unit, i); - if (unit == "\\textwidth") { - valstring = percentval; - unit = "text%" + endlen; - } else if (unit == "\\columnwidth") { - valstring = percentval; - unit = "col%" + endlen; - } else if (unit == "\\paperwidth") { - valstring = percentval; - unit = "page%" + endlen; - } else if (unit == "\\linewidth") { - valstring = percentval; - unit = "line%" + endlen; - } else if (unit == "\\paperheight") { - valstring = percentval; - unit = "pheight%" + endlen; - } else if (unit == "\\textheight") { - valstring = percentval; - unit = "theight%" + endlen; - } - return true; -} - -} - - -string translate_len(string const & length) -{ - string unit; - string value; - if (translate_len(length, value, unit)) - return value + unit; - // If the input is invalid, return what we have. - return length; -} - - -namespace { - -/*! - * Translates a LaTeX length into \p value, \p unit and - * \p special parts suitable for a box inset. - * The difference from translate_len() is that a box inset knows about - * some special "units" that are stored in \p special. 
- */ -void translate_box_len(string const & length, string & value, string & unit, string & special) -{ - if (translate_len(length, value, unit)) { - if (unit == "\\height" || unit == "\\depth" || - unit == "\\totalheight" || unit == "\\width") { - special = unit.substr(1); - // The unit is not used, but LyX requires a dummy setting - unit = "in"; - } else - special = "none"; - } else { - value.clear(); - unit = length; - special = "none"; - } -} - - -/*! - * Find a file with basename \p name in path \p path and an extension - * in \p extensions. - */ -string find_file(string const & name, string const & path, - char const * const * extensions) -{ - // FIXME UNICODE encoding of name and path may be wrong (makeAbsPath - // expects utf8) - for (char const * const * what = extensions; *what; ++what) { - string const trial = addExtension(name, *what); - if (fs::exists(makeAbsPath(trial, path).toFilesystemEncoding())) - return trial; - } - return string(); -} - - -void begin_inset(ostream & os, string const & name) -{ - os << "\n\\begin_inset " << name; -} - - -void end_inset(ostream & os) -{ - os << "\n\\end_inset\n\n"; -} - - -void skip_braces(Parser & p) -{ - if (p.next_token().cat() != catBegin) - return; - p.get_token(); - if (p.next_token().cat() == catEnd) { - p.get_token(); - return; - } - p.putback(); -} - - -void handle_ert(ostream & os, string const & s, Context & context) -{ - // We must have a valid layout before outputting the ERT inset. 
- context.check_layout(os); - Context newcontext(true, context.textclass); - begin_inset(os, "ERT"); - os << "\nstatus collapsed\n"; - newcontext.check_layout(os); - for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { - if (*it == '\\') - os << "\n\\backslash\n"; - else if (*it == '\n') { - newcontext.new_paragraph(os); - newcontext.check_layout(os); - } else - os << *it; - } - newcontext.check_end_layout(os); - end_inset(os); -} - - -void handle_comment(ostream & os, string const & s, Context & context) -{ - // TODO: Handle this better - Context newcontext(true, context.textclass); - begin_inset(os, "ERT"); - os << "\nstatus collapsed\n"; - newcontext.check_layout(os); - for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { - if (*it == '\\') - os << "\n\\backslash\n"; - else - os << *it; - } - // make sure that our comment is the last thing on the line - newcontext.new_paragraph(os); - newcontext.check_layout(os); - newcontext.check_end_layout(os); - end_inset(os); -} - - -class isLayout : public std::unary_function { -public: - isLayout(string const name) : name_(name) {} - bool operator()(LyXLayout_ptr const & ptr) const { - return ptr->latexname() == name_; - } -private: - string const name_; -}; - - -LyXLayout_ptr findLayout(LyXTextClass const & textclass, - string const & name) -{ - LyXTextClass::const_iterator beg = textclass.begin(); - LyXTextClass::const_iterator end = textclass.end(); - - LyXTextClass::const_iterator - it = std::find_if(beg, end, isLayout(name)); - - return (it == end) ? 
LyXLayout_ptr() : *it; -} - - -void eat_whitespace(Parser &, ostream &, Context &, bool); - - -void output_command_layout(ostream & os, Parser & p, bool outer, - Context & parent_context, - LyXLayout_ptr newlayout) -{ - parent_context.check_end_layout(os); - Context context(true, parent_context.textclass, newlayout, - parent_context.layout, parent_context.font); - if (parent_context.deeper_paragraph) { - // We are beginning a nested environment after a - // deeper paragraph inside the outer list environment. - // Therefore we don't need to output a "begin deeper". - context.need_end_deeper = true; - } - context.check_deeper(os); - context.check_layout(os); - if (context.layout->optionalargs > 0) { - eat_whitespace(p, os, context, false); - if (p.next_token().character() == '[') { - p.get_token(); // eat '[' - begin_inset(os, "OptArg\n"); - os << "status collapsed\n\n"; - parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context); - end_inset(os); - eat_whitespace(p, os, context, false); - } - } - parse_text(p, os, FLAG_ITEM, outer, context); - context.check_end_layout(os); - if (parent_context.deeper_paragraph) { - // We must suppress the "end deeper" because we - // suppressed the "begin deeper" above. - context.need_end_deeper = false; - } - context.check_end_deeper(os); - // We don't need really a new paragraph, but - // we must make sure that the next item gets a \begin_layout. - parent_context.new_paragraph(os); -} - - -/*! - * Output a space if necessary. - * This function gets called for every whitespace token. - * - * We have three cases here: - * 1. A space must be suppressed. Example: The lyxcode case below - * 2. A space may be suppressed. Example: Spaces before "\par" - * 3. A space must not be suppressed. Example: A space between two words - * - * We currently handle only 1. and 3 and from 2. only the case of - * spaces before newlines as a side effect. - * - * 2. could be used to suppress as many spaces as possible. 
This has two effects: - * - Reimporting LyX generated LaTeX files changes almost no whitespace - * - Superflous whitespace from non LyX generated LaTeX files is removed. - * The drawback is that the logic inside the function becomes - * complicated, and that is the reason why it is not implemented. - */ -void check_space(Parser const & p, ostream & os, Context & context) -{ - Token const next = p.next_token(); - Token const curr = p.curr_token(); - // A space before a single newline and vice versa must be ignored - // LyX emits a newline before \end{lyxcode}. - // This newline must be ignored, - // otherwise LyX will add an additional protected space. - if (next.cat() == catSpace || - next.cat() == catNewline || - (next.cs() == "end" && context.layout->free_spacing && curr.cat() == catNewline)) { - return; - } - context.check_layout(os); - os << ' '; -} - - -/*! - * Parse all arguments of \p command - */ -void parse_arguments(string const & command, - vector const & template_arguments, - Parser & p, ostream & os, bool outer, Context & context) -{ - string ert = command; - size_t no_arguments = template_arguments.size(); - for (size_t i = 0; i < no_arguments; ++i) { - switch (template_arguments[i]) { - case required: - // This argument contains regular LaTeX - handle_ert(os, ert + '{', context); - eat_whitespace(p, os, context, false); - parse_text(p, os, FLAG_ITEM, outer, context); - ert = "}"; - break; - case verbatim: - // This argument may contain special characters - ert += '{' + p.verbatim_item() + '}'; - break; - case optional: - ert += p.getOpt(); - break; - } - } - handle_ert(os, ert, context); -} - - -/*! - * Check whether \p command is a known command. If yes, - * handle the command with all arguments. - * \return true if the command was parsed, false otherwise. 
- */ -bool parse_command(string const & command, Parser & p, ostream & os, - bool outer, Context & context) -{ - if (known_commands.find(command) != known_commands.end()) { - parse_arguments(command, known_commands[command], p, os, - outer, context); - return true; - } - return false; -} - - -/// Parses a minipage or parbox -void parse_box(Parser & p, ostream & os, unsigned flags, bool outer, - Context & parent_context, bool use_parbox) -{ - string position; - string inner_pos; - string height_value = "0"; - string height_unit = "pt"; - string height_special = "none"; - string latex_height; - if (p.next_token().asInput() == "[") { - position = p.getArg('[', ']'); - if (position != "t" && position != "c" && position != "b") { - position = "c"; - cerr << "invalid position for minipage/parbox" << endl; - } - if (p.next_token().asInput() == "[") { - latex_height = p.getArg('[', ']'); - translate_box_len(latex_height, height_value, height_unit, height_special); - - if (p.next_token().asInput() == "[") { - inner_pos = p.getArg('[', ']'); - if (inner_pos != "c" && inner_pos != "t" && - inner_pos != "b" && inner_pos != "s") { - inner_pos = position; - cerr << "invalid inner_pos for minipage/parbox" - << endl; - } - } - } - } - string width_value; - string width_unit; - string const latex_width = p.verbatim_item(); - translate_len(latex_width, width_value, width_unit); - if (contains(width_unit, '\\') || contains(height_unit, '\\')) { - // LyX can't handle length variables - ostringstream ss; - if (use_parbox) - ss << "\\parbox"; - else - ss << "\\begin{minipage}"; - if (!position.empty()) - ss << '[' << position << ']'; - if (!latex_height.empty()) - ss << '[' << latex_height << ']'; - if (!inner_pos.empty()) - ss << '[' << inner_pos << ']'; - ss << "{" << latex_width << "}"; - if (use_parbox) - ss << '{'; - handle_ert(os, ss.str(), parent_context); - parent_context.new_paragraph(os); - parse_text_in_inset(p, os, flags, outer, parent_context); - if (use_parbox) - 
handle_ert(os, "}", parent_context); - else - handle_ert(os, "\\end{minipage}", parent_context); - } else { - // LyX does not like empty positions, so we have - // to set them to the LaTeX default values here. - if (position.empty()) - position = "c"; - if (inner_pos.empty()) - inner_pos = position; - parent_context.check_layout(os); - begin_inset(os, "Box Frameless\n"); - os << "position \"" << position << "\"\n"; - os << "hor_pos \"c\"\n"; - os << "has_inner_box 1\n"; - os << "inner_pos \"" << inner_pos << "\"\n"; - os << "use_parbox " << use_parbox << "\n"; - os << "width \"" << width_value << width_unit << "\"\n"; - os << "special \"none\"\n"; - os << "height \"" << height_value << height_unit << "\"\n"; - os << "height_special \"" << height_special << "\"\n"; - os << "status open\n\n"; - parse_text_in_inset(p, os, flags, outer, parent_context); - end_inset(os); -#ifdef PRESERVE_LAYOUT - // lyx puts a % after the end of the minipage - if (p.next_token().cat() == catNewline && p.next_token().cs().size() > 1) { - // new paragraph - //handle_comment(os, "%dummy", parent_context); - p.get_token(); - p.skip_spaces(); - parent_context.new_paragraph(os); - } - else if (p.next_token().cat() == catSpace || p.next_token().cat() == catNewline) { - //handle_comment(os, "%dummy", parent_context); - p.get_token(); - p.skip_spaces(); - // We add a protected space if something real follows - if (p.good() && p.next_token().cat() != catComment) { - os << "\\InsetSpace ~\n"; - } - } -#endif - } -} - - -/// parse an unknown environment -void parse_unknown_environment(Parser & p, string const & name, ostream & os, - unsigned flags, bool outer, - Context & parent_context) -{ - if (name == "tabbing") - // We need to remember that we have to handle '\=' specially - flags |= FLAG_TABBING; - - // We need to translate font changes and paragraphs inside the - // environment to ERT if we have a non standard font. 
- // Otherwise things like - // \large\begin{foo}\huge bar\end{foo} - // will not work. - bool const specialfont = - (parent_context.font != parent_context.normalfont); - bool const new_layout_allowed = parent_context.new_layout_allowed; - if (specialfont) - parent_context.new_layout_allowed = false; - handle_ert(os, "\\begin{" + name + "}", parent_context); - parse_text_snippet(p, os, flags, outer, parent_context); - handle_ert(os, "\\end{" + name + "}", parent_context); - if (specialfont) - parent_context.new_layout_allowed = new_layout_allowed; -} - - -void parse_environment(Parser & p, ostream & os, bool outer, - Context & parent_context) -{ - LyXLayout_ptr newlayout; - string const name = p.getArg('{', '}'); - const bool is_starred = suffixIs(name, '*'); - string const unstarred_name = rtrim(name, "*"); - active_environments.push_back(name); - - if (is_math_env(name)) { - parent_context.check_layout(os); - begin_inset(os, "Formula "); - os << "\\begin{" << name << "}"; - parse_math(p, os, FLAG_END, MATH_MODE); - os << "\\end{" << name << "}"; - end_inset(os); - } - - else if (name == "tabular" || name == "longtable") { - eat_whitespace(p, os, parent_context, false); - parent_context.check_layout(os); - begin_inset(os, "Tabular "); - handle_tabular(p, os, name == "longtable", parent_context); - end_inset(os); - p.skip_spaces(); - } - - else if (parent_context.textclass.floats().typeExist(unstarred_name)) { - eat_whitespace(p, os, parent_context, false); - parent_context.check_layout(os); - begin_inset(os, "Float " + unstarred_name + "\n"); - if (p.next_token().asInput() == "[") { - os << "placement " << p.getArg('[', ']') << '\n'; - } - os << "wide " << convert(is_starred) - << "\nsideways false" - << "\nstatus open\n\n"; - parse_text_in_inset(p, os, FLAG_END, outer, parent_context); - end_inset(os); - // We don't need really a new paragraph, but - // we must make sure that the next item gets a \begin_layout. 
- parent_context.new_paragraph(os); - p.skip_spaces(); - } - - else if (name == "minipage") { - eat_whitespace(p, os, parent_context, false); - parse_box(p, os, FLAG_END, outer, parent_context, false); - p.skip_spaces(); - } - - else if (name == "comment") { - eat_whitespace(p, os, parent_context, false); - parent_context.check_layout(os); - begin_inset(os, "Note Comment\n"); - os << "status open\n"; - parse_text_in_inset(p, os, FLAG_END, outer, parent_context); - end_inset(os); - p.skip_spaces(); - } - - else if (name == "lyxgreyedout") { - eat_whitespace(p, os, parent_context, false); - parent_context.check_layout(os); - begin_inset(os, "Note Greyedout\n"); - os << "status open\n"; - parse_text_in_inset(p, os, FLAG_END, outer, parent_context); - end_inset(os); - p.skip_spaces(); - } - - else if (!parent_context.new_layout_allowed) - parse_unknown_environment(p, name, os, FLAG_END, outer, - parent_context); - - // Alignment settings - else if (name == "center" || name == "flushleft" || name == "flushright" || - name == "centering" || name == "raggedright" || name == "raggedleft") { - eat_whitespace(p, os, parent_context, false); - // We must begin a new paragraph if not already done - if (! parent_context.atParagraphStart()) { - parent_context.check_end_layout(os); - parent_context.new_paragraph(os); - } - if (name == "flushleft" || name == "raggedright") - parent_context.add_extra_stuff("\\align left\n"); - else if (name == "flushright" || name == "raggedleft") - parent_context.add_extra_stuff("\\align right\n"); - else - parent_context.add_extra_stuff("\\align center\n"); - parse_text(p, os, FLAG_END, outer, parent_context); - // Just in case the environment is empty .. - parent_context.extra_stuff.erase(); - // We must begin a new paragraph to reset the alignment - parent_context.new_paragraph(os); - p.skip_spaces(); - } - - // The single '=' is meant here. 
- else if ((newlayout = findLayout(parent_context.textclass, name)).get() && - newlayout->isEnvironment()) { - eat_whitespace(p, os, parent_context, false); - Context context(true, parent_context.textclass, newlayout, - parent_context.layout, parent_context.font); - if (parent_context.deeper_paragraph) { - // We are beginning a nested environment after a - // deeper paragraph inside the outer list environment. - // Therefore we don't need to output a "begin deeper". - context.need_end_deeper = true; - } - parent_context.check_end_layout(os); - switch (context.layout->latextype) { - case LATEX_LIST_ENVIRONMENT: - context.extra_stuff = "\\labelwidthstring " - + p.verbatim_item() + '\n'; - p.skip_spaces(); - break; - case LATEX_BIB_ENVIRONMENT: - p.verbatim_item(); // swallow next arg - p.skip_spaces(); - break; - default: - break; - } - context.check_deeper(os); - parse_text(p, os, FLAG_END, outer, context); - context.check_end_layout(os); - if (parent_context.deeper_paragraph) { - // We must suppress the "end deeper" because we - // suppressed the "begin deeper" above. - context.need_end_deeper = false; - } - context.check_end_deeper(os); - parent_context.new_paragraph(os); - p.skip_spaces(); - } - - else if (name == "appendix") { - // This is no good latex style, but it works and is used in some documents... 
- eat_whitespace(p, os, parent_context, false); - parent_context.check_end_layout(os); - Context context(true, parent_context.textclass, parent_context.layout, - parent_context.layout, parent_context.font); - context.check_layout(os); - os << "\\start_of_appendix\n"; - parse_text(p, os, FLAG_END, outer, context); - context.check_end_layout(os); - p.skip_spaces(); - } - - else if (known_environments.find(name) != known_environments.end()) { - vector arguments = known_environments[name]; - // The last "argument" denotes wether we may translate the - // environment contents to LyX - // The default required if no argument is given makes us - // compatible with the reLyXre environment. - ArgumentType contents = arguments.empty() ? - required : - arguments.back(); - if (!arguments.empty()) - arguments.pop_back(); - // See comment in parse_unknown_environment() - bool const specialfont = - (parent_context.font != parent_context.normalfont); - bool const new_layout_allowed = - parent_context.new_layout_allowed; - if (specialfont) - parent_context.new_layout_allowed = false; - parse_arguments("\\begin{" + name + "}", arguments, p, os, - outer, parent_context); - if (contents == verbatim) - handle_ert(os, p.verbatimEnvironment(name), - parent_context); - else - parse_text_snippet(p, os, FLAG_END, outer, - parent_context); - handle_ert(os, "\\end{" + name + "}", parent_context); - if (specialfont) - parent_context.new_layout_allowed = new_layout_allowed; - } - - else - parse_unknown_environment(p, name, os, FLAG_END, outer, - parent_context); - - active_environments.pop_back(); -} - - -/// parses a comment and outputs it to \p os. 
-void parse_comment(Parser & p, ostream & os, Token const & t, Context & context) -{ - BOOST_ASSERT(t.cat() == catComment); - if (!t.cs().empty()) { - context.check_layout(os); - handle_comment(os, '%' + t.cs(), context); - if (p.next_token().cat() == catNewline) { - // A newline after a comment line starts a new - // paragraph - if (context.new_layout_allowed) { - if(!context.atParagraphStart()) - // Only start a new paragraph if not already - // done (we might get called recursively) - context.new_paragraph(os); - } else - handle_ert(os, "\n", context); - eat_whitespace(p, os, context, true); - } - } else { - // "%\n" combination - p.skip_spaces(); - } -} - - -/*! - * Reads spaces and comments until the first non-space, non-comment token. - * New paragraphs (double newlines or \\par) are handled like simple spaces - * if \p eatParagraph is true. - * Spaces are skipped, but comments are written to \p os. - */ -void eat_whitespace(Parser & p, ostream & os, Context & context, - bool eatParagraph) -{ - while (p.good()) { - Token const & t = p.get_token(); - if (t.cat() == catComment) - parse_comment(p, os, t, context); - else if ((! eatParagraph && p.isParagraph()) || - (t.cat() != catSpace && t.cat() != catNewline)) { - p.putback(); - return; - } - } -} - - -/*! - * Set a font attribute, parse text and reset the font attribute. - * \param attribute Attribute name (e.g. \\family, \\shape etc.) - * \param currentvalue Current value of the attribute. Is set to the new - * value during parsing. 
- * \param newvalue New value of the attribute - */ -void parse_text_attributes(Parser & p, ostream & os, unsigned flags, bool outer, - Context & context, string const & attribute, - string & currentvalue, string const & newvalue) -{ - context.check_layout(os); - string const oldvalue = currentvalue; - currentvalue = newvalue; - os << '\n' << attribute << ' ' << newvalue << "\n"; - parse_text_snippet(p, os, flags, outer, context); - context.check_layout(os); - os << '\n' << attribute << ' ' << oldvalue << "\n"; - currentvalue = oldvalue; -} - - -/// get the arguments of a natbib or jurabib citation command -std::pair getCiteArguments(Parser & p, bool natbibOrder) -{ - // We need to distinguish "" and "[]", so we can't use p.getOpt(). - - // text before the citation - string before; - // text after the citation - string after = p.getFullOpt(); - - if (!after.empty()) { - before = p.getFullOpt(); - if (natbibOrder && !before.empty()) - std::swap(before, after); - } - return std::make_pair(before, after); -} - - -/// Convert filenames with TeX macros and/or quotes to something LyX can understand -string const normalize_filename(string const & name) -{ - Parser p(trim(name, "\"")); - ostringstream os; - while (p.good()) { - Token const & t = p.get_token(); - if (t.cat() != catEscape) - os << t.asInput(); - else if (t.cs() == "lyxdot") { - // This is used by LyX for simple dots in relative - // names - os << '.'; - p.skip_spaces(); - } else if (t.cs() == "space") { - os << ' '; - p.skip_spaces(); - } else - os << t.asInput(); - } - return os.str(); -} - - -/// Convert \p name from TeX convention (relative to master file) to LyX -/// convention (relative to .lyx file) if it is relative -void fix_relative_filename(string & name) -{ - if (lyx::support::absolutePath(name)) - return; - // FIXME UNICODE encoding of name may be wrong (makeAbsPath expects - // utf8) - name = to_utf8(makeRelPath(from_utf8(makeAbsPath(name, getMasterFilePath()).absFilename()), - 
from_utf8(getParentFilePath()))); -} - - -/// Parse a NoWeb Scrap section. The initial "<<" is already parsed. -void parse_noweb(Parser & p, ostream & os, Context & context) -{ - // assemble the rest of the keyword - string name("<<"); - bool scrap = false; - while (p.good()) { - Token const & t = p.get_token(); - if (t.asInput() == ">" && p.next_token().asInput() == ">") { - name += ">>"; - p.get_token(); - scrap = (p.good() && p.next_token().asInput() == "="); - if (scrap) - name += p.get_token().asInput(); - break; - } - name += t.asInput(); - } - - if (!scrap || !context.new_layout_allowed || - !context.textclass.hasLayout("Scrap")) { - cerr << "Warning: Could not interpret '" << name - << "'. Ignoring it." << endl; - return; - } - - // We use new_paragraph instead of check_end_layout because the stuff - // following the noweb chunk needs to start with a \begin_layout. - // This may create a new paragraph even if there was none in the - // noweb file, but the alternative is an invalid LyX file. Since - // noweb code chunks are implemented with a layout style in LyX they - // always must be in an own paragraph. - context.new_paragraph(os); - Context newcontext(true, context.textclass, context.textclass["Scrap"]); - newcontext.check_layout(os); - os << name; - while (p.good()) { - Token const & t = p.get_token(); - // We abuse the parser a bit, because this is no TeX syntax - // at all. - if (t.cat() == catEscape) - os << subst(t.asInput(), "\\", "\n\\backslash\n"); - else - os << subst(t.asInput(), "\n", "\n\\newline\n"); - // The scrap chunk is ended by an @ at the beginning of a line. - // After the @ the line may contain a comment and/or - // whitespace, but nothing else. 
- if (t.asInput() == "@" && p.prev_token().cat() == catNewline && - (p.next_token().cat() == catSpace || - p.next_token().cat() == catNewline || - p.next_token().cat() == catComment)) { - while (p.good() && p.next_token().cat() == catSpace) - os << p.get_token().asInput(); - if (p.next_token().cat() == catComment) - // The comment includes a final '\n' - os << p.get_token().asInput(); - else { - if (p.next_token().cat() == catNewline) - p.get_token(); - os << '\n'; - } - break; - } - } - newcontext.check_end_layout(os); -} - -} // anonymous namespace - - -void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, - Context & context) -{ - LyXLayout_ptr newlayout; - // Store the latest bibliographystyle (needed for bibtex inset) - string bibliographystyle; - bool const use_natbib = used_packages.find("natbib") != used_packages.end(); - bool const use_jurabib = used_packages.find("jurabib") != used_packages.end(); - while (p.good()) { - Token const & t = p.get_token(); - -#ifdef FILEDEBUG - cerr << "t: " << t << " flags: " << flags << "\n"; -#endif - - if (flags & FLAG_ITEM) { - if (t.cat() == catSpace) - continue; - - flags &= ~FLAG_ITEM; - if (t.cat() == catBegin) { - // skip the brace and collect everything to the next matching - // closing brace - flags |= FLAG_BRACE_LAST; - continue; - } - - // handle only this single token, leave the loop if done - flags |= FLAG_LEAVE; - } - - if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) - return; - - // - // cat codes - // - if (t.cat() == catMath) { - // we are inside some text mode thingy, so opening new math is allowed - context.check_layout(os); - begin_inset(os, "Formula "); - Token const & n = p.get_token(); - if (n.cat() == catMath && outer) { - // TeX's $$...$$ syntax for displayed math - os << "\\["; - parse_math(p, os, FLAG_SIMPLE, MATH_MODE); - os << "\\]"; - p.get_token(); // skip the second '$' token - } else { - // simple $...$ stuff - p.putback(); - os << '$'; - parse_math(p, os, 
FLAG_SIMPLE, MATH_MODE); - os << '$'; - } - end_inset(os); - } - - else if (t.cat() == catSuper || t.cat() == catSub) - cerr << "catcode " << t << " illegal in text mode\n"; - - // Basic support for english quotes. This should be - // extended to other quotes, but is not so easy (a - // left english quote is the same as a right german - // quote...) - else if (t.asInput() == "`" - && p.next_token().asInput() == "`") { - context.check_layout(os); - begin_inset(os, "Quotes "); - os << "eld"; - end_inset(os); - p.get_token(); - skip_braces(p); - } - else if (t.asInput() == "'" - && p.next_token().asInput() == "'") { - context.check_layout(os); - begin_inset(os, "Quotes "); - os << "erd"; - end_inset(os); - p.get_token(); - skip_braces(p); - } - - else if (t.asInput() == "<" - && p.next_token().asInput() == "<" && noweb_mode) { - p.get_token(); - parse_noweb(p, os, context); - } - - else if (t.cat() == catSpace || (t.cat() == catNewline && ! p.isParagraph())) - check_space(p, os, context); - - else if (t.character() == '[' && noweb_mode && - p.next_token().character() == '[') { - // These can contain underscores - p.putback(); - string const s = p.getFullOpt() + ']'; - if (p.next_token().character() == ']') - p.get_token(); - else - cerr << "Warning: Inserting missing ']' in '" - << s << "'." << endl; - handle_ert(os, s, context); - } - - else if (t.cat() == catLetter || - t.cat() == catOther || - t.cat() == catAlign || - t.cat() == catParameter) { - // This translates "&" to "\\&" which may be wrong... 
- context.check_layout(os); - os << t.character(); - } - - else if (p.isParagraph()) { - if (context.new_layout_allowed) - context.new_paragraph(os); - else - handle_ert(os, "\\par ", context); - eat_whitespace(p, os, context, true); - } - - else if (t.cat() == catActive) { - context.check_layout(os); - if (t.character() == '~') { - if (context.layout->free_spacing) - os << ' '; - else - os << "\\InsetSpace ~\n"; - } else - os << t.character(); - } - - else if (t.cat() == catBegin && - p.next_token().cat() == catEnd) { - // {} - Token const prev = p.prev_token(); - p.get_token(); - if (p.next_token().character() == '`' || - (prev.character() == '-' && - p.next_token().character() == '-')) - ; // ignore it in {}`` or -{}- - else - handle_ert(os, "{}", context); - - } - - else if (t.cat() == catBegin) { - context.check_layout(os); - // special handling of font attribute changes - Token const prev = p.prev_token(); - Token const next = p.next_token(); - Font const oldFont = context.font; - if (next.character() == '[' || - next.character() == ']' || - next.character() == '*') { - p.get_token(); - if (p.next_token().cat() == catEnd) { - os << next.character(); - p.get_token(); - } else { - p.putback(); - handle_ert(os, "{", context); - parse_text_snippet(p, os, - FLAG_BRACE_LAST, - outer, context); - handle_ert(os, "}", context); - } - } else if (! 
context.new_layout_allowed) { - handle_ert(os, "{", context); - parse_text_snippet(p, os, FLAG_BRACE_LAST, - outer, context); - handle_ert(os, "}", context); - } else if (is_known(next.cs(), known_sizes)) { - // next will change the size, so we must - // reset it here - parse_text_snippet(p, os, FLAG_BRACE_LAST, - outer, context); - if (!context.atParagraphStart()) - os << "\n\\size " - << context.font.size << "\n"; - } else if (is_known(next.cs(), known_font_families)) { - // next will change the font family, so we - // must reset it here - parse_text_snippet(p, os, FLAG_BRACE_LAST, - outer, context); - if (!context.atParagraphStart()) - os << "\n\\family " - << context.font.family << "\n"; - } else if (is_known(next.cs(), known_font_series)) { - // next will change the font series, so we - // must reset it here - parse_text_snippet(p, os, FLAG_BRACE_LAST, - outer, context); - if (!context.atParagraphStart()) - os << "\n\\series " - << context.font.series << "\n"; - } else if (is_known(next.cs(), known_font_shapes)) { - // next will change the font shape, so we - // must reset it here - parse_text_snippet(p, os, FLAG_BRACE_LAST, - outer, context); - if (!context.atParagraphStart()) - os << "\n\\shape " - << context.font.shape << "\n"; - } else if (is_known(next.cs(), known_old_font_families) || - is_known(next.cs(), known_old_font_series) || - is_known(next.cs(), known_old_font_shapes)) { - // next will change the font family, series - // and shape, so we must reset it here - parse_text_snippet(p, os, FLAG_BRACE_LAST, - outer, context); - if (!context.atParagraphStart()) - os << "\n\\family " - << context.font.family - << "\n\\series " - << context.font.series - << "\n\\shape " - << context.font.shape << "\n"; - } else { - handle_ert(os, "{", context); - parse_text_snippet(p, os, FLAG_BRACE_LAST, - outer, context); - handle_ert(os, "}", context); - } - } - - else if (t.cat() == catEnd) { - if (flags & FLAG_BRACE_LAST) { - return; - } - cerr << "stray '}' in 
text\n"; - handle_ert(os, "}", context); - } - - else if (t.cat() == catComment) - parse_comment(p, os, t, context); - - // - // control sequences - // - - else if (t.cs() == "(") { - context.check_layout(os); - begin_inset(os, "Formula"); - os << " \\("; - parse_math(p, os, FLAG_SIMPLE2, MATH_MODE); - os << "\\)"; - end_inset(os); - } - - else if (t.cs() == "[") { - context.check_layout(os); - begin_inset(os, "Formula"); - os << " \\["; - parse_math(p, os, FLAG_EQUATION, MATH_MODE); - os << "\\]"; - end_inset(os); - } - - else if (t.cs() == "begin") - parse_environment(p, os, outer, context); - - else if (t.cs() == "end") { - if (flags & FLAG_END) { - // eat environment name - string const name = p.getArg('{', '}'); - if (name != active_environment()) - cerr << "\\end{" + name + "} does not match \\begin{" - + active_environment() + "}\n"; - return; - } - p.error("found 'end' unexpectedly"); - } - - else if (t.cs() == "item") { - p.skip_spaces(); - string s; - bool optarg = false; - if (p.next_token().character() == '[') { - p.get_token(); // eat '[' - s = parse_text_snippet(p, FLAG_BRACK_LAST, - outer, context); - optarg = true; - } - context.set_item(); - context.check_layout(os); - if (context.has_item) { - // An item in an unknown list-like environment - // FIXME: Do this in check_layout()! - context.has_item = false; - if (optarg) - handle_ert(os, "\\item", context); - else - handle_ert(os, "\\item ", context); - } - if (optarg) { - if (context.layout->labeltype != LABEL_MANUAL) { - // lyx does not support \item[\mybullet] - // in itemize environments - handle_ert(os, "[", context); - os << s; - handle_ert(os, "]", context); - } else if (!s.empty()) { - // The space is needed to separate the - // item from the rest of the sentence. 
- os << s << ' '; - eat_whitespace(p, os, context, false); - } - } - } - - else if (t.cs() == "bibitem") { - context.set_item(); - context.check_layout(os); - os << "\\bibitem "; - os << p.getOpt(); - os << '{' << p.verbatim_item() << '}' << "\n"; - } - - else if (t.cs() == "def") { - context.check_layout(os); - eat_whitespace(p, os, context, false); - string name = p.get_token().cs(); - while (p.next_token().cat() != catBegin) - name += p.get_token().asString(); - handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}', context); - } - - else if (t.cs() == "noindent") { - p.skip_spaces(); - context.add_extra_stuff("\\noindent\n"); - } - - else if (t.cs() == "appendix") { - context.add_extra_stuff("\\start_of_appendix\n"); - // We need to start a new paragraph. Otherwise the - // appendix in 'bla\appendix\chapter{' would start - // too late. - context.new_paragraph(os); - // We need to make sure that the paragraph is - // generated even if it is empty. Otherwise the - // appendix in '\par\appendix\par\chapter{' would - // start too late. - context.check_layout(os); - // FIXME: This is a hack to prevent paragraph - // deletion if it is empty. Handle this better! - handle_comment(os, - "%dummy comment inserted by tex2lyx to " - "ensure that this paragraph is not empty", - context); - // Both measures above may generate an additional - // empty paragraph, but that does not hurt, because - // whitespace does not matter here. - eat_whitespace(p, os, context, true); - } - - // Must attempt to parse "Section*" before "Section". - else if ((p.next_token().asInput() == "*") && - context.new_layout_allowed && - // The single '=' is meant here. - (newlayout = findLayout(context.textclass, - t.cs() + '*')).get() && - newlayout->isCommand()) { - p.get_token(); - output_command_layout(os, p, outer, context, newlayout); - p.skip_spaces(); - } - - // The single '=' is meant here. 
- else if (context.new_layout_allowed && - (newlayout = findLayout(context.textclass, t.cs())).get() && - newlayout->isCommand()) { - output_command_layout(os, p, outer, context, newlayout); - p.skip_spaces(); - } - - else if (t.cs() == "includegraphics") { - bool const clip = p.next_token().asInput() == "*"; - if (clip) - p.get_token(); - map opts = split_map(p.getArg('[', ']')); - if (clip) - opts["clip"] = string(); - string name = normalize_filename(p.verbatim_item()); - - string const path = getMasterFilePath(); - // We want to preserve relative / absolute filenames, - // therefore path is only used for testing - // FIXME UNICODE encoding of name and path may be - // wrong (makeAbsPath expects utf8) - if (!fs::exists(makeAbsPath(name, path).toFilesystemEncoding())) { - // The file extension is probably missing. - // Now try to find it out. - string const dvips_name = - find_file(name, path, - known_dvips_graphics_formats); - string const pdftex_name = - find_file(name, path, - known_pdftex_graphics_formats); - if (!dvips_name.empty()) { - if (!pdftex_name.empty()) { - cerr << "This file contains the " - "latex snippet\n" - "\"\\includegraphics{" - << name << "}\".\n" - "However, files\n\"" - << dvips_name << "\" and\n\"" - << pdftex_name << "\"\n" - "both exist, so I had to make a " - "choice and took the first one.\n" - "Please move the unwanted one " - "someplace else and try again\n" - "if my choice was wrong." - << endl; - } - name = dvips_name; - } else if (!pdftex_name.empty()) - name = pdftex_name; - } - - // FIXME UNICODE encoding of name and path may be - // wrong (makeAbsPath expects utf8) - if (fs::exists(makeAbsPath(name, path).toFilesystemEncoding())) - fix_relative_filename(name); - else - cerr << "Warning: Could not find graphics file '" - << name << "'." 
<< endl; - - context.check_layout(os); - begin_inset(os, "Graphics "); - os << "\n\tfilename " << name << '\n'; - if (opts.find("width") != opts.end()) - os << "\twidth " - << translate_len(opts["width"]) << '\n'; - if (opts.find("height") != opts.end()) - os << "\theight " - << translate_len(opts["height"]) << '\n'; - if (opts.find("scale") != opts.end()) { - istringstream iss(opts["scale"]); - double val; - iss >> val; - val = val*100; - os << "\tscale " << val << '\n'; - } - if (opts.find("angle") != opts.end()) - os << "\trotateAngle " - << opts["angle"] << '\n'; - if (opts.find("origin") != opts.end()) { - ostringstream ss; - string const opt = opts["origin"]; - if (opt.find('l') != string::npos) ss << "left"; - if (opt.find('r') != string::npos) ss << "right"; - if (opt.find('c') != string::npos) ss << "center"; - if (opt.find('t') != string::npos) ss << "Top"; - if (opt.find('b') != string::npos) ss << "Bottom"; - if (opt.find('B') != string::npos) ss << "Baseline"; - if (!ss.str().empty()) - os << "\trotateOrigin " << ss.str() << '\n'; - else - cerr << "Warning: Ignoring unknown includegraphics origin argument '" << opt << "'\n"; - } - if (opts.find("keepaspectratio") != opts.end()) - os << "\tkeepAspectRatio\n"; - if (opts.find("clip") != opts.end()) - os << "\tclip\n"; - if (opts.find("draft") != opts.end()) - os << "\tdraft\n"; - if (opts.find("bb") != opts.end()) - os << "\tBoundingBox " - << opts["bb"] << '\n'; - int numberOfbbOptions = 0; - if (opts.find("bbllx") != opts.end()) - numberOfbbOptions++; - if (opts.find("bblly") != opts.end()) - numberOfbbOptions++; - if (opts.find("bburx") != opts.end()) - numberOfbbOptions++; - if (opts.find("bbury") != opts.end()) - numberOfbbOptions++; - if (numberOfbbOptions == 4) - os << "\tBoundingBox " - << opts["bbllx"] << opts["bblly"] - << opts["bburx"] << opts["bbury"] << '\n'; - else if (numberOfbbOptions > 0) - cerr << "Warning: Ignoring incomplete includegraphics boundingbox arguments.\n"; - 
numberOfbbOptions = 0; - if (opts.find("natwidth") != opts.end()) - numberOfbbOptions++; - if (opts.find("natheight") != opts.end()) - numberOfbbOptions++; - if (numberOfbbOptions == 2) - os << "\tBoundingBox 0bp 0bp " - << opts["natwidth"] << opts["natheight"] << '\n'; - else if (numberOfbbOptions > 0) - cerr << "Warning: Ignoring incomplete includegraphics boundingbox arguments.\n"; - ostringstream special; - if (opts.find("hiresbb") != opts.end()) - special << "hiresbb,"; - if (opts.find("trim") != opts.end()) - special << "trim,"; - if (opts.find("viewport") != opts.end()) - special << "viewport=" << opts["viewport"] << ','; - if (opts.find("totalheight") != opts.end()) - special << "totalheight=" << opts["totalheight"] << ','; - if (opts.find("type") != opts.end()) - special << "type=" << opts["type"] << ','; - if (opts.find("ext") != opts.end()) - special << "ext=" << opts["ext"] << ','; - if (opts.find("read") != opts.end()) - special << "read=" << opts["read"] << ','; - if (opts.find("command") != opts.end()) - special << "command=" << opts["command"] << ','; - string s_special = special.str(); - if (!s_special.empty()) { - // We had special arguments. Remove the trailing ','. - os << "\tspecial " << s_special.substr(0, s_special.size() - 1) << '\n'; - } - // TODO: Handle the unknown settings better. - // Warn about invalid options. - // Check whether some option was given twice. 
- end_inset(os); - } - - else if (t.cs() == "footnote" || - (t.cs() == "thanks" && context.layout->intitle)) { - p.skip_spaces(); - context.check_layout(os); - begin_inset(os, "Foot\n"); - os << "status collapsed\n\n"; - parse_text_in_inset(p, os, FLAG_ITEM, false, context); - end_inset(os); - } - - else if (t.cs() == "marginpar") { - p.skip_spaces(); - context.check_layout(os); - begin_inset(os, "Marginal\n"); - os << "status collapsed\n\n"; - parse_text_in_inset(p, os, FLAG_ITEM, false, context); - end_inset(os); - } - - else if (t.cs() == "ensuremath") { - p.skip_spaces(); - context.check_layout(os); - string const s = p.verbatim_item(); - if (s == "±" || s == "³" || s == "²" || s == "µ") - os << s; - else - handle_ert(os, "\\ensuremath{" + s + "}", - context); - } - - else if (t.cs() == "hfill") { - context.check_layout(os); - os << "\n\\hfill\n"; - skip_braces(p); - p.skip_spaces(); - } - - else if (t.cs() == "makeindex" || t.cs() == "maketitle") { - // FIXME: Somehow prevent title layouts if - // "maketitle" was not found - p.skip_spaces(); - skip_braces(p); // swallow this - } - - else if (t.cs() == "tableofcontents") { - p.skip_spaces(); - context.check_layout(os); - begin_inset(os, "LatexCommand \\tableofcontents\n"); - end_inset(os); - skip_braces(p); // swallow this - } - - else if (t.cs() == "listoffigures") { - p.skip_spaces(); - context.check_layout(os); - begin_inset(os, "FloatList figure\n"); - end_inset(os); - skip_braces(p); // swallow this - } - - else if (t.cs() == "listoftables") { - p.skip_spaces(); - context.check_layout(os); - begin_inset(os, "FloatList table\n"); - end_inset(os); - skip_braces(p); // swallow this - } - - else if (t.cs() == "listof") { - p.skip_spaces(true); - string const name = p.get_token().asString(); - if (context.textclass.floats().typeExist(name)) { - context.check_layout(os); - begin_inset(os, "FloatList "); - os << name << "\n"; - end_inset(os); - p.get_token(); // swallow second arg - } else - handle_ert(os, 
"\\listof{" + name + "}", context); - } - - else if (t.cs() == "textrm") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\family", - context.font.family, "roman"); - - else if (t.cs() == "textsf") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\family", - context.font.family, "sans"); - - else if (t.cs() == "texttt") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\family", - context.font.family, "typewriter"); - - else if (t.cs() == "textmd") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\series", - context.font.series, "medium"); - - else if (t.cs() == "textbf") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\series", - context.font.series, "bold"); - - else if (t.cs() == "textup") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\shape", - context.font.shape, "up"); - - else if (t.cs() == "textit") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\shape", - context.font.shape, "italic"); - - else if (t.cs() == "textsl") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\shape", - context.font.shape, "slanted"); - - else if (t.cs() == "textsc") - parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\shape", - context.font.shape, "smallcaps"); - - else if (t.cs() == "textnormal" || t.cs() == "normalfont") { - context.check_layout(os); - Font oldFont = context.font; - context.font.init(); - context.font.size = oldFont.size; - os << "\n\\family " << context.font.family << "\n"; - os << "\n\\series " << context.font.series << "\n"; - os << "\n\\shape " << context.font.shape << "\n"; - if (t.cs() == "textnormal") { - parse_text_snippet(p, os, FLAG_ITEM, outer, context); - output_font_change(os, context.font, oldFont); - context.font = oldFont; - } else - eat_whitespace(p, os, context, false); - } - - else if (t.cs() == "underbar") { - // Do NOT handle \underline. - // \underbar cuts through y, g, q, p etc., - // \underline does not. 
- context.check_layout(os); - os << "\n\\bar under\n"; - parse_text_snippet(p, os, FLAG_ITEM, outer, context); - context.check_layout(os); - os << "\n\\bar default\n"; - } - - else if (t.cs() == "emph" || t.cs() == "noun") { - context.check_layout(os); - os << "\n\\" << t.cs() << " on\n"; - parse_text_snippet(p, os, FLAG_ITEM, outer, context); - context.check_layout(os); - os << "\n\\" << t.cs() << " default\n"; - } - - else if (use_natbib && - is_known(t.cs(), known_natbib_commands) && - ((t.cs() != "citefullauthor" && - t.cs() != "citeyear" && - t.cs() != "citeyearpar") || - p.next_token().asInput() != "*")) { - context.check_layout(os); - // tex lyx - // \citet[before][after]{a} \citet[after][before]{a} - // \citet[before][]{a} \citet[][before]{a} - // \citet[after]{a} \citet[after]{a} - // \citet{a} \citet{a} - string command = '\\' + t.cs(); - if (p.next_token().asInput() == "*") { - command += '*'; - p.get_token(); - } - if (command == "\\citefullauthor") - // alternative name for "\\citeauthor*" - command = "\\citeauthor*"; - - // text before the citation - string before; - // text after the citation - string after; - - boost::tie(before, after) = getCiteArguments(p, true); - if (command == "\\cite") { - // \cite without optional argument means - // \citet, \cite with at least one optional - // argument means \citep. 
- if (before.empty() && after.empty()) - command = "\\citet"; - else - command = "\\citep"; - } - if (before.empty() && after == "[]") - // avoid \citet[]{a} - after.erase(); - else if (before == "[]" && after == "[]") { - // avoid \citet[][]{a} - before.erase(); - after.erase(); - } - begin_inset(os, "LatexCommand "); - os << command << after << before - << '{' << p.verbatim_item() << "}\n"; - end_inset(os); - } - - else if (use_jurabib && - is_known(t.cs(), known_jurabib_commands)) { - context.check_layout(os); - string const command = '\\' + t.cs(); - char argumentOrder = '\0'; - vector const & options = used_packages["jurabib"]; - if (std::find(options.begin(), options.end(), - "natbiborder") != options.end()) - argumentOrder = 'n'; - else if (std::find(options.begin(), options.end(), - "jurabiborder") != options.end()) - argumentOrder = 'j'; - - // text before the citation - string before; - // text after the citation - string after; - - boost::tie(before, after) = - getCiteArguments(p, argumentOrder != 'j'); - string const citation = p.verbatim_item(); - if (!before.empty() && argumentOrder == '\0') { - cerr << "Warning: Assuming argument order " - "of jurabib version 0.6 for\n'" - << command << before << after << '{' - << citation << "}'.\n" - "Add 'jurabiborder' to the jurabib " - "package options if you used an\n" - "earlier jurabib version." << endl; - } - begin_inset(os, "LatexCommand "); - os << command << after << before - << '{' << citation << "}\n"; - end_inset(os); - } - - else if (is_known(t.cs(), known_latex_commands)) { - // This needs to be after the check for natbib and - // jurabib commands, because "cite" has different - // arguments with natbib and jurabib. - context.check_layout(os); - begin_inset(os, "LatexCommand "); - os << '\\' << t.cs(); - // lyx cannot handle newlines in a latex command - // FIXME: Move the substitution into parser::getOpt()? 
- os << subst(p.getOpt(), "\n", " "); - os << subst(p.getOpt(), "\n", " "); - os << '{' << subst(p.verbatim_item(), "\n", " ") << "}\n"; - end_inset(os); - } - - else if (is_known(t.cs(), known_quotes)) { - char const * const * where = is_known(t.cs(), known_quotes); - context.check_layout(os); - begin_inset(os, "Quotes "); - os << known_coded_quotes[where - known_quotes]; - end_inset(os); - // LyX adds {} after the quote, so we have to eat - // spaces here if there are any before a possible - // {} pair. - eat_whitespace(p, os, context, false); - skip_braces(p); - } - - else if (is_known(t.cs(), known_sizes) && - context.new_layout_allowed) { - char const * const * where = is_known(t.cs(), known_sizes); - context.check_layout(os); - Font const oldFont = context.font; - context.font.size = known_coded_sizes[where - known_sizes]; - output_font_change(os, oldFont, context.font); - eat_whitespace(p, os, context, false); - } - - else if (is_known(t.cs(), known_font_families) && - context.new_layout_allowed) { - char const * const * where = - is_known(t.cs(), known_font_families); - context.check_layout(os); - Font const oldFont = context.font; - context.font.family = - known_coded_font_families[where - known_font_families]; - output_font_change(os, oldFont, context.font); - eat_whitespace(p, os, context, false); - } - - else if (is_known(t.cs(), known_font_series) && - context.new_layout_allowed) { - char const * const * where = - is_known(t.cs(), known_font_series); - context.check_layout(os); - Font const oldFont = context.font; - context.font.series = - known_coded_font_series[where - known_font_series]; - output_font_change(os, oldFont, context.font); - eat_whitespace(p, os, context, false); - } - - else if (is_known(t.cs(), known_font_shapes) && - context.new_layout_allowed) { - char const * const * where = - is_known(t.cs(), known_font_shapes); - context.check_layout(os); - Font const oldFont = context.font; - context.font.shape = - known_coded_font_shapes[where 
- known_font_shapes]; - output_font_change(os, oldFont, context.font); - eat_whitespace(p, os, context, false); - } - else if (is_known(t.cs(), known_old_font_families) && - context.new_layout_allowed) { - char const * const * where = - is_known(t.cs(), known_old_font_families); - context.check_layout(os); - Font const oldFont = context.font; - context.font.init(); - context.font.size = oldFont.size; - context.font.family = - known_coded_font_families[where - known_old_font_families]; - output_font_change(os, oldFont, context.font); - eat_whitespace(p, os, context, false); - } - - else if (is_known(t.cs(), known_old_font_series) && - context.new_layout_allowed) { - char const * const * where = - is_known(t.cs(), known_old_font_series); - context.check_layout(os); - Font const oldFont = context.font; - context.font.init(); - context.font.size = oldFont.size; - context.font.series = - known_coded_font_series[where - known_old_font_series]; - output_font_change(os, oldFont, context.font); - eat_whitespace(p, os, context, false); - } - - else if (is_known(t.cs(), known_old_font_shapes) && - context.new_layout_allowed) { - char const * const * where = - is_known(t.cs(), known_old_font_shapes); - context.check_layout(os); - Font const oldFont = context.font; - context.font.init(); - context.font.size = oldFont.size; - context.font.shape = - known_coded_font_shapes[where - known_old_font_shapes]; - output_font_change(os, oldFont, context.font); - eat_whitespace(p, os, context, false); - } - - else if (t.cs() == "LyX" || t.cs() == "TeX" - || t.cs() == "LaTeX") { - context.check_layout(os); - os << t.cs(); - skip_braces(p); // eat {} - } - - else if (t.cs() == "LaTeXe") { - context.check_layout(os); - os << "LaTeX2e"; - skip_braces(p); // eat {} - } - - else if (t.cs() == "ldots") { - context.check_layout(os); - skip_braces(p); - os << "\\SpecialChar \\ldots{}\n"; - } - - else if (t.cs() == "lyxarrow") { - context.check_layout(os); - os << "\\SpecialChar \\menuseparator\n"; 
- skip_braces(p); - } - - else if (t.cs() == "textcompwordmark") { - context.check_layout(os); - os << "\\SpecialChar \\textcompwordmark{}\n"; - skip_braces(p); - } - - else if (t.cs() == "@" && p.next_token().asInput() == ".") { - context.check_layout(os); - os << "\\SpecialChar \\@.\n"; - p.get_token(); - } - - else if (t.cs() == "-") { - context.check_layout(os); - os << "\\SpecialChar \\-\n"; - } - - else if (t.cs() == "textasciitilde") { - context.check_layout(os); - os << '~'; - skip_braces(p); - } - - else if (t.cs() == "textasciicircum") { - context.check_layout(os); - os << '^'; - skip_braces(p); - } - - else if (t.cs() == "textbackslash") { - context.check_layout(os); - os << "\n\\backslash\n"; - skip_braces(p); - } - - else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" - || t.cs() == "$" || t.cs() == "{" || t.cs() == "}" - || t.cs() == "%") { - context.check_layout(os); - os << t.cs(); - } - - else if (t.cs() == "char") { - context.check_layout(os); - if (p.next_token().character() == '`') { - p.get_token(); - if (p.next_token().cs() == "\"") { - p.get_token(); - os << '"'; - skip_braces(p); - } else { - handle_ert(os, "\\char`", context); - } - } else { - handle_ert(os, "\\char", context); - } - } - - else if (t.cs() == "verb") { - context.check_layout(os); - char const delimiter = p.next_token().character(); - string const arg = p.getArg(delimiter, delimiter); - ostringstream oss; - oss << "\\verb" << delimiter << arg << delimiter; - handle_ert(os, oss.str(), context); - } - - else if (t.cs() == "\"") { - context.check_layout(os); - string const name = p.verbatim_item(); - if (name == "a") os << 'ä'; - else if (name == "o") os << 'ö'; - else if (name == "u") os << 'ü'; - else if (name == "A") os << 'Ä'; - else if (name == "O") os << 'Ö'; - else if (name == "U") os << 'Ü'; - else handle_ert(os, "\"{" + name + "}", context); - } - - // Problem: \= creates a tabstop inside the tabbing environment - // and else an accent. 
In the latter case we really would want - // \={o} instead of \= o. - else if (t.cs() == "=" && (flags & FLAG_TABBING)) - handle_ert(os, t.asInput(), context); - - else if (t.cs() == "H" || t.cs() == "c" || t.cs() == "^" - || t.cs() == "'" || t.cs() == "`" - || t.cs() == "~" || t.cs() == "." || t.cs() == "=") { - // we need the trim as the LyX parser chokes on such spaces - // The argument of InsetLatexAccent is parsed as a - // subset of LaTeX, so don't parse anything here, - // but use the raw argument. - // Otherwise we would convert \~{\i} wrongly. - // This will of course not translate \~{\ss} to \~{ß}, - // but that does at least compile and does only look - // strange on screen. - context.check_layout(os); - os << "\\i \\" << t.cs() << "{" - << trim(p.verbatim_item(), " ") - << "}\n"; - } - - else if (t.cs() == "ss") { - context.check_layout(os); - os << "ß"; - skip_braces(p); // eat {} - } - - else if (t.cs() == "i" || t.cs() == "j" || t.cs() == "l" || - t.cs() == "L") { - context.check_layout(os); - os << "\\i \\" << t.cs() << "{}\n"; - skip_braces(p); // eat {} - } - - else if (t.cs() == "\\") { - context.check_layout(os); - string const next = p.next_token().asInput(); - if (next == "[") - handle_ert(os, "\\\\" + p.getOpt(), context); - else if (next == "*") { - p.get_token(); - handle_ert(os, "\\\\*" + p.getOpt(), context); - } - else { - os << "\n\\newline\n"; - } - } - - else if (t.cs() == "input" || t.cs() == "include" - || t.cs() == "verbatiminput") { - string name = '\\' + t.cs(); - if (t.cs() == "verbatiminput" - && p.next_token().asInput() == "*") - name += p.get_token().asInput(); - context.check_layout(os); - begin_inset(os, "Include "); - string filename(normalize_filename(p.getArg('{', '}'))); - string const path = getMasterFilePath(); - // We want to preserve relative / absolute filenames, - // therefore path is only used for testing - // FIXME UNICODE encoding of filename and path may be - // wrong (makeAbsPath expects utf8) - if (t.cs() == 
"include" && - !fs::exists(makeAbsPath(filename, path).toFilesystemEncoding())) { - // The file extension is probably missing. - // Now try to find it out. - string const tex_name = - find_file(filename, path, - known_tex_extensions); - if (!tex_name.empty()) - filename = tex_name; - } - // FIXME UNICODE encoding of filename and path may be - // wrong (makeAbsPath expects utf8) - if (fs::exists(makeAbsPath(filename, path).toFilesystemEncoding())) { - string const abstexname = - makeAbsPath(filename, path).absFilename(); - string const abslyxname = - changeExtension(abstexname, ".lyx"); - fix_relative_filename(filename); - string const lyxname = - changeExtension(filename, ".lyx"); - if (t.cs() != "verbatiminput" && - tex2lyx(abstexname, FileName(abslyxname))) { - os << name << '{' << lyxname << "}\n"; - } else { - os << name << '{' << filename << "}\n"; - } - } else { - cerr << "Warning: Could not find included file '" - << filename << "'." << endl; - os << name << '{' << filename << "}\n"; - } - os << "preview false\n"; - end_inset(os); - } - - else if (t.cs() == "bibliographystyle") { - // store new bibliographystyle - bibliographystyle = p.verbatim_item(); - // output new bibliographystyle. - // This is only necessary if used in some other macro than \bibliography. - handle_ert(os, "\\bibliographystyle{" + bibliographystyle + "}", context); - } - - else if (t.cs() == "bibliography") { - context.check_layout(os); - begin_inset(os, "LatexCommand "); - os << "\\bibtex"; - // Do we have a bibliographystyle set? 
- if (!bibliographystyle.empty()) { - os << '[' << bibliographystyle << ']'; - } - os << '{' << p.verbatim_item() << "}\n"; - end_inset(os); - } - - else if (t.cs() == "parbox") - parse_box(p, os, FLAG_ITEM, outer, context, true); - - else if (t.cs() == "smallskip" || - t.cs() == "medskip" || - t.cs() == "bigskip" || - t.cs() == "vfill") { - context.check_layout(os); - begin_inset(os, "VSpace "); - os << t.cs(); - end_inset(os); - skip_braces(p); - } - - else if (is_known(t.cs(), known_spaces)) { - char const * const * where = is_known(t.cs(), known_spaces); - context.check_layout(os); - begin_inset(os, "InsetSpace "); - os << '\\' << known_coded_spaces[where - known_spaces] - << '\n'; - // LaTeX swallows whitespace after all spaces except - // "\\,". We have to do that here, too, because LyX - // adds "{}" which would make the spaces significant. - if (t.cs() != ",") - eat_whitespace(p, os, context, false); - // LyX adds "{}" after all spaces except "\\ " and - // "\\,", so we have to remove "{}". - // "\\,{}" is equivalent to "\\," in LaTeX, so we - // remove the braces after "\\,", too. - if (t.cs() != " ") - skip_braces(p); - } - - else if (t.cs() == "newpage" || - t.cs() == "clearpage" || - t.cs() == "cleardoublepage") { - context.check_layout(os); - // FIXME: what about \\pagebreak? - os << "\n\\" << t.cs() << "\n"; - skip_braces(p); // eat {} - } - - else if (t.cs() == "newcommand" || - t.cs() == "providecommand" || - t.cs() == "renewcommand") { - // these could be handled by parse_command(), but - // we need to call add_known_command() here. - string name = t.asInput(); - if (p.next_token().asInput() == "*") { - // Starred form. 
Eat '*' - p.get_token(); - name += '*'; - } - string const command = p.verbatim_item(); - string const opt1 = p.getOpt(); - string const opt2 = p.getFullOpt(); - add_known_command(command, opt1, !opt2.empty()); - string const ert = name + '{' + command + '}' + - opt1 + opt2 + - '{' + p.verbatim_item() + '}'; - handle_ert(os, ert, context); - } - - else if (t.cs() == "vspace") { - bool starred = false; - if (p.next_token().asInput() == "*") { - p.get_token(); - starred = true; - } - string const length = p.verbatim_item(); - string unit; - string valstring; - bool valid = splitLatexLength(length, valstring, unit); - bool known_vspace = false; - bool known_unit = false; - double value; - if (valid) { - istringstream iss(valstring); - iss >> value; - if (value == 1.0) { - if (unit == "\\smallskipamount") { - unit = "smallskip"; - known_vspace = true; - } else if (unit == "\\medskipamount") { - unit = "medskip"; - known_vspace = true; - } else if (unit == "\\bigskipamount") { - unit = "bigskip"; - known_vspace = true; - } else if (unit == "\\fill") { - unit = "vfill"; - known_vspace = true; - } - } - if (!known_vspace) { - switch (unitFromString(unit)) { - case LyXLength::SP: - case LyXLength::PT: - case LyXLength::BP: - case LyXLength::DD: - case LyXLength::MM: - case LyXLength::PC: - case LyXLength::CC: - case LyXLength::CM: - case LyXLength::IN: - case LyXLength::EX: - case LyXLength::EM: - case LyXLength::MU: - known_unit = true; - break; - default: - break; - } - } - } - - if (known_unit || known_vspace) { - // Literal length or known variable - context.check_layout(os); - begin_inset(os, "VSpace "); - if (known_unit) - os << value; - os << unit; - if (starred) - os << '*'; - end_inset(os); - } else { - // LyX can't handle other length variables in Inset VSpace - string name = t.asInput(); - if (starred) - name += '*'; - if (valid) { - if (value == 1.0) - handle_ert(os, name + '{' + unit + '}', context); - else if (value == -1.0) - handle_ert(os, name + "{-" + 
unit + '}', context); - else - handle_ert(os, name + '{' + valstring + unit + '}', context); - } else - handle_ert(os, name + '{' + length + '}', context); - } - } - - else { - //cerr << "#: " << t << " mode: " << mode << endl; - // heuristic: read up to next non-nested space - /* - string s = t.asInput(); - string z = p.verbatim_item(); - while (p.good() && z != " " && z.size()) { - //cerr << "read: " << z << endl; - s += z; - z = p.verbatim_item(); - } - cerr << "found ERT: " << s << endl; - handle_ert(os, s + ' ', context); - */ - string name = t.asInput(); - if (p.next_token().asInput() == "*") { - // Starred commands like \vspace*{} - p.get_token(); // Eat '*' - name += '*'; - } - if (! parse_command(name, p, os, outer, context)) - handle_ert(os, name, context); - } - - if (flags & FLAG_LEAVE) { - flags &= ~FLAG_LEAVE; - break; - } - } -} - -// }]) - - -} // namespace lyx diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp new file mode 100644 index 0000000000..136903a283 --- /dev/null +++ b/src/tex2lyx/text.cpp @@ -0,0 +1,2389 @@ +/** + * \file tex2lyx/text.cpp + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author André Pönitz + * \author Jean-Marc Lasgouttes + * + * Full author contact details are available in file CREDITS. 
+ */ + +// {[( + +#include + +#include "tex2lyx.h" +#include "Context.h" +#include "FloatList.h" +#include "lengthcommon.h" +#include "support/lstrings.h" +#include "support/convert.h" +#include "support/filetools.h" + +#include +#include + +#include +#include +#include +#include + + +namespace lyx { + +using support::addExtension; +using support::changeExtension; +using support::FileName; +using support::makeAbsPath; +using support::makeRelPath; +using support::rtrim; +using support::suffixIs; +using support::contains; +using support::subst; + +using std::cerr; +using std::endl; + +using std::map; +using std::ostream; +using std::ostringstream; +using std::istringstream; +using std::string; +using std::vector; + +namespace fs = boost::filesystem; + + +void parse_text_in_inset(Parser & p, ostream & os, unsigned flags, bool outer, + Context const & context) +{ + Context newcontext(true, context.textclass); + newcontext.font = context.font; + parse_text(p, os, flags, outer, newcontext); + newcontext.check_end_layout(os); +} + + +namespace { + +/// parses a paragraph snippet, useful for example for \\emph{...} +void parse_text_snippet(Parser & p, ostream & os, unsigned flags, bool outer, + Context & context) +{ + Context newcontext(context); + // Don't inherit the extra stuff + newcontext.extra_stuff.clear(); + parse_text(p, os, flags, outer, newcontext); + // Make sure that we don't create invalid .lyx files + context.need_layout = newcontext.need_layout; + context.need_end_layout = newcontext.need_end_layout; +} + + +/*! + * Thin wrapper around parse_text_snippet() using a string. + * + * We completely ignore \c context.need_layout and \c context.need_end_layout, + * because our return value is not used directly (otherwise the stream version + * of parse_text_snippet() could be used). That means that the caller needs + * to do layout management manually. + * This is intended to parse text that does not create any layout changes. 
+ */ +string parse_text_snippet(Parser & p, unsigned flags, const bool outer, + Context & context) +{ + Context newcontext(context); + newcontext.need_layout = false; + newcontext.need_end_layout = false; + newcontext.new_layout_allowed = false; + // Avoid warning by Context::~Context() + newcontext.extra_stuff.clear(); + ostringstream os; + parse_text_snippet(p, os, flags, outer, newcontext); + return os.str(); +} + + +char const * const known_latex_commands[] = { "ref", "cite", "label", "index", +"printindex", "pageref", "url", "vref", "vpageref", "prettyref", "eqref", 0 }; + +/*! + * natbib commands. + * We can't put these into known_latex_commands because the argument order + * is reversed in lyx if there are 2 arguments. + * The starred forms are also known. + */ +char const * const known_natbib_commands[] = { "cite", "citet", "citep", +"citealt", "citealp", "citeauthor", "citeyear", "citeyearpar", +"citefullauthor", "Citet", "Citep", "Citealt", "Citealp", "Citeauthor", 0 }; + +/*! + * jurabib commands. + * We can't put these into known_latex_commands because the argument order + * is reversed in lyx if there are 2 arguments. + * No starred form other than "cite*" known. 
 */
char const * const known_jurabib_commands[] = { "cite", "citet", "citep",
"citealt", "citealp", "citeauthor", "citeyear", "citeyearpar",
// jurabib commands not (yet) supported by LyX:
// "fullcite",
// "footcite", "footcitet", "footcitep", "footcitealt", "footcitealp",
// "footciteauthor", "footciteyear", "footciteyearpar",
"citefield", "citetitle", "cite*", 0 };

// The tables below come in pairs: a list of LaTeX names and a "coded" list
// holding the .lyx spelling at the same index. parse_text() looks a name up
// in the LaTeX list and uses the resulting offset to index the coded list,
// so the entries of paired lists must stay in the same order.
// Every list is terminated by a null pointer.

/// LaTeX names for quotes
char const * const known_quotes[] = { "glqq", "grqq", "quotedblbase",
"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0};

/// the same as known_quotes with .lyx names
char const * const known_coded_quotes[] = { "gld", "grd", "gld",
"grd", "gls", "fls", "frs", 0};

/// LaTeX names for font sizes
char const * const known_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0};

/// the same as known_sizes with .lyx names
char const * const known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normal", "large", "larger", "largest", "huge", "giant", 0};

/// LaTeX 2.09 names for font families
char const * const known_old_font_families[] = { "rm", "sf", "tt", 0};

/// LaTeX names for font families
char const * const known_font_families[] = { "rmfamily", "sffamily",
"ttfamily", 0};

/// the same as known_old_font_families and known_font_families with .lyx names
char const * const known_coded_font_families[] = { "roman", "sans",
"typewriter", 0};

/// LaTeX 2.09 names for font series
char const * const known_old_font_series[] = { "bf", 0};

/// LaTeX names for font series
char const * const known_font_series[] = { "bfseries", "mdseries", 0};

/// the same as known_old_font_series and known_font_series with .lyx names
char const * const known_coded_font_series[] = { "bold", "medium", 0};

/// LaTeX 2.09 names for font shapes
char const * const known_old_font_shapes[] = { "it", "sl", "sc", 0};

/// LaTeX names for font shapes
char const * const known_font_shapes[] = { "itshape", "slshape", "scshape",
"upshape", 0};

/// the same as known_old_font_shapes and known_font_shapes with .lyx names
char const * const known_coded_font_shapes[] = { "italic", "slanted",
"smallcaps", "up", 0};

/*!
 * Graphics file extensions known by the dvips driver of the graphics package.
 * These extensions are used to complete the filename of an included
 * graphics file if it does not contain an extension.
 * The order must be the same that latex uses to find a file, because we
 * will use the first extension that matches.
 * This is only an approximation for the common cases. If we would want to
 * do it right in all cases, we would need to know which graphics driver is
 * used and know the extensions of every driver of the graphics package.
 */
char const * const known_dvips_graphics_formats[] = {"eps", "ps", "eps.gz",
"ps.gz", "eps.Z", "ps.Z", 0};

/*!
 * Graphics file extensions known by the pdftex driver of the graphics package.
 * \sa known_dvips_graphics_formats
 */
char const * const known_pdftex_graphics_formats[] = {"png", "pdf", "jpg",
"mps", "tif", 0};

/*!
 * Known file extensions for TeX files as used by \\include.
+ */ +char const * const known_tex_extensions[] = {"tex", 0}; + +/// spaces known by InsetSpace +char const * const known_spaces[] = { " ", "space", ",", "thinspace", "quad", +"qquad", "enspace", "enskip", "negthinspace", 0}; + +/// the same as known_spaces with .lyx names +char const * const known_coded_spaces[] = { "space{}", "space{}", +"thinspace{}", "thinspace{}", "quad{}", "qquad{}", "enspace{}", "enskip{}", +"negthinspace{}", 0}; + + +/// splits "x=z, y=b" into a map +map split_map(string const & s) +{ + map res; + vector v; + split(s, v); + for (size_t i = 0; i < v.size(); ++i) { + size_t const pos = v[i].find('='); + string const index = v[i].substr(0, pos); + string const value = v[i].substr(pos + 1, string::npos); + res[trim(index)] = trim(value); + } + return res; +} + + +/*! + * Split a LaTeX length into value and unit. + * The latter can be a real unit like "pt", or a latex length variable + * like "\textwidth". The unit may contain additional stuff like glue + * lengths, but we don't care, because such lengths are ERT anyway. + * \returns true if \p value and \p unit are valid. + */ +bool splitLatexLength(string const & len, string & value, string & unit) +{ + if (len.empty()) + return false; + const string::size_type i = len.find_first_not_of(" -+0123456789.,"); + //'4,5' is a valid LaTeX length number. Change it to '4.5' + string const length = subst(len, ',', '.'); + if (i == string::npos) + return false; + if (i == 0) { + if (len[0] == '\\') { + // We had something like \textwidth without a factor + value = "1.0"; + } else { + return false; + } + } else { + value = trim(string(length, 0, i)); + } + if (value == "-") + value = "-1.0"; + // 'cM' is a valid LaTeX length unit. Change it to 'cm' + if (contains(len, '\\')) + unit = trim(string(len, i)); + else + unit = support::ascii_lowercase(trim(string(len, i))); + return true; +} + + +/// A simple function to translate a latex length to something lyx can +/// understand. 
Not perfect, but rather best-effort. +bool translate_len(string const & length, string & valstring, string & unit) +{ + if (!splitLatexLength(length, valstring, unit)) + return false; + // LyX uses percent values + double value; + istringstream iss(valstring); + iss >> value; + value *= 100; + ostringstream oss; + oss << value; + string const percentval = oss.str(); + // a normal length + if (unit.empty() || unit[0] != '\\') + return true; + string::size_type const i = unit.find(' '); + string const endlen = (i == string::npos) ? string() : string(unit, i); + if (unit == "\\textwidth") { + valstring = percentval; + unit = "text%" + endlen; + } else if (unit == "\\columnwidth") { + valstring = percentval; + unit = "col%" + endlen; + } else if (unit == "\\paperwidth") { + valstring = percentval; + unit = "page%" + endlen; + } else if (unit == "\\linewidth") { + valstring = percentval; + unit = "line%" + endlen; + } else if (unit == "\\paperheight") { + valstring = percentval; + unit = "pheight%" + endlen; + } else if (unit == "\\textheight") { + valstring = percentval; + unit = "theight%" + endlen; + } + return true; +} + +} + + +string translate_len(string const & length) +{ + string unit; + string value; + if (translate_len(length, value, unit)) + return value + unit; + // If the input is invalid, return what we have. + return length; +} + + +namespace { + +/*! + * Translates a LaTeX length into \p value, \p unit and + * \p special parts suitable for a box inset. + * The difference from translate_len() is that a box inset knows about + * some special "units" that are stored in \p special. 
+ */ +void translate_box_len(string const & length, string & value, string & unit, string & special) +{ + if (translate_len(length, value, unit)) { + if (unit == "\\height" || unit == "\\depth" || + unit == "\\totalheight" || unit == "\\width") { + special = unit.substr(1); + // The unit is not used, but LyX requires a dummy setting + unit = "in"; + } else + special = "none"; + } else { + value.clear(); + unit = length; + special = "none"; + } +} + + +/*! + * Find a file with basename \p name in path \p path and an extension + * in \p extensions. + */ +string find_file(string const & name, string const & path, + char const * const * extensions) +{ + // FIXME UNICODE encoding of name and path may be wrong (makeAbsPath + // expects utf8) + for (char const * const * what = extensions; *what; ++what) { + string const trial = addExtension(name, *what); + if (fs::exists(makeAbsPath(trial, path).toFilesystemEncoding())) + return trial; + } + return string(); +} + + +void begin_inset(ostream & os, string const & name) +{ + os << "\n\\begin_inset " << name; +} + + +void end_inset(ostream & os) +{ + os << "\n\\end_inset\n\n"; +} + + +void skip_braces(Parser & p) +{ + if (p.next_token().cat() != catBegin) + return; + p.get_token(); + if (p.next_token().cat() == catEnd) { + p.get_token(); + return; + } + p.putback(); +} + + +void handle_ert(ostream & os, string const & s, Context & context) +{ + // We must have a valid layout before outputting the ERT inset. 
+ context.check_layout(os); + Context newcontext(true, context.textclass); + begin_inset(os, "ERT"); + os << "\nstatus collapsed\n"; + newcontext.check_layout(os); + for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { + if (*it == '\\') + os << "\n\\backslash\n"; + else if (*it == '\n') { + newcontext.new_paragraph(os); + newcontext.check_layout(os); + } else + os << *it; + } + newcontext.check_end_layout(os); + end_inset(os); +} + + +void handle_comment(ostream & os, string const & s, Context & context) +{ + // TODO: Handle this better + Context newcontext(true, context.textclass); + begin_inset(os, "ERT"); + os << "\nstatus collapsed\n"; + newcontext.check_layout(os); + for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { + if (*it == '\\') + os << "\n\\backslash\n"; + else + os << *it; + } + // make sure that our comment is the last thing on the line + newcontext.new_paragraph(os); + newcontext.check_layout(os); + newcontext.check_end_layout(os); + end_inset(os); +} + + +class isLayout : public std::unary_function { +public: + isLayout(string const name) : name_(name) {} + bool operator()(LyXLayout_ptr const & ptr) const { + return ptr->latexname() == name_; + } +private: + string const name_; +}; + + +LyXLayout_ptr findLayout(LyXTextClass const & textclass, + string const & name) +{ + LyXTextClass::const_iterator beg = textclass.begin(); + LyXTextClass::const_iterator end = textclass.end(); + + LyXTextClass::const_iterator + it = std::find_if(beg, end, isLayout(name)); + + return (it == end) ? 
LyXLayout_ptr() : *it; +} + + +void eat_whitespace(Parser &, ostream &, Context &, bool); + + +void output_command_layout(ostream & os, Parser & p, bool outer, + Context & parent_context, + LyXLayout_ptr newlayout) +{ + parent_context.check_end_layout(os); + Context context(true, parent_context.textclass, newlayout, + parent_context.layout, parent_context.font); + if (parent_context.deeper_paragraph) { + // We are beginning a nested environment after a + // deeper paragraph inside the outer list environment. + // Therefore we don't need to output a "begin deeper". + context.need_end_deeper = true; + } + context.check_deeper(os); + context.check_layout(os); + if (context.layout->optionalargs > 0) { + eat_whitespace(p, os, context, false); + if (p.next_token().character() == '[') { + p.get_token(); // eat '[' + begin_inset(os, "OptArg\n"); + os << "status collapsed\n\n"; + parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context); + end_inset(os); + eat_whitespace(p, os, context, false); + } + } + parse_text(p, os, FLAG_ITEM, outer, context); + context.check_end_layout(os); + if (parent_context.deeper_paragraph) { + // We must suppress the "end deeper" because we + // suppressed the "begin deeper" above. + context.need_end_deeper = false; + } + context.check_end_deeper(os); + // We don't need really a new paragraph, but + // we must make sure that the next item gets a \begin_layout. + parent_context.new_paragraph(os); +} + + +/*! + * Output a space if necessary. + * This function gets called for every whitespace token. + * + * We have three cases here: + * 1. A space must be suppressed. Example: The lyxcode case below + * 2. A space may be suppressed. Example: Spaces before "\par" + * 3. A space must not be suppressed. Example: A space between two words + * + * We currently handle only 1. and 3 and from 2. only the case of + * spaces before newlines as a side effect. + * + * 2. could be used to suppress as many spaces as possible. 
This has two effects: + * - Reimporting LyX generated LaTeX files changes almost no whitespace + * - Superflous whitespace from non LyX generated LaTeX files is removed. + * The drawback is that the logic inside the function becomes + * complicated, and that is the reason why it is not implemented. + */ +void check_space(Parser const & p, ostream & os, Context & context) +{ + Token const next = p.next_token(); + Token const curr = p.curr_token(); + // A space before a single newline and vice versa must be ignored + // LyX emits a newline before \end{lyxcode}. + // This newline must be ignored, + // otherwise LyX will add an additional protected space. + if (next.cat() == catSpace || + next.cat() == catNewline || + (next.cs() == "end" && context.layout->free_spacing && curr.cat() == catNewline)) { + return; + } + context.check_layout(os); + os << ' '; +} + + +/*! + * Parse all arguments of \p command + */ +void parse_arguments(string const & command, + vector const & template_arguments, + Parser & p, ostream & os, bool outer, Context & context) +{ + string ert = command; + size_t no_arguments = template_arguments.size(); + for (size_t i = 0; i < no_arguments; ++i) { + switch (template_arguments[i]) { + case required: + // This argument contains regular LaTeX + handle_ert(os, ert + '{', context); + eat_whitespace(p, os, context, false); + parse_text(p, os, FLAG_ITEM, outer, context); + ert = "}"; + break; + case verbatim: + // This argument may contain special characters + ert += '{' + p.verbatim_item() + '}'; + break; + case optional: + ert += p.getOpt(); + break; + } + } + handle_ert(os, ert, context); +} + + +/*! + * Check whether \p command is a known command. If yes, + * handle the command with all arguments. + * \return true if the command was parsed, false otherwise. 
+ */ +bool parse_command(string const & command, Parser & p, ostream & os, + bool outer, Context & context) +{ + if (known_commands.find(command) != known_commands.end()) { + parse_arguments(command, known_commands[command], p, os, + outer, context); + return true; + } + return false; +} + + +/// Parses a minipage or parbox +void parse_box(Parser & p, ostream & os, unsigned flags, bool outer, + Context & parent_context, bool use_parbox) +{ + string position; + string inner_pos; + string height_value = "0"; + string height_unit = "pt"; + string height_special = "none"; + string latex_height; + if (p.next_token().asInput() == "[") { + position = p.getArg('[', ']'); + if (position != "t" && position != "c" && position != "b") { + position = "c"; + cerr << "invalid position for minipage/parbox" << endl; + } + if (p.next_token().asInput() == "[") { + latex_height = p.getArg('[', ']'); + translate_box_len(latex_height, height_value, height_unit, height_special); + + if (p.next_token().asInput() == "[") { + inner_pos = p.getArg('[', ']'); + if (inner_pos != "c" && inner_pos != "t" && + inner_pos != "b" && inner_pos != "s") { + inner_pos = position; + cerr << "invalid inner_pos for minipage/parbox" + << endl; + } + } + } + } + string width_value; + string width_unit; + string const latex_width = p.verbatim_item(); + translate_len(latex_width, width_value, width_unit); + if (contains(width_unit, '\\') || contains(height_unit, '\\')) { + // LyX can't handle length variables + ostringstream ss; + if (use_parbox) + ss << "\\parbox"; + else + ss << "\\begin{minipage}"; + if (!position.empty()) + ss << '[' << position << ']'; + if (!latex_height.empty()) + ss << '[' << latex_height << ']'; + if (!inner_pos.empty()) + ss << '[' << inner_pos << ']'; + ss << "{" << latex_width << "}"; + if (use_parbox) + ss << '{'; + handle_ert(os, ss.str(), parent_context); + parent_context.new_paragraph(os); + parse_text_in_inset(p, os, flags, outer, parent_context); + if (use_parbox) + 
handle_ert(os, "}", parent_context); + else + handle_ert(os, "\\end{minipage}", parent_context); + } else { + // LyX does not like empty positions, so we have + // to set them to the LaTeX default values here. + if (position.empty()) + position = "c"; + if (inner_pos.empty()) + inner_pos = position; + parent_context.check_layout(os); + begin_inset(os, "Box Frameless\n"); + os << "position \"" << position << "\"\n"; + os << "hor_pos \"c\"\n"; + os << "has_inner_box 1\n"; + os << "inner_pos \"" << inner_pos << "\"\n"; + os << "use_parbox " << use_parbox << "\n"; + os << "width \"" << width_value << width_unit << "\"\n"; + os << "special \"none\"\n"; + os << "height \"" << height_value << height_unit << "\"\n"; + os << "height_special \"" << height_special << "\"\n"; + os << "status open\n\n"; + parse_text_in_inset(p, os, flags, outer, parent_context); + end_inset(os); +#ifdef PRESERVE_LAYOUT + // lyx puts a % after the end of the minipage + if (p.next_token().cat() == catNewline && p.next_token().cs().size() > 1) { + // new paragraph + //handle_comment(os, "%dummy", parent_context); + p.get_token(); + p.skip_spaces(); + parent_context.new_paragraph(os); + } + else if (p.next_token().cat() == catSpace || p.next_token().cat() == catNewline) { + //handle_comment(os, "%dummy", parent_context); + p.get_token(); + p.skip_spaces(); + // We add a protected space if something real follows + if (p.good() && p.next_token().cat() != catComment) { + os << "\\InsetSpace ~\n"; + } + } +#endif + } +} + + +/// parse an unknown environment +void parse_unknown_environment(Parser & p, string const & name, ostream & os, + unsigned flags, bool outer, + Context & parent_context) +{ + if (name == "tabbing") + // We need to remember that we have to handle '\=' specially + flags |= FLAG_TABBING; + + // We need to translate font changes and paragraphs inside the + // environment to ERT if we have a non standard font. 
+ // Otherwise things like + // \large\begin{foo}\huge bar\end{foo} + // will not work. + bool const specialfont = + (parent_context.font != parent_context.normalfont); + bool const new_layout_allowed = parent_context.new_layout_allowed; + if (specialfont) + parent_context.new_layout_allowed = false; + handle_ert(os, "\\begin{" + name + "}", parent_context); + parse_text_snippet(p, os, flags, outer, parent_context); + handle_ert(os, "\\end{" + name + "}", parent_context); + if (specialfont) + parent_context.new_layout_allowed = new_layout_allowed; +} + + +void parse_environment(Parser & p, ostream & os, bool outer, + Context & parent_context) +{ + LyXLayout_ptr newlayout; + string const name = p.getArg('{', '}'); + const bool is_starred = suffixIs(name, '*'); + string const unstarred_name = rtrim(name, "*"); + active_environments.push_back(name); + + if (is_math_env(name)) { + parent_context.check_layout(os); + begin_inset(os, "Formula "); + os << "\\begin{" << name << "}"; + parse_math(p, os, FLAG_END, MATH_MODE); + os << "\\end{" << name << "}"; + end_inset(os); + } + + else if (name == "tabular" || name == "longtable") { + eat_whitespace(p, os, parent_context, false); + parent_context.check_layout(os); + begin_inset(os, "Tabular "); + handle_tabular(p, os, name == "longtable", parent_context); + end_inset(os); + p.skip_spaces(); + } + + else if (parent_context.textclass.floats().typeExist(unstarred_name)) { + eat_whitespace(p, os, parent_context, false); + parent_context.check_layout(os); + begin_inset(os, "Float " + unstarred_name + "\n"); + if (p.next_token().asInput() == "[") { + os << "placement " << p.getArg('[', ']') << '\n'; + } + os << "wide " << convert(is_starred) + << "\nsideways false" + << "\nstatus open\n\n"; + parse_text_in_inset(p, os, FLAG_END, outer, parent_context); + end_inset(os); + // We don't need really a new paragraph, but + // we must make sure that the next item gets a \begin_layout. 
+ parent_context.new_paragraph(os); + p.skip_spaces(); + } + + else if (name == "minipage") { + eat_whitespace(p, os, parent_context, false); + parse_box(p, os, FLAG_END, outer, parent_context, false); + p.skip_spaces(); + } + + else if (name == "comment") { + eat_whitespace(p, os, parent_context, false); + parent_context.check_layout(os); + begin_inset(os, "Note Comment\n"); + os << "status open\n"; + parse_text_in_inset(p, os, FLAG_END, outer, parent_context); + end_inset(os); + p.skip_spaces(); + } + + else if (name == "lyxgreyedout") { + eat_whitespace(p, os, parent_context, false); + parent_context.check_layout(os); + begin_inset(os, "Note Greyedout\n"); + os << "status open\n"; + parse_text_in_inset(p, os, FLAG_END, outer, parent_context); + end_inset(os); + p.skip_spaces(); + } + + else if (!parent_context.new_layout_allowed) + parse_unknown_environment(p, name, os, FLAG_END, outer, + parent_context); + + // Alignment settings + else if (name == "center" || name == "flushleft" || name == "flushright" || + name == "centering" || name == "raggedright" || name == "raggedleft") { + eat_whitespace(p, os, parent_context, false); + // We must begin a new paragraph if not already done + if (! parent_context.atParagraphStart()) { + parent_context.check_end_layout(os); + parent_context.new_paragraph(os); + } + if (name == "flushleft" || name == "raggedright") + parent_context.add_extra_stuff("\\align left\n"); + else if (name == "flushright" || name == "raggedleft") + parent_context.add_extra_stuff("\\align right\n"); + else + parent_context.add_extra_stuff("\\align center\n"); + parse_text(p, os, FLAG_END, outer, parent_context); + // Just in case the environment is empty .. + parent_context.extra_stuff.erase(); + // We must begin a new paragraph to reset the alignment + parent_context.new_paragraph(os); + p.skip_spaces(); + } + + // The single '=' is meant here. 
+ else if ((newlayout = findLayout(parent_context.textclass, name)).get() && + newlayout->isEnvironment()) { + eat_whitespace(p, os, parent_context, false); + Context context(true, parent_context.textclass, newlayout, + parent_context.layout, parent_context.font); + if (parent_context.deeper_paragraph) { + // We are beginning a nested environment after a + // deeper paragraph inside the outer list environment. + // Therefore we don't need to output a "begin deeper". + context.need_end_deeper = true; + } + parent_context.check_end_layout(os); + switch (context.layout->latextype) { + case LATEX_LIST_ENVIRONMENT: + context.extra_stuff = "\\labelwidthstring " + + p.verbatim_item() + '\n'; + p.skip_spaces(); + break; + case LATEX_BIB_ENVIRONMENT: + p.verbatim_item(); // swallow next arg + p.skip_spaces(); + break; + default: + break; + } + context.check_deeper(os); + parse_text(p, os, FLAG_END, outer, context); + context.check_end_layout(os); + if (parent_context.deeper_paragraph) { + // We must suppress the "end deeper" because we + // suppressed the "begin deeper" above. + context.need_end_deeper = false; + } + context.check_end_deeper(os); + parent_context.new_paragraph(os); + p.skip_spaces(); + } + + else if (name == "appendix") { + // This is no good latex style, but it works and is used in some documents... 
+ eat_whitespace(p, os, parent_context, false); + parent_context.check_end_layout(os); + Context context(true, parent_context.textclass, parent_context.layout, + parent_context.layout, parent_context.font); + context.check_layout(os); + os << "\\start_of_appendix\n"; + parse_text(p, os, FLAG_END, outer, context); + context.check_end_layout(os); + p.skip_spaces(); + } + + else if (known_environments.find(name) != known_environments.end()) { + vector arguments = known_environments[name]; + // The last "argument" denotes wether we may translate the + // environment contents to LyX + // The default required if no argument is given makes us + // compatible with the reLyXre environment. + ArgumentType contents = arguments.empty() ? + required : + arguments.back(); + if (!arguments.empty()) + arguments.pop_back(); + // See comment in parse_unknown_environment() + bool const specialfont = + (parent_context.font != parent_context.normalfont); + bool const new_layout_allowed = + parent_context.new_layout_allowed; + if (specialfont) + parent_context.new_layout_allowed = false; + parse_arguments("\\begin{" + name + "}", arguments, p, os, + outer, parent_context); + if (contents == verbatim) + handle_ert(os, p.verbatimEnvironment(name), + parent_context); + else + parse_text_snippet(p, os, FLAG_END, outer, + parent_context); + handle_ert(os, "\\end{" + name + "}", parent_context); + if (specialfont) + parent_context.new_layout_allowed = new_layout_allowed; + } + + else + parse_unknown_environment(p, name, os, FLAG_END, outer, + parent_context); + + active_environments.pop_back(); +} + + +/// parses a comment and outputs it to \p os. 
+void parse_comment(Parser & p, ostream & os, Token const & t, Context & context) +{ + BOOST_ASSERT(t.cat() == catComment); + if (!t.cs().empty()) { + context.check_layout(os); + handle_comment(os, '%' + t.cs(), context); + if (p.next_token().cat() == catNewline) { + // A newline after a comment line starts a new + // paragraph + if (context.new_layout_allowed) { + if(!context.atParagraphStart()) + // Only start a new paragraph if not already + // done (we might get called recursively) + context.new_paragraph(os); + } else + handle_ert(os, "\n", context); + eat_whitespace(p, os, context, true); + } + } else { + // "%\n" combination + p.skip_spaces(); + } +} + + +/*! + * Reads spaces and comments until the first non-space, non-comment token. + * New paragraphs (double newlines or \\par) are handled like simple spaces + * if \p eatParagraph is true. + * Spaces are skipped, but comments are written to \p os. + */ +void eat_whitespace(Parser & p, ostream & os, Context & context, + bool eatParagraph) +{ + while (p.good()) { + Token const & t = p.get_token(); + if (t.cat() == catComment) + parse_comment(p, os, t, context); + else if ((! eatParagraph && p.isParagraph()) || + (t.cat() != catSpace && t.cat() != catNewline)) { + p.putback(); + return; + } + } +} + + +/*! + * Set a font attribute, parse text and reset the font attribute. + * \param attribute Attribute name (e.g. \\family, \\shape etc.) + * \param currentvalue Current value of the attribute. Is set to the new + * value during parsing. 
+ * \param newvalue New value of the attribute + */ +void parse_text_attributes(Parser & p, ostream & os, unsigned flags, bool outer, + Context & context, string const & attribute, + string & currentvalue, string const & newvalue) +{ + context.check_layout(os); + string const oldvalue = currentvalue; + currentvalue = newvalue; + os << '\n' << attribute << ' ' << newvalue << "\n"; + parse_text_snippet(p, os, flags, outer, context); + context.check_layout(os); + os << '\n' << attribute << ' ' << oldvalue << "\n"; + currentvalue = oldvalue; +} + + +/// get the arguments of a natbib or jurabib citation command +std::pair getCiteArguments(Parser & p, bool natbibOrder) +{ + // We need to distinguish "" and "[]", so we can't use p.getOpt(). + + // text before the citation + string before; + // text after the citation + string after = p.getFullOpt(); + + if (!after.empty()) { + before = p.getFullOpt(); + if (natbibOrder && !before.empty()) + std::swap(before, after); + } + return std::make_pair(before, after); +} + + +/// Convert filenames with TeX macros and/or quotes to something LyX can understand +string const normalize_filename(string const & name) +{ + Parser p(trim(name, "\"")); + ostringstream os; + while (p.good()) { + Token const & t = p.get_token(); + if (t.cat() != catEscape) + os << t.asInput(); + else if (t.cs() == "lyxdot") { + // This is used by LyX for simple dots in relative + // names + os << '.'; + p.skip_spaces(); + } else if (t.cs() == "space") { + os << ' '; + p.skip_spaces(); + } else + os << t.asInput(); + } + return os.str(); +} + + +/// Convert \p name from TeX convention (relative to master file) to LyX +/// convention (relative to .lyx file) if it is relative +void fix_relative_filename(string & name) +{ + if (lyx::support::absolutePath(name)) + return; + // FIXME UNICODE encoding of name may be wrong (makeAbsPath expects + // utf8) + name = to_utf8(makeRelPath(from_utf8(makeAbsPath(name, getMasterFilePath()).absFilename()), + 
from_utf8(getParentFilePath()))); +} + + +/// Parse a NoWeb Scrap section. The initial "<<" is already parsed. +void parse_noweb(Parser & p, ostream & os, Context & context) +{ + // assemble the rest of the keyword + string name("<<"); + bool scrap = false; + while (p.good()) { + Token const & t = p.get_token(); + if (t.asInput() == ">" && p.next_token().asInput() == ">") { + name += ">>"; + p.get_token(); + scrap = (p.good() && p.next_token().asInput() == "="); + if (scrap) + name += p.get_token().asInput(); + break; + } + name += t.asInput(); + } + + if (!scrap || !context.new_layout_allowed || + !context.textclass.hasLayout("Scrap")) { + cerr << "Warning: Could not interpret '" << name + << "'. Ignoring it." << endl; + return; + } + + // We use new_paragraph instead of check_end_layout because the stuff + // following the noweb chunk needs to start with a \begin_layout. + // This may create a new paragraph even if there was none in the + // noweb file, but the alternative is an invalid LyX file. Since + // noweb code chunks are implemented with a layout style in LyX they + // always must be in an own paragraph. + context.new_paragraph(os); + Context newcontext(true, context.textclass, context.textclass["Scrap"]); + newcontext.check_layout(os); + os << name; + while (p.good()) { + Token const & t = p.get_token(); + // We abuse the parser a bit, because this is no TeX syntax + // at all. + if (t.cat() == catEscape) + os << subst(t.asInput(), "\\", "\n\\backslash\n"); + else + os << subst(t.asInput(), "\n", "\n\\newline\n"); + // The scrap chunk is ended by an @ at the beginning of a line. + // After the @ the line may contain a comment and/or + // whitespace, but nothing else. 
+ if (t.asInput() == "@" && p.prev_token().cat() == catNewline && + (p.next_token().cat() == catSpace || + p.next_token().cat() == catNewline || + p.next_token().cat() == catComment)) { + while (p.good() && p.next_token().cat() == catSpace) + os << p.get_token().asInput(); + if (p.next_token().cat() == catComment) + // The comment includes a final '\n' + os << p.get_token().asInput(); + else { + if (p.next_token().cat() == catNewline) + p.get_token(); + os << '\n'; + } + break; + } + } + newcontext.check_end_layout(os); +} + +} // anonymous namespace + + +void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, + Context & context) +{ + LyXLayout_ptr newlayout; + // Store the latest bibliographystyle (needed for bibtex inset) + string bibliographystyle; + bool const use_natbib = used_packages.find("natbib") != used_packages.end(); + bool const use_jurabib = used_packages.find("jurabib") != used_packages.end(); + while (p.good()) { + Token const & t = p.get_token(); + +#ifdef FILEDEBUG + cerr << "t: " << t << " flags: " << flags << "\n"; +#endif + + if (flags & FLAG_ITEM) { + if (t.cat() == catSpace) + continue; + + flags &= ~FLAG_ITEM; + if (t.cat() == catBegin) { + // skip the brace and collect everything to the next matching + // closing brace + flags |= FLAG_BRACE_LAST; + continue; + } + + // handle only this single token, leave the loop if done + flags |= FLAG_LEAVE; + } + + if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) + return; + + // + // cat codes + // + if (t.cat() == catMath) { + // we are inside some text mode thingy, so opening new math is allowed + context.check_layout(os); + begin_inset(os, "Formula "); + Token const & n = p.get_token(); + if (n.cat() == catMath && outer) { + // TeX's $$...$$ syntax for displayed math + os << "\\["; + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << "\\]"; + p.get_token(); // skip the second '$' token + } else { + // simple $...$ stuff + p.putback(); + os << '$'; + parse_math(p, os, 
FLAG_SIMPLE, MATH_MODE); + os << '$'; + } + end_inset(os); + } + + else if (t.cat() == catSuper || t.cat() == catSub) + cerr << "catcode " << t << " illegal in text mode\n"; + + // Basic support for english quotes. This should be + // extended to other quotes, but is not so easy (a + // left english quote is the same as a right german + // quote...) + else if (t.asInput() == "`" + && p.next_token().asInput() == "`") { + context.check_layout(os); + begin_inset(os, "Quotes "); + os << "eld"; + end_inset(os); + p.get_token(); + skip_braces(p); + } + else if (t.asInput() == "'" + && p.next_token().asInput() == "'") { + context.check_layout(os); + begin_inset(os, "Quotes "); + os << "erd"; + end_inset(os); + p.get_token(); + skip_braces(p); + } + + else if (t.asInput() == "<" + && p.next_token().asInput() == "<" && noweb_mode) { + p.get_token(); + parse_noweb(p, os, context); + } + + else if (t.cat() == catSpace || (t.cat() == catNewline && ! p.isParagraph())) + check_space(p, os, context); + + else if (t.character() == '[' && noweb_mode && + p.next_token().character() == '[') { + // These can contain underscores + p.putback(); + string const s = p.getFullOpt() + ']'; + if (p.next_token().character() == ']') + p.get_token(); + else + cerr << "Warning: Inserting missing ']' in '" + << s << "'." << endl; + handle_ert(os, s, context); + } + + else if (t.cat() == catLetter || + t.cat() == catOther || + t.cat() == catAlign || + t.cat() == catParameter) { + // This translates "&" to "\\&" which may be wrong... 
+ context.check_layout(os); + os << t.character(); + } + + else if (p.isParagraph()) { + if (context.new_layout_allowed) + context.new_paragraph(os); + else + handle_ert(os, "\\par ", context); + eat_whitespace(p, os, context, true); + } + + else if (t.cat() == catActive) { + context.check_layout(os); + if (t.character() == '~') { + if (context.layout->free_spacing) + os << ' '; + else + os << "\\InsetSpace ~\n"; + } else + os << t.character(); + } + + else if (t.cat() == catBegin && + p.next_token().cat() == catEnd) { + // {} + Token const prev = p.prev_token(); + p.get_token(); + if (p.next_token().character() == '`' || + (prev.character() == '-' && + p.next_token().character() == '-')) + ; // ignore it in {}`` or -{}- + else + handle_ert(os, "{}", context); + + } + + else if (t.cat() == catBegin) { + context.check_layout(os); + // special handling of font attribute changes + Token const prev = p.prev_token(); + Token const next = p.next_token(); + Font const oldFont = context.font; + if (next.character() == '[' || + next.character() == ']' || + next.character() == '*') { + p.get_token(); + if (p.next_token().cat() == catEnd) { + os << next.character(); + p.get_token(); + } else { + p.putback(); + handle_ert(os, "{", context); + parse_text_snippet(p, os, + FLAG_BRACE_LAST, + outer, context); + handle_ert(os, "}", context); + } + } else if (! 
context.new_layout_allowed) { + handle_ert(os, "{", context); + parse_text_snippet(p, os, FLAG_BRACE_LAST, + outer, context); + handle_ert(os, "}", context); + } else if (is_known(next.cs(), known_sizes)) { + // next will change the size, so we must + // reset it here + parse_text_snippet(p, os, FLAG_BRACE_LAST, + outer, context); + if (!context.atParagraphStart()) + os << "\n\\size " + << context.font.size << "\n"; + } else if (is_known(next.cs(), known_font_families)) { + // next will change the font family, so we + // must reset it here + parse_text_snippet(p, os, FLAG_BRACE_LAST, + outer, context); + if (!context.atParagraphStart()) + os << "\n\\family " + << context.font.family << "\n"; + } else if (is_known(next.cs(), known_font_series)) { + // next will change the font series, so we + // must reset it here + parse_text_snippet(p, os, FLAG_BRACE_LAST, + outer, context); + if (!context.atParagraphStart()) + os << "\n\\series " + << context.font.series << "\n"; + } else if (is_known(next.cs(), known_font_shapes)) { + // next will change the font shape, so we + // must reset it here + parse_text_snippet(p, os, FLAG_BRACE_LAST, + outer, context); + if (!context.atParagraphStart()) + os << "\n\\shape " + << context.font.shape << "\n"; + } else if (is_known(next.cs(), known_old_font_families) || + is_known(next.cs(), known_old_font_series) || + is_known(next.cs(), known_old_font_shapes)) { + // next will change the font family, series + // and shape, so we must reset it here + parse_text_snippet(p, os, FLAG_BRACE_LAST, + outer, context); + if (!context.atParagraphStart()) + os << "\n\\family " + << context.font.family + << "\n\\series " + << context.font.series + << "\n\\shape " + << context.font.shape << "\n"; + } else { + handle_ert(os, "{", context); + parse_text_snippet(p, os, FLAG_BRACE_LAST, + outer, context); + handle_ert(os, "}", context); + } + } + + else if (t.cat() == catEnd) { + if (flags & FLAG_BRACE_LAST) { + return; + } + cerr << "stray '}' in 
text\n"; + handle_ert(os, "}", context); + } + + else if (t.cat() == catComment) + parse_comment(p, os, t, context); + + // + // control sequences + // + + else if (t.cs() == "(") { + context.check_layout(os); + begin_inset(os, "Formula"); + os << " \\("; + parse_math(p, os, FLAG_SIMPLE2, MATH_MODE); + os << "\\)"; + end_inset(os); + } + + else if (t.cs() == "[") { + context.check_layout(os); + begin_inset(os, "Formula"); + os << " \\["; + parse_math(p, os, FLAG_EQUATION, MATH_MODE); + os << "\\]"; + end_inset(os); + } + + else if (t.cs() == "begin") + parse_environment(p, os, outer, context); + + else if (t.cs() == "end") { + if (flags & FLAG_END) { + // eat environment name + string const name = p.getArg('{', '}'); + if (name != active_environment()) + cerr << "\\end{" + name + "} does not match \\begin{" + + active_environment() + "}\n"; + return; + } + p.error("found 'end' unexpectedly"); + } + + else if (t.cs() == "item") { + p.skip_spaces(); + string s; + bool optarg = false; + if (p.next_token().character() == '[') { + p.get_token(); // eat '[' + s = parse_text_snippet(p, FLAG_BRACK_LAST, + outer, context); + optarg = true; + } + context.set_item(); + context.check_layout(os); + if (context.has_item) { + // An item in an unknown list-like environment + // FIXME: Do this in check_layout()! + context.has_item = false; + if (optarg) + handle_ert(os, "\\item", context); + else + handle_ert(os, "\\item ", context); + } + if (optarg) { + if (context.layout->labeltype != LABEL_MANUAL) { + // lyx does not support \item[\mybullet] + // in itemize environments + handle_ert(os, "[", context); + os << s; + handle_ert(os, "]", context); + } else if (!s.empty()) { + // The space is needed to separate the + // item from the rest of the sentence. 
+ os << s << ' '; + eat_whitespace(p, os, context, false); + } + } + } + + else if (t.cs() == "bibitem") { + context.set_item(); + context.check_layout(os); + os << "\\bibitem "; + os << p.getOpt(); + os << '{' << p.verbatim_item() << '}' << "\n"; + } + + else if (t.cs() == "def") { + context.check_layout(os); + eat_whitespace(p, os, context, false); + string name = p.get_token().cs(); + while (p.next_token().cat() != catBegin) + name += p.get_token().asString(); + handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}', context); + } + + else if (t.cs() == "noindent") { + p.skip_spaces(); + context.add_extra_stuff("\\noindent\n"); + } + + else if (t.cs() == "appendix") { + context.add_extra_stuff("\\start_of_appendix\n"); + // We need to start a new paragraph. Otherwise the + // appendix in 'bla\appendix\chapter{' would start + // too late. + context.new_paragraph(os); + // We need to make sure that the paragraph is + // generated even if it is empty. Otherwise the + // appendix in '\par\appendix\par\chapter{' would + // start too late. + context.check_layout(os); + // FIXME: This is a hack to prevent paragraph + // deletion if it is empty. Handle this better! + handle_comment(os, + "%dummy comment inserted by tex2lyx to " + "ensure that this paragraph is not empty", + context); + // Both measures above may generate an additional + // empty paragraph, but that does not hurt, because + // whitespace does not matter here. + eat_whitespace(p, os, context, true); + } + + // Must attempt to parse "Section*" before "Section". + else if ((p.next_token().asInput() == "*") && + context.new_layout_allowed && + // The single '=' is meant here. + (newlayout = findLayout(context.textclass, + t.cs() + '*')).get() && + newlayout->isCommand()) { + p.get_token(); + output_command_layout(os, p, outer, context, newlayout); + p.skip_spaces(); + } + + // The single '=' is meant here. 
+ else if (context.new_layout_allowed && + (newlayout = findLayout(context.textclass, t.cs())).get() && + newlayout->isCommand()) { + output_command_layout(os, p, outer, context, newlayout); + p.skip_spaces(); + } + + else if (t.cs() == "includegraphics") { + bool const clip = p.next_token().asInput() == "*"; + if (clip) + p.get_token(); + map opts = split_map(p.getArg('[', ']')); + if (clip) + opts["clip"] = string(); + string name = normalize_filename(p.verbatim_item()); + + string const path = getMasterFilePath(); + // We want to preserve relative / absolute filenames, + // therefore path is only used for testing + // FIXME UNICODE encoding of name and path may be + // wrong (makeAbsPath expects utf8) + if (!fs::exists(makeAbsPath(name, path).toFilesystemEncoding())) { + // The file extension is probably missing. + // Now try to find it out. + string const dvips_name = + find_file(name, path, + known_dvips_graphics_formats); + string const pdftex_name = + find_file(name, path, + known_pdftex_graphics_formats); + if (!dvips_name.empty()) { + if (!pdftex_name.empty()) { + cerr << "This file contains the " + "latex snippet\n" + "\"\\includegraphics{" + << name << "}\".\n" + "However, files\n\"" + << dvips_name << "\" and\n\"" + << pdftex_name << "\"\n" + "both exist, so I had to make a " + "choice and took the first one.\n" + "Please move the unwanted one " + "someplace else and try again\n" + "if my choice was wrong." + << endl; + } + name = dvips_name; + } else if (!pdftex_name.empty()) + name = pdftex_name; + } + + // FIXME UNICODE encoding of name and path may be + // wrong (makeAbsPath expects utf8) + if (fs::exists(makeAbsPath(name, path).toFilesystemEncoding())) + fix_relative_filename(name); + else + cerr << "Warning: Could not find graphics file '" + << name << "'." 
<< endl; + + context.check_layout(os); + begin_inset(os, "Graphics "); + os << "\n\tfilename " << name << '\n'; + if (opts.find("width") != opts.end()) + os << "\twidth " + << translate_len(opts["width"]) << '\n'; + if (opts.find("height") != opts.end()) + os << "\theight " + << translate_len(opts["height"]) << '\n'; + if (opts.find("scale") != opts.end()) { + istringstream iss(opts["scale"]); + double val; + iss >> val; + val = val*100; + os << "\tscale " << val << '\n'; + } + if (opts.find("angle") != opts.end()) + os << "\trotateAngle " + << opts["angle"] << '\n'; + if (opts.find("origin") != opts.end()) { + ostringstream ss; + string const opt = opts["origin"]; + if (opt.find('l') != string::npos) ss << "left"; + if (opt.find('r') != string::npos) ss << "right"; + if (opt.find('c') != string::npos) ss << "center"; + if (opt.find('t') != string::npos) ss << "Top"; + if (opt.find('b') != string::npos) ss << "Bottom"; + if (opt.find('B') != string::npos) ss << "Baseline"; + if (!ss.str().empty()) + os << "\trotateOrigin " << ss.str() << '\n'; + else + cerr << "Warning: Ignoring unknown includegraphics origin argument '" << opt << "'\n"; + } + if (opts.find("keepaspectratio") != opts.end()) + os << "\tkeepAspectRatio\n"; + if (opts.find("clip") != opts.end()) + os << "\tclip\n"; + if (opts.find("draft") != opts.end()) + os << "\tdraft\n"; + if (opts.find("bb") != opts.end()) + os << "\tBoundingBox " + << opts["bb"] << '\n'; + int numberOfbbOptions = 0; + if (opts.find("bbllx") != opts.end()) + numberOfbbOptions++; + if (opts.find("bblly") != opts.end()) + numberOfbbOptions++; + if (opts.find("bburx") != opts.end()) + numberOfbbOptions++; + if (opts.find("bbury") != opts.end()) + numberOfbbOptions++; + if (numberOfbbOptions == 4) + os << "\tBoundingBox " + << opts["bbllx"] << opts["bblly"] + << opts["bburx"] << opts["bbury"] << '\n'; + else if (numberOfbbOptions > 0) + cerr << "Warning: Ignoring incomplete includegraphics boundingbox arguments.\n"; + 
numberOfbbOptions = 0; + if (opts.find("natwidth") != opts.end()) + numberOfbbOptions++; + if (opts.find("natheight") != opts.end()) + numberOfbbOptions++; + if (numberOfbbOptions == 2) + os << "\tBoundingBox 0bp 0bp " + << opts["natwidth"] << opts["natheight"] << '\n'; + else if (numberOfbbOptions > 0) + cerr << "Warning: Ignoring incomplete includegraphics boundingbox arguments.\n"; + ostringstream special; + if (opts.find("hiresbb") != opts.end()) + special << "hiresbb,"; + if (opts.find("trim") != opts.end()) + special << "trim,"; + if (opts.find("viewport") != opts.end()) + special << "viewport=" << opts["viewport"] << ','; + if (opts.find("totalheight") != opts.end()) + special << "totalheight=" << opts["totalheight"] << ','; + if (opts.find("type") != opts.end()) + special << "type=" << opts["type"] << ','; + if (opts.find("ext") != opts.end()) + special << "ext=" << opts["ext"] << ','; + if (opts.find("read") != opts.end()) + special << "read=" << opts["read"] << ','; + if (opts.find("command") != opts.end()) + special << "command=" << opts["command"] << ','; + string s_special = special.str(); + if (!s_special.empty()) { + // We had special arguments. Remove the trailing ','. + os << "\tspecial " << s_special.substr(0, s_special.size() - 1) << '\n'; + } + // TODO: Handle the unknown settings better. + // Warn about invalid options. + // Check whether some option was given twice. 
+ end_inset(os); + } + + else if (t.cs() == "footnote" || + (t.cs() == "thanks" && context.layout->intitle)) { + p.skip_spaces(); + context.check_layout(os); + begin_inset(os, "Foot\n"); + os << "status collapsed\n\n"; + parse_text_in_inset(p, os, FLAG_ITEM, false, context); + end_inset(os); + } + + else if (t.cs() == "marginpar") { + p.skip_spaces(); + context.check_layout(os); + begin_inset(os, "Marginal\n"); + os << "status collapsed\n\n"; + parse_text_in_inset(p, os, FLAG_ITEM, false, context); + end_inset(os); + } + + else if (t.cs() == "ensuremath") { + p.skip_spaces(); + context.check_layout(os); + string const s = p.verbatim_item(); + if (s == "±" || s == "³" || s == "²" || s == "µ") + os << s; + else + handle_ert(os, "\\ensuremath{" + s + "}", + context); + } + + else if (t.cs() == "hfill") { + context.check_layout(os); + os << "\n\\hfill\n"; + skip_braces(p); + p.skip_spaces(); + } + + else if (t.cs() == "makeindex" || t.cs() == "maketitle") { + // FIXME: Somehow prevent title layouts if + // "maketitle" was not found + p.skip_spaces(); + skip_braces(p); // swallow this + } + + else if (t.cs() == "tableofcontents") { + p.skip_spaces(); + context.check_layout(os); + begin_inset(os, "LatexCommand \\tableofcontents\n"); + end_inset(os); + skip_braces(p); // swallow this + } + + else if (t.cs() == "listoffigures") { + p.skip_spaces(); + context.check_layout(os); + begin_inset(os, "FloatList figure\n"); + end_inset(os); + skip_braces(p); // swallow this + } + + else if (t.cs() == "listoftables") { + p.skip_spaces(); + context.check_layout(os); + begin_inset(os, "FloatList table\n"); + end_inset(os); + skip_braces(p); // swallow this + } + + else if (t.cs() == "listof") { + p.skip_spaces(true); + string const name = p.get_token().asString(); + if (context.textclass.floats().typeExist(name)) { + context.check_layout(os); + begin_inset(os, "FloatList "); + os << name << "\n"; + end_inset(os); + p.get_token(); // swallow second arg + } else + handle_ert(os, 
"\\listof{" + name + "}", context); + } + + else if (t.cs() == "textrm") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\family", + context.font.family, "roman"); + + else if (t.cs() == "textsf") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\family", + context.font.family, "sans"); + + else if (t.cs() == "texttt") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\family", + context.font.family, "typewriter"); + + else if (t.cs() == "textmd") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\series", + context.font.series, "medium"); + + else if (t.cs() == "textbf") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\series", + context.font.series, "bold"); + + else if (t.cs() == "textup") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\shape", + context.font.shape, "up"); + + else if (t.cs() == "textit") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\shape", + context.font.shape, "italic"); + + else if (t.cs() == "textsl") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\shape", + context.font.shape, "slanted"); + + else if (t.cs() == "textsc") + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\shape", + context.font.shape, "smallcaps"); + + else if (t.cs() == "textnormal" || t.cs() == "normalfont") { + context.check_layout(os); + Font oldFont = context.font; + context.font.init(); + context.font.size = oldFont.size; + os << "\n\\family " << context.font.family << "\n"; + os << "\n\\series " << context.font.series << "\n"; + os << "\n\\shape " << context.font.shape << "\n"; + if (t.cs() == "textnormal") { + parse_text_snippet(p, os, FLAG_ITEM, outer, context); + output_font_change(os, context.font, oldFont); + context.font = oldFont; + } else + eat_whitespace(p, os, context, false); + } + + else if (t.cs() == "underbar") { + // Do NOT handle \underline. + // \underbar cuts through y, g, q, p etc., + // \underline does not. 
+ context.check_layout(os); + os << "\n\\bar under\n"; + parse_text_snippet(p, os, FLAG_ITEM, outer, context); + context.check_layout(os); + os << "\n\\bar default\n"; + } + + else if (t.cs() == "emph" || t.cs() == "noun") { + context.check_layout(os); + os << "\n\\" << t.cs() << " on\n"; + parse_text_snippet(p, os, FLAG_ITEM, outer, context); + context.check_layout(os); + os << "\n\\" << t.cs() << " default\n"; + } + + else if (use_natbib && + is_known(t.cs(), known_natbib_commands) && + ((t.cs() != "citefullauthor" && + t.cs() != "citeyear" && + t.cs() != "citeyearpar") || + p.next_token().asInput() != "*")) { + context.check_layout(os); + // tex lyx + // \citet[before][after]{a} \citet[after][before]{a} + // \citet[before][]{a} \citet[][before]{a} + // \citet[after]{a} \citet[after]{a} + // \citet{a} \citet{a} + string command = '\\' + t.cs(); + if (p.next_token().asInput() == "*") { + command += '*'; + p.get_token(); + } + if (command == "\\citefullauthor") + // alternative name for "\\citeauthor*" + command = "\\citeauthor*"; + + // text before the citation + string before; + // text after the citation + string after; + + boost::tie(before, after) = getCiteArguments(p, true); + if (command == "\\cite") { + // \cite without optional argument means + // \citet, \cite with at least one optional + // argument means \citep. 
+ if (before.empty() && after.empty()) + command = "\\citet"; + else + command = "\\citep"; + } + if (before.empty() && after == "[]") + // avoid \citet[]{a} + after.erase(); + else if (before == "[]" && after == "[]") { + // avoid \citet[][]{a} + before.erase(); + after.erase(); + } + begin_inset(os, "LatexCommand "); + os << command << after << before + << '{' << p.verbatim_item() << "}\n"; + end_inset(os); + } + + else if (use_jurabib && + is_known(t.cs(), known_jurabib_commands)) { + context.check_layout(os); + string const command = '\\' + t.cs(); + char argumentOrder = '\0'; + vector const & options = used_packages["jurabib"]; + if (std::find(options.begin(), options.end(), + "natbiborder") != options.end()) + argumentOrder = 'n'; + else if (std::find(options.begin(), options.end(), + "jurabiborder") != options.end()) + argumentOrder = 'j'; + + // text before the citation + string before; + // text after the citation + string after; + + boost::tie(before, after) = + getCiteArguments(p, argumentOrder != 'j'); + string const citation = p.verbatim_item(); + if (!before.empty() && argumentOrder == '\0') { + cerr << "Warning: Assuming argument order " + "of jurabib version 0.6 for\n'" + << command << before << after << '{' + << citation << "}'.\n" + "Add 'jurabiborder' to the jurabib " + "package options if you used an\n" + "earlier jurabib version." << endl; + } + begin_inset(os, "LatexCommand "); + os << command << after << before + << '{' << citation << "}\n"; + end_inset(os); + } + + else if (is_known(t.cs(), known_latex_commands)) { + // This needs to be after the check for natbib and + // jurabib commands, because "cite" has different + // arguments with natbib and jurabib. + context.check_layout(os); + begin_inset(os, "LatexCommand "); + os << '\\' << t.cs(); + // lyx cannot handle newlines in a latex command + // FIXME: Move the substitution into parser::getOpt()? 
+ os << subst(p.getOpt(), "\n", " "); + os << subst(p.getOpt(), "\n", " "); + os << '{' << subst(p.verbatim_item(), "\n", " ") << "}\n"; + end_inset(os); + } + + else if (is_known(t.cs(), known_quotes)) { + char const * const * where = is_known(t.cs(), known_quotes); + context.check_layout(os); + begin_inset(os, "Quotes "); + os << known_coded_quotes[where - known_quotes]; + end_inset(os); + // LyX adds {} after the quote, so we have to eat + // spaces here if there are any before a possible + // {} pair. + eat_whitespace(p, os, context, false); + skip_braces(p); + } + + else if (is_known(t.cs(), known_sizes) && + context.new_layout_allowed) { + char const * const * where = is_known(t.cs(), known_sizes); + context.check_layout(os); + Font const oldFont = context.font; + context.font.size = known_coded_sizes[where - known_sizes]; + output_font_change(os, oldFont, context.font); + eat_whitespace(p, os, context, false); + } + + else if (is_known(t.cs(), known_font_families) && + context.new_layout_allowed) { + char const * const * where = + is_known(t.cs(), known_font_families); + context.check_layout(os); + Font const oldFont = context.font; + context.font.family = + known_coded_font_families[where - known_font_families]; + output_font_change(os, oldFont, context.font); + eat_whitespace(p, os, context, false); + } + + else if (is_known(t.cs(), known_font_series) && + context.new_layout_allowed) { + char const * const * where = + is_known(t.cs(), known_font_series); + context.check_layout(os); + Font const oldFont = context.font; + context.font.series = + known_coded_font_series[where - known_font_series]; + output_font_change(os, oldFont, context.font); + eat_whitespace(p, os, context, false); + } + + else if (is_known(t.cs(), known_font_shapes) && + context.new_layout_allowed) { + char const * const * where = + is_known(t.cs(), known_font_shapes); + context.check_layout(os); + Font const oldFont = context.font; + context.font.shape = + known_coded_font_shapes[where 
- known_font_shapes]; + output_font_change(os, oldFont, context.font); + eat_whitespace(p, os, context, false); + } + else if (is_known(t.cs(), known_old_font_families) && + context.new_layout_allowed) { + char const * const * where = + is_known(t.cs(), known_old_font_families); + context.check_layout(os); + Font const oldFont = context.font; + context.font.init(); + context.font.size = oldFont.size; + context.font.family = + known_coded_font_families[where - known_old_font_families]; + output_font_change(os, oldFont, context.font); + eat_whitespace(p, os, context, false); + } + + else if (is_known(t.cs(), known_old_font_series) && + context.new_layout_allowed) { + char const * const * where = + is_known(t.cs(), known_old_font_series); + context.check_layout(os); + Font const oldFont = context.font; + context.font.init(); + context.font.size = oldFont.size; + context.font.series = + known_coded_font_series[where - known_old_font_series]; + output_font_change(os, oldFont, context.font); + eat_whitespace(p, os, context, false); + } + + else if (is_known(t.cs(), known_old_font_shapes) && + context.new_layout_allowed) { + char const * const * where = + is_known(t.cs(), known_old_font_shapes); + context.check_layout(os); + Font const oldFont = context.font; + context.font.init(); + context.font.size = oldFont.size; + context.font.shape = + known_coded_font_shapes[where - known_old_font_shapes]; + output_font_change(os, oldFont, context.font); + eat_whitespace(p, os, context, false); + } + + else if (t.cs() == "LyX" || t.cs() == "TeX" + || t.cs() == "LaTeX") { + context.check_layout(os); + os << t.cs(); + skip_braces(p); // eat {} + } + + else if (t.cs() == "LaTeXe") { + context.check_layout(os); + os << "LaTeX2e"; + skip_braces(p); // eat {} + } + + else if (t.cs() == "ldots") { + context.check_layout(os); + skip_braces(p); + os << "\\SpecialChar \\ldots{}\n"; + } + + else if (t.cs() == "lyxarrow") { + context.check_layout(os); + os << "\\SpecialChar \\menuseparator\n"; 
+ skip_braces(p); + } + + else if (t.cs() == "textcompwordmark") { + context.check_layout(os); + os << "\\SpecialChar \\textcompwordmark{}\n"; + skip_braces(p); + } + + else if (t.cs() == "@" && p.next_token().asInput() == ".") { + context.check_layout(os); + os << "\\SpecialChar \\@.\n"; + p.get_token(); + } + + else if (t.cs() == "-") { + context.check_layout(os); + os << "\\SpecialChar \\-\n"; + } + + else if (t.cs() == "textasciitilde") { + context.check_layout(os); + os << '~'; + skip_braces(p); + } + + else if (t.cs() == "textasciicircum") { + context.check_layout(os); + os << '^'; + skip_braces(p); + } + + else if (t.cs() == "textbackslash") { + context.check_layout(os); + os << "\n\\backslash\n"; + skip_braces(p); + } + + else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" + || t.cs() == "$" || t.cs() == "{" || t.cs() == "}" + || t.cs() == "%") { + context.check_layout(os); + os << t.cs(); + } + + else if (t.cs() == "char") { + context.check_layout(os); + if (p.next_token().character() == '`') { + p.get_token(); + if (p.next_token().cs() == "\"") { + p.get_token(); + os << '"'; + skip_braces(p); + } else { + handle_ert(os, "\\char`", context); + } + } else { + handle_ert(os, "\\char", context); + } + } + + else if (t.cs() == "verb") { + context.check_layout(os); + char const delimiter = p.next_token().character(); + string const arg = p.getArg(delimiter, delimiter); + ostringstream oss; + oss << "\\verb" << delimiter << arg << delimiter; + handle_ert(os, oss.str(), context); + } + + else if (t.cs() == "\"") { + context.check_layout(os); + string const name = p.verbatim_item(); + if (name == "a") os << 'ä'; + else if (name == "o") os << 'ö'; + else if (name == "u") os << 'ü'; + else if (name == "A") os << 'Ä'; + else if (name == "O") os << 'Ö'; + else if (name == "U") os << 'Ü'; + else handle_ert(os, "\"{" + name + "}", context); + } + + // Problem: \= creates a tabstop inside the tabbing environment + // and else an accent. 
In the latter case we really would want + // \={o} instead of \= o. + else if (t.cs() == "=" && (flags & FLAG_TABBING)) + handle_ert(os, t.asInput(), context); + + else if (t.cs() == "H" || t.cs() == "c" || t.cs() == "^" + || t.cs() == "'" || t.cs() == "`" + || t.cs() == "~" || t.cs() == "." || t.cs() == "=") { + // we need the trim as the LyX parser chokes on such spaces + // The argument of InsetLatexAccent is parsed as a + // subset of LaTeX, so don't parse anything here, + // but use the raw argument. + // Otherwise we would convert \~{\i} wrongly. + // This will of course not translate \~{\ss} to \~{ß}, + // but that does at least compile and does only look + // strange on screen. + context.check_layout(os); + os << "\\i \\" << t.cs() << "{" + << trim(p.verbatim_item(), " ") + << "}\n"; + } + + else if (t.cs() == "ss") { + context.check_layout(os); + os << "ß"; + skip_braces(p); // eat {} + } + + else if (t.cs() == "i" || t.cs() == "j" || t.cs() == "l" || + t.cs() == "L") { + context.check_layout(os); + os << "\\i \\" << t.cs() << "{}\n"; + skip_braces(p); // eat {} + } + + else if (t.cs() == "\\") { + context.check_layout(os); + string const next = p.next_token().asInput(); + if (next == "[") + handle_ert(os, "\\\\" + p.getOpt(), context); + else if (next == "*") { + p.get_token(); + handle_ert(os, "\\\\*" + p.getOpt(), context); + } + else { + os << "\n\\newline\n"; + } + } + + else if (t.cs() == "input" || t.cs() == "include" + || t.cs() == "verbatiminput") { + string name = '\\' + t.cs(); + if (t.cs() == "verbatiminput" + && p.next_token().asInput() == "*") + name += p.get_token().asInput(); + context.check_layout(os); + begin_inset(os, "Include "); + string filename(normalize_filename(p.getArg('{', '}'))); + string const path = getMasterFilePath(); + // We want to preserve relative / absolute filenames, + // therefore path is only used for testing + // FIXME UNICODE encoding of filename and path may be + // wrong (makeAbsPath expects utf8) + if (t.cs() == 
"include" && + !fs::exists(makeAbsPath(filename, path).toFilesystemEncoding())) { + // The file extension is probably missing. + // Now try to find it out. + string const tex_name = + find_file(filename, path, + known_tex_extensions); + if (!tex_name.empty()) + filename = tex_name; + } + // FIXME UNICODE encoding of filename and path may be + // wrong (makeAbsPath expects utf8) + if (fs::exists(makeAbsPath(filename, path).toFilesystemEncoding())) { + string const abstexname = + makeAbsPath(filename, path).absFilename(); + string const abslyxname = + changeExtension(abstexname, ".lyx"); + fix_relative_filename(filename); + string const lyxname = + changeExtension(filename, ".lyx"); + if (t.cs() != "verbatiminput" && + tex2lyx(abstexname, FileName(abslyxname))) { + os << name << '{' << lyxname << "}\n"; + } else { + os << name << '{' << filename << "}\n"; + } + } else { + cerr << "Warning: Could not find included file '" + << filename << "'." << endl; + os << name << '{' << filename << "}\n"; + } + os << "preview false\n"; + end_inset(os); + } + + else if (t.cs() == "bibliographystyle") { + // store new bibliographystyle + bibliographystyle = p.verbatim_item(); + // output new bibliographystyle. + // This is only necessary if used in some other macro than \bibliography. + handle_ert(os, "\\bibliographystyle{" + bibliographystyle + "}", context); + } + + else if (t.cs() == "bibliography") { + context.check_layout(os); + begin_inset(os, "LatexCommand "); + os << "\\bibtex"; + // Do we have a bibliographystyle set? 
+ if (!bibliographystyle.empty()) { + os << '[' << bibliographystyle << ']'; + } + os << '{' << p.verbatim_item() << "}\n"; + end_inset(os); + } + + else if (t.cs() == "parbox") + parse_box(p, os, FLAG_ITEM, outer, context, true); + + else if (t.cs() == "smallskip" || + t.cs() == "medskip" || + t.cs() == "bigskip" || + t.cs() == "vfill") { + context.check_layout(os); + begin_inset(os, "VSpace "); + os << t.cs(); + end_inset(os); + skip_braces(p); + } + + else if (is_known(t.cs(), known_spaces)) { + char const * const * where = is_known(t.cs(), known_spaces); + context.check_layout(os); + begin_inset(os, "InsetSpace "); + os << '\\' << known_coded_spaces[where - known_spaces] + << '\n'; + // LaTeX swallows whitespace after all spaces except + // "\\,". We have to do that here, too, because LyX + // adds "{}" which would make the spaces significant. + if (t.cs() != ",") + eat_whitespace(p, os, context, false); + // LyX adds "{}" after all spaces except "\\ " and + // "\\,", so we have to remove "{}". + // "\\,{}" is equivalent to "\\," in LaTeX, so we + // remove the braces after "\\,", too. + if (t.cs() != " ") + skip_braces(p); + } + + else if (t.cs() == "newpage" || + t.cs() == "clearpage" || + t.cs() == "cleardoublepage") { + context.check_layout(os); + // FIXME: what about \\pagebreak? + os << "\n\\" << t.cs() << "\n"; + skip_braces(p); // eat {} + } + + else if (t.cs() == "newcommand" || + t.cs() == "providecommand" || + t.cs() == "renewcommand") { + // these could be handled by parse_command(), but + // we need to call add_known_command() here. + string name = t.asInput(); + if (p.next_token().asInput() == "*") { + // Starred form. 
Eat '*' + p.get_token(); + name += '*'; + } + string const command = p.verbatim_item(); + string const opt1 = p.getOpt(); + string const opt2 = p.getFullOpt(); + add_known_command(command, opt1, !opt2.empty()); + string const ert = name + '{' + command + '}' + + opt1 + opt2 + + '{' + p.verbatim_item() + '}'; + handle_ert(os, ert, context); + } + + else if (t.cs() == "vspace") { + bool starred = false; + if (p.next_token().asInput() == "*") { + p.get_token(); + starred = true; + } + string const length = p.verbatim_item(); + string unit; + string valstring; + bool valid = splitLatexLength(length, valstring, unit); + bool known_vspace = false; + bool known_unit = false; + double value; + if (valid) { + istringstream iss(valstring); + iss >> value; + if (value == 1.0) { + if (unit == "\\smallskipamount") { + unit = "smallskip"; + known_vspace = true; + } else if (unit == "\\medskipamount") { + unit = "medskip"; + known_vspace = true; + } else if (unit == "\\bigskipamount") { + unit = "bigskip"; + known_vspace = true; + } else if (unit == "\\fill") { + unit = "vfill"; + known_vspace = true; + } + } + if (!known_vspace) { + switch (unitFromString(unit)) { + case LyXLength::SP: + case LyXLength::PT: + case LyXLength::BP: + case LyXLength::DD: + case LyXLength::MM: + case LyXLength::PC: + case LyXLength::CC: + case LyXLength::CM: + case LyXLength::IN: + case LyXLength::EX: + case LyXLength::EM: + case LyXLength::MU: + known_unit = true; + break; + default: + break; + } + } + } + + if (known_unit || known_vspace) { + // Literal length or known variable + context.check_layout(os); + begin_inset(os, "VSpace "); + if (known_unit) + os << value; + os << unit; + if (starred) + os << '*'; + end_inset(os); + } else { + // LyX can't handle other length variables in Inset VSpace + string name = t.asInput(); + if (starred) + name += '*'; + if (valid) { + if (value == 1.0) + handle_ert(os, name + '{' + unit + '}', context); + else if (value == -1.0) + handle_ert(os, name + "{-" + 
unit + '}', context); + else + handle_ert(os, name + '{' + valstring + unit + '}', context); + } else + handle_ert(os, name + '{' + length + '}', context); + } + } + + else { + //cerr << "#: " << t << " mode: " << mode << endl; + // heuristic: read up to next non-nested space + /* + string s = t.asInput(); + string z = p.verbatim_item(); + while (p.good() && z != " " && z.size()) { + //cerr << "read: " << z << endl; + s += z; + z = p.verbatim_item(); + } + cerr << "found ERT: " << s << endl; + handle_ert(os, s + ' ', context); + */ + string name = t.asInput(); + if (p.next_token().asInput() == "*") { + // Starred commands like \vspace*{} + p.get_token(); // Eat '*' + name += '*'; + } + if (! parse_command(name, p, os, outer, context)) + handle_ert(os, name, context); + } + + if (flags & FLAG_LEAVE) { + flags &= ~FLAG_LEAVE; + break; + } + } +} + +// }]) + + +} // namespace lyx