From b580294d0b9fb4cb2d210dfdbac53e5944110062 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andr=C3=A9=20P=C3=B6nitz?= Date: Thu, 17 Apr 2003 09:47:21 +0000 Subject: [PATCH] re-structure the beast git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@6829 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/tex2lyx/ChangeLog | 12 + src/tex2lyx/Makefile.am | 23 +- src/tex2lyx/math.C | 225 +++++++ src/tex2lyx/table.C | 477 ++++++++++++++ src/tex2lyx/tex2lyx.C | 1378 +-------------------------------------- src/tex2lyx/text.C | 495 ++++++++++++++ 6 files changed, 1247 insertions(+), 1363 deletions(-) create mode 100644 src/tex2lyx/math.C create mode 100644 src/tex2lyx/table.C create mode 100644 src/tex2lyx/text.C diff --git a/src/tex2lyx/ChangeLog b/src/tex2lyx/ChangeLog index 81454199dd..f29c45816f 100644 --- a/src/tex2lyx/ChangeLog +++ b/src/tex2lyx/ChangeLog @@ -1,3 +1,15 @@ + +2003-04-17 André Pönitz + + * math.C: + preamble.C: + table.C: + text.C: new files + + * table.C: better table handling + + * tex2lyx.C: lots of small changes + 2003-02-25 Angus Leeming * .cvsignore: ignore *.lo and libtexparser.la. 
diff --git a/src/tex2lyx/Makefile.am b/src/tex2lyx/Makefile.am index 43ec6ad475..1852cbf71d 100644 --- a/src/tex2lyx/Makefile.am +++ b/src/tex2lyx/Makefile.am @@ -2,17 +2,22 @@ include $(top_srcdir)/config/common.am INCLUDES = -I$(srcdir)/../ $(BOOST_INCLUDES) -noinst_LTLIBRARIES = libtexparser.la - -libtexparser_la_SOURCES = \ - texparser.C \ - texparser.h - -tex2lyx_LDADD = libtexparser.la +#noinst_LTLIBRARIES = libtexparser.la +# +#libtexparser_la_SOURCES = \ +# texparser.C \ +# texparser.h +# +#tex2lyx_LDADD = libtexparser.la bin_PROGRAMS = tex2lyx tex2lyx_SOURCES = \ - tex2lyx.C texparser.C \ - texparser.h + texparser.h \ + tex2lyx.C \ + tex2lyx.h \ + preamble.C \ + math.C \ + table.C \ + text.C diff --git a/src/tex2lyx/math.C b/src/tex2lyx/math.C new file mode 100644 index 0000000000..b02a58c88a --- /dev/null +++ b/src/tex2lyx/math.C @@ -0,0 +1,225 @@ +/** The .tex to .lyx converter + \author André Pönitz (2003) + */ + +// {[( + +#include + +#include "Lsstream.h" +#include "tex2lyx.h" + +#include +#include +#include + +using std::cerr; +using std::endl; +using std::ostream; +using std::string; +using std::vector; + + +bool is_math_env(string const & name) +{ + static char const * known_math_envs[] = { "equation", "equation*", + "eqnarray", "eqnarray*", "align", "align*", 0}; + + for (char const ** what = known_math_envs; *what; ++what) + if (*what == name) + return true; + return false; +} + + +void parse_math(Parser & p, ostream & os, unsigned flags, const mode_type mode) +{ + while (p.good()) { + Token const & t = p.getToken(); + +#ifdef FILEDEBUG + cerr << "t: " << t << " flags: " << flags << "\n"; +#endif + + if (flags & FLAG_ITEM) { + if (t.cat() == catSpace) + continue; + + flags &= ~FLAG_ITEM; + if (t.cat() == catBegin) { + // skip the brace and collect everything to the next matching + // closing brace + flags |= FLAG_BRACE_LAST; + continue; + } + + // handle only this single token, leave the loop if done + flags |= FLAG_LEAVE; + } + + + // + // cat 
codes + // + if (t.cat() == catMath) { + if (mode == MATHTEXT_MODE) { + // we are inside some text mode thingy, so opening new math is allowed + Token const & n = p.getToken(); + if (n.cat() == catMath) { + // TeX's $$...$$ syntax for displayed math + os << "\\["; + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << "\\]"; + p.getToken(); // skip the second '$' token + } else { + // simple $...$ stuff + p.putback(); + os << '$'; + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << '$'; + } + } + + else if (flags & FLAG_SIMPLE) { + // this is the end of the formula + return; + } + + else { + cerr << "\nmode: " << mode << endl; + p.error("something strange in the parser\n"); + break; + } + } + + else if (t.cat() == catLetter || + t.cat() == catSpace || + t.cat() == catSuper || + t.cat() == catSub || + t.cat() == catOther || + t.cat() == catAlign || + t.cat() == catActive || + t.cat() == catParameter) + os << t.character(); + + else if (t.cat() == catNewline) { + //if (p.nextToken().cat() == catNewline) { + // p.getToken(); + // handle_par(os); + //} else { + os << "\n "; // note the space + //} + } + + else if (t.cat() == catBegin) { + os << '{'; + parse_math(p, os, FLAG_BRACE_LAST, mode); + os << '}'; + } + + else if (t.cat() == catEnd) { + if (flags & FLAG_BRACE_LAST) + return; + os << "unexpected '}' in math\n"; + } + + else if (t.cat() == catComment) + handle_comment(p); + + // + // control sequences + // + + else if (t.cs() == "(") { + os << "\\("; + parse_math(p, os, FLAG_SIMPLE2, MATH_MODE); + os << "\\)"; + } + + else if (t.cs() == "[") { + os << "\\["; + parse_math(p, os, FLAG_EQUATION, MATH_MODE); + os << "\\]"; + } + + else if (t.cs() == "protect") + // ignore \\protect, will hopefully be re-added during output + ; + + else if (t.cs() == "begin") { + string const name = p.getArg('{', '}'); + active_environments_push(name); + os << "\\begin{" << name << "}"; + if (name == "tabular") + parse_math(p, os, FLAG_END, MATHTEXT_MODE); + else + parse_math(p, os, 
FLAG_END, mode); + os << "\\end{" << name << "}"; + } + + else if (t.cs() == "end") { + if (flags & FLAG_END) { + // eat environment name + string const name = p.getArg('{', '}'); + if (name != curr_env()) + p.error("\\end{" + name + "} does not match \\begin{" + + curr_env() + "}"); + active_environments_pop(); + return; + } + p.error("found 'end' unexpectedly"); + } + + else if (t.cs() == ")") { + if (flags & FLAG_SIMPLE2) + return; + p.error("found '\\)' unexpectedly"); + } + + else if (t.cs() == "]") { + if (flags & FLAG_EQUATION) + return; + p.error("found '\\]' unexpectedly"); + } + + else if (t.cs() == "textrm" || t.cs() == "textsf" || t.cs() == "textbf" + || t.cs() == "texttt" || t.cs() == "textsc") { + os << '\\' << t.cs() << '{'; + parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE); + os << '}'; + } + + else if (t.cs() == "mbox") { + os << "\\mbox{"; + parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE); + os << '}'; + } + + else if (t.cs() == "\"") { + string const name = p.verbatimItem(); + if (name == "a") os << 'ä'; + else if (name == "o") os << 'ö'; + else if (name == "u") os << 'ü'; + else if (name == "A") os << 'Ä'; + else if (name == "O") os << 'Ö'; + else if (name == "U") os << 'Ü'; + else os << "\"{" << name << "}"; + } + + else if (t.cs() == "ss") + os << "ß"; + + else + os << t.asInput(); + + if (flags & FLAG_LEAVE) { + flags &= ~FLAG_LEAVE; + break; + } + } +} + + + + +// }]) diff --git a/src/tex2lyx/table.C b/src/tex2lyx/table.C new file mode 100644 index 0000000000..5b2971e32a --- /dev/null +++ b/src/tex2lyx/table.C @@ -0,0 +1,477 @@ +/** The .tex to .lyx converter + \author André Pönitz (2003) + */ + +// {[( + +#include + +#include "Lsstream.h" +#include "tex2lyx.h" + +#include +#include +#include +#include +#include + +using std::cerr; +using std::endl; +using std::istringstream; +using std::ostream; +using std::ostringstream; +using std::string; +using std::vector; + + +namespace { + +struct ColInfo +{ + ColInfo() : rightline(false) {} + string align; 
// column alignment + string width; // column width + bool rightline; // a line on the right? +}; + + +struct RowInfo +{ + RowInfo() : topline(false), bottomline(false) {} + bool topline; // horizontal line above + bool bottomline; // horizontal line below +}; + + +struct CellInfo +{ + CellInfo() + : multi(0), leftline(false), rightline(false), + topline(false), bottomline(false) + {} + + string content; // cell content + int multi; // multicolumn flag + string align; // cell alignment + bool leftline; // do we have a line on the left? + bool rightline; // do we have a line on the right? + bool topline; // do we have a line above? + bool bottomline; // do we have a line below? +}; + + +int string2int(string const & s, int deflt = 0) +{ + istringstream is(s); + int i = deflt; + is >> i; + return i; +} + + +string read_hlines(Parser & p) +{ + ostringstream os; + p.skipSpaces(); + while (p.good()) { + if (p.nextToken().cs() == "hline") { + p.getToken(); + os << "\\hline"; + } else if (p.nextToken().cs() == "cline") { + p.getToken(); + os << "\\cline{" << p.verbatimItem() << "}"; + } else + break; + p.skipSpaces(); + }; + //cerr << "read_hlines(), read: '" << os.str() << "'\n"; + //cerr << "read_hlines(), next token: " << p.nextToken() << "\n"; + return os.str(); +} + + + +/* rather brutish way to code table structure in a string: + + \begin{tabular}{ccc} + 1 & 2 & 3\\ \hline + \multicolumn{2}{c}{4} & 5 // + 6 & 7 \\ + \end{tabular} + + gets "translated" to: + + 1 TAB 2 TAB 3 LINE + \hline HLINE TAB 5 LINE + 5 TAB 7 LINE +*/ + +char const TAB = '\001'; +char const LINE = '\002'; +char const HLINE = '\004'; + + +bool handle_colalign(Parser & p, vector & colinfo) +{ + if (p.getToken().cat() != catBegin) + cerr << "wrong syntax for table column alignment. 
'{' expected\n"; + + string nextalign = "block"; + bool leftline = false; + for (Token t = p.getToken(); p.good() && t.cat() != catEnd; t = p.getToken()){ +#ifdef FILEDEBUG + cerr << "t: " << t << " c: '" << t.character() << "'\n"; +#endif + + switch (t.character()) { + case 'c': + colinfo.push_back(ColInfo()); + colinfo.back().align = "center"; + break; + case 'l': + colinfo.push_back(ColInfo()); + colinfo.back().align = "left"; + break; + case 'r': + colinfo.push_back(ColInfo()); + colinfo.back().align = "right"; + break; + case 'p': + colinfo.push_back(ColInfo()); + colinfo.back().align = nextalign; + colinfo.back().width = p.verbatimItem(); + nextalign = "block"; + break; + case '|': + if (colinfo.empty()) + leftline = true; + else + colinfo.back().rightline = true; + break; + case '>': { + string s = p.verbatimItem(); + if (s == "\\raggedleft ") + nextalign = "left"; + else if (s == "\\raggedright ") + nextalign = "right"; + else + cerr << "unknown '>' column '" << s << "'\n"; + break; + } + default: + cerr << "ignoring special separator '" << t << "'\n"; + break; + } + } + return leftline; +} + + +} // anonymous namespace + + +void parse_table(Parser & p, ostream & os, unsigned flags) +{ + string hlines; + + while (p.good()) { + Token const & t = p.getToken(); + +#ifdef FILEDEBUG + cerr << "t: " << t << " flags: " << flags << "\n"; +#endif + + // + // cat codes + // + if (t.cat() == catMath) { + // we are inside some text mode thingy, so opening new math is allowed + Token const & n = p.getToken(); + if (n.cat() == catMath) { + // TeX's $$...$$ syntax for displayed math + os << "\\["; + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << "\\]"; + p.getToken(); // skip the second '$' token + } else { + // simple $...$ stuff + p.putback(); + os << '$'; + parse_math(p, os, FLAG_SIMPLE, MATH_MODE); + os << '$'; + } + } + + else if (t.cat() == catLetter || + t.cat() == catSpace || + t.cat() == catSuper || + t.cat() == catSub || + t.cat() == catOther || + t.cat() == 
catActive || + t.cat() == catNewline || + t.cat() == catParameter) + os << t.character(); + + else if (t.cat() == catBegin) { + os << '{'; + parse_table(p, os, FLAG_BRACE_LAST); + os << '}'; + } + + else if (t.cat() == catEnd) { + if (flags & FLAG_BRACE_LAST) + return; + cerr << "unexpected '}'\n"; + } + + else if (t.cat() == catAlign) { + os << TAB; + } + + else if (t.cs() == "tabularnewline" || t.cs() == "\\") { + // stuff before the line break + // and look ahead for stuff after the line break + os << HLINE << hlines << HLINE << LINE << read_hlines(p) << HLINE; + hlines.clear(); + } + + else if (t.cs() == "hline") + hlines += "\\hline"; + + else if (t.cs() == "cline") + hlines += "\\cline{" + p.verbatimItem() + '}'; + + else if (t.cat() == catComment) + handle_comment(p); + + else if (t.cs() == "(") { + os << "\\("; + parse_math(p, os, FLAG_SIMPLE2, MATH_MODE); + os << "\\)"; + } + + else if (t.cs() == "[") { + os << "\\["; + parse_math(p, os, FLAG_EQUATION, MATH_MODE); + os << "\\]"; + } + + else if (t.cs() == "begin") { + string const name = p.getArg('{', '}'); + active_environments_push(name); + parse_table(p, os, FLAG_END); + } + + else if (t.cs() == "end") { + if (flags & FLAG_END) { + // eat environment name + string const name = p.getArg('{', '}'); + if (name != curr_env()) + p.error("\\end{" + name + "} does not match \\begin{" + + curr_env() + "}"); + active_environments_pop(); + return; + } + p.error("found 'end' unexpectedly"); + } + } +} + + +void handle_tabular(Parser & p, ostream & os) +{ + string posopts = p.getOpt(); + if (posopts.size()) + cerr << "vertical tabular positioning '" << posopts << "' ignored\n"; + + vector colinfo; + + // handle column formatting + bool leftline = handle_colalign(p, colinfo); + + // handle initial hlines + + // first scan of cells + // use table mode to keep it minimal-invasive + // not exactly what's TeX doing... 
+ vector lines; + ostringstream ss; + ss << read_hlines(p) << HLINE; // handle initial hlines + parse_table(p, ss, FLAG_END); + split(ss.str(), lines, LINE); + + vector< vector > cellinfo(lines.size()); + vector rowinfo(lines.size()); + + // split into rows + //cerr << "// split into rows\n"; + for (size_t row = 0; row < rowinfo.size(); ++row) { + + // init row + vector & cellinfos = cellinfo[row]; + cellinfos.resize(colinfo.size()); + + // split row + vector dummy; + //cerr << "\n########### LINE: " << lines[row] << "########\n"; + split(lines[row], dummy, HLINE); + + // handle horizontal line fragments + if (dummy.size() != 3) { + if (dummy.size() != 1) + cerr << "unexpected dummy size: " << dummy.size() + << " content: " << lines[row] << "\n"; + dummy.resize(3); + } + lines[row] = dummy[1]; + + //cerr << "line: " << row << " above 0: " << dummy[0] << "\n"; + //cerr << "line: " << row << " below 2: " << dummy[2] << "\n"; + //cerr << "line: " << row << " cells 1: " << dummy[1] << "\n"; + + for (int i = 0; i <= 2; i += 2) { + //cerr << " reading from line string '" << dummy[i] << "'\n"; + Parser p1(dummy[i]); + while (p1.good()) { + Token t = p1.getToken(); + //cerr << "read token: " << t << "\n"; + if (t.cs() == "hline") { + if (i == 0) { + rowinfo[row].topline = true; + for (size_t col = 0; col < colinfo.size(); ++col) + cellinfos[col].topline = true; + } else { + rowinfo[row].bottomline = true; + for (size_t col = 0; col < colinfo.size(); ++col) + cellinfos[col].bottomline = true; + } + } else if (t.cs() == "cline") { + string arg = p1.verbatimItem(); + //cerr << "read cline arg: '" << arg << "'\n"; + vector t; + split(arg, t, '-'); + t.resize(2); + size_t from = string2int(t[0]); + size_t to = string2int(t[1]); + for (size_t col = from; col < to; ++col) { + if (i == 0) + cellinfos[col].topline = true; + else + cellinfos[col].bottomline = true; + } + } else { + cerr << "unexpected line token: " << t << endl; + } + } + } + + // split into cells + vector cells; + 
split(lines[row], cells, TAB); + for (size_t col = 0, cell = 0; cell < cells.size() && col < colinfo.size(); ++col, ++cell) { + //cerr << "cell content: " << cells[cell] << "\n"; + Parser p(cells[cell]); + p.skipSpaces(); + //cerr << "handling cell: " << p.nextToken().cs() << " '" << + //cells[cell] << "'\n"; + if (p.nextToken().cs() == "multicolumn") { + // how many cells? + p.getToken(); + size_t ncells = string2int(p.verbatimItem()); + + // special cell properties alignment + vector t; + bool leftline = handle_colalign(p, t); + CellInfo & ci = cellinfos[col]; + ci.multi = 1; + ci.align = t.front().align; + ci.content = parse_text(p, FLAG_ITEM, false); + ci.leftline = leftline; + ci.rightline = t.front().rightline; + + // add dummy cells for multicol + for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) { + ++col; + cellinfos[col].multi = 2; + cellinfos[col].align = "center"; + } + } else { + cellinfos[col].content = parse_text(p, FLAG_ITEM, false); + } + } + + cellinfo.push_back(cellinfos); + + //cerr << "// handle almost empty last row what we have\n"; + // handle almost empty last row + if (row && lines[row].empty() && row + 1 == rowinfo.size()) { + //cerr << "remove empty last line\n"; + if (rowinfo[row].topline); + rowinfo[row - 1].bottomline = true; + for (size_t col = 0; col < colinfo.size(); ++col) + if (cellinfo[row][col].topline) + cellinfo[row - 1][col].bottomline = true; + rowinfo.pop_back(); + } + + } + + //cerr << "// output what we have\n"; + // output what we have + os << "\n" + << "\n"; + + //cerr << "// after header\n"; + for (size_t col = 0; col < colinfo.size(); ++col) { + os << "\n"; + } + //cerr << "// after cols\n"; + + for (size_t row = 0; row < rowinfo.size(); ++row) { + os << "\n"; + for (size_t col = 0; col < colinfo.size(); ++col) { + CellInfo const & cell = cellinfo[row][col]; + os << "" + << "\n\\begin_inset Text" + << "\n\n\\layout Standard\n\n" + << cell.content + << "\n\\end_inset\n\n" + << "\n"; + } + os << "\n"; + } 
+ + os << "\n"; +} + + + + +// }]) diff --git a/src/tex2lyx/tex2lyx.C b/src/tex2lyx/tex2lyx.C index fd372a63fa..6fdf33aaab 100644 --- a/src/tex2lyx/tex2lyx.C +++ b/src/tex2lyx/tex2lyx.C @@ -6,31 +6,22 @@ #include -#include +#include "Lsstream.h" +#include "tex2lyx.h" + #include #include #include -#include #include #include #include -#include "Lsstream.h" - -#include "texparser.h" - -using std::count_if; using std::cout; using std::cerr; using std::endl; -using std::fill; using std::getline; -using std::ios; using std::ifstream; -using std::istream; using std::istringstream; -using std::map; -using std::swap; using std::ostream; using std::ostringstream; using std::stack; @@ -38,178 +29,26 @@ using std::string; using std::vector; -namespace { - -struct ColInfo -{ - ColInfo() : rightline(false) {} - string align; // column alignment - string width; // column width - bool rightline; // a line on the right? -}; - - -struct RowInfo -{ - RowInfo() : topline(false), bottomline(false) {} - bool topline; // horizontal line above - bool bottomline; // horizontal line below -}; - - -struct CellInfo -{ - CellInfo() - : multi(0), leftline(false), rightline(false), - topline(false), bottomline(false) - {} - - string content; // cell content - int multi; // multicolumn flag - string align; // cell alignment - bool leftline; // do we have a line on the left? - bool rightline; // do we have a line on the right? - bool topline; // do we have a line above? - bool bottomline; // do we have a line below? 
-}; - - -void parse_preamble(Parser & p, ostream & os); - -void parse(Parser & p, ostream & os, unsigned flags, const mode_type mode, -const bool outer); - -string parse(Parser & p, unsigned flags, const mode_type mode, -const bool outer) -{ - ostringstream os; - parse(p, os, flags, mode, outer); - return os.str(); -} +//namespace { -int string2int(string const & s, int deflt = 0) -{ - istringstream is(s); - int i = deflt; - is >> i; - return i; -} -string read_hlines(Parser & p) +void handle_comment(Parser & p) { - ostringstream os; - p.skipSpaces(); + string s; while (p.good()) { - if (p.nextToken().cs() == "hline") { - p.getToken(); - os << "\\hline"; - } else if (p.nextToken().cs() == "cline") { - p.getToken(); - os << "\\cline{" << p.verbatimItem() << "}"; - } else + Token const & t = p.getToken(); + if (t.cat() == catNewline) break; - p.skipSpaces(); - }; - //cerr << "read_hlines(), read: '" << os.str() << "'\n"; - //cerr << "read_hlines(), next token: " << p.nextToken() << "\n"; - return os.str(); + s += t.asString(); + } + //cerr << "comment: " << s << "\n"; + p.skipSpaces(); } -/* rather brutish way to code table structure in a string: - - \begin{tabular}{ccc} - 1 & 2 & 3\\ \hline - \multicolumn{2}{c}{4} & 5 // - 6 & 7 \\ - \end{tabular} - - gets "translated" to: - - 1 TAB 2 TAB 3 LINE - \hline HLINE TAB 5 LINE - 5 TAB 7 LINE -*/ - -char const TAB = '\001'; -char const LINE = '\002'; -char const HLINE = '\004'; - -const char * known_languages[] = { "austrian", "babel", "bahasa", "basque", -"breton", "british", "bulgarian", "catalan", "croatian", "czech", "danish", -"dutch", "english", "esperanto", "estonian", "finnish", "francais", -"frenchb", "galician", "german", "germanb", "greek", "hebcal", "hebfont", -"hebrew", "hebrew_newcode", "hebrew_oldcode", "hebrew_p", "hyphen", -"icelandic", "irish", "italian", "latin", "lgrcmr", "lgrcmro", "lgrcmss", -"lgrcmtt", "lgrenc", "lgrlcmss", "lgrlcmtt", "lheclas", "lhecmr", -"lhecmss", "lhecmtt", "lhecrml", "lheenc", 
"lhefr", "lheredis", "lheshold", -"lheshscr", "lheshstk", "lsorbian", "magyar", "naustrian", "ngermanb", -"ngerman", "norsk", "polish", "portuges", "rlbabel", "romanian", -"russianb", "samin", "scottish", "serbian", "slovak", "slovene", "spanish", -"swedish", "turkish", "ukraineb", "usorbian", "welsh", 0}; - -char const * known_fontsizes[] = { "10pt", "11pt", "12pt", 0 }; - -char const * known_headings[] = { "caption", "title", "author", "date", -"paragraph", "chapter", "section", "subsection", "subsubsection", 0 }; - -char const * known_math_envs[] = { "equation", "equation*", -"eqnarray", "eqnarray*", "align", "align*", 0}; - -char const * known_latex_commands[] = { "ref", "cite", "label", "index", -"printindex", "pageref", "url", 0 }; - -// LaTeX names for quotes -char const * known_quotes[] = { "glqq", "grqq", "quotedblbase", -"textquotedblleft", 0}; - -// the same as known_quotes with .lyx names -char const * known_coded_quotes[] = { "gld", "grd", "gld", "grd", 0}; - - -// some ugly stuff -ostringstream h_preamble; -string h_textclass = "article"; -string h_options = ""; -string h_language = "english"; -string h_inputencoding = "latin1"; -string h_fontscheme = "default"; -string h_graphics = "default"; -string h_paperfontsize = "default"; -string h_spacing = "single"; -string h_papersize = "default"; -string h_paperpackage = "default"; -string h_use_geometry = "0"; -string h_use_amsmath = "0"; -string h_use_natbib = "0"; -string h_use_numerical_citations = "0"; -string h_paperorientation = "portrait"; -string h_secnumdepth = "3"; -string h_tocdepth = "3"; -string h_paragraph_separation = "indent"; -string h_defskip = "medskip"; -string h_quotes_language = "english"; -string h_quotes_times = "2"; -string h_papercolumns = "1"; -string h_papersides = "1"; -string h_paperpagestyle = "default"; -string h_tracking_changes = "0"; - -// current stack of nested environments -stack active_environments; - - -string cap(string s) -{ - if (s.size()) - s[0] = toupper(s[0]); 
- return s; -} - - -string const trim(string const & a, char const * p = " \t\n\r") +string const trim(string const & a, char const * p) { // lyx::Assert(p); @@ -227,7 +66,7 @@ string const trim(string const & a, char const * p = " \t\n\r") } -void split(string const & s, vector & result, char delim = ',') +void split(string const & s, vector & result, char delim) { //cerr << "split 1: '" << s << "'\n"; istringstream is(s); @@ -238,22 +77,6 @@ void split(string const & s, vector & result, char delim = ',') } -// splits "x=z, y=b" into a map -map split_map(string const & s) -{ - map res; - vector v; - split(s, v); - for (size_t i = 0; i < v.size(); ++i) { - size_t const pos = v[i].find('='); - string const index = v[i].substr(0, pos); - string const value = v[i].substr(pos + 1, string::npos); - res[trim(index)] = trim(value); - } - return res; -} - - string join(vector const & input, char const * delim) { ostringstream os; @@ -275,41 +98,26 @@ char const ** is_known(string const & str, char const ** what) } -void handle_opt(vector & opts, char const ** what, string & target) -{ - if (opts.empty()) - return; - for ( ; *what; ++what) { - vector::iterator it = find(opts.begin(), opts.end(), *what); - if (it != opts.end()) { - //cerr << "### found option '" << *what << "'\n"; - target = *what; - opts.erase(it); - return; - } - } -} +// current stack of nested environments +stack active_environments; -bool is_math_env(string const & name) +void active_environments_push(std::string const & name) { - for (char const ** what = known_math_envs; *what; ++what) - if (*what == name) - return true; - return false; + active_environments.push(name); } -void begin_inset(ostream & os, string const & name) +void active_environments_pop() { - os << "\n\\begin_inset " << name; + active_environments.pop(); } -void end_inset(ostream & os) +bool active_environments_empty() { - os << "\n\\end_inset\n\n"; + return active_environments.empty(); } @@ -319,1147 +127,9 @@ string curr_env() } 
-void handle_ert(ostream & os, string const & s) -{ - begin_inset(os, "ERT"); - os << "\nstatus Collapsed\n\n\\layout Standard\n\n"; - for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { - if (*it == '\\') - os << "\n\\backslash\n"; - else - os << *it; - } - end_inset(os); -} - - -void handle_par(ostream & os) -{ - if (active_environments.empty()) - return; - os << "\n\\layout "; - string s = curr_env(); - if (s == "document" || s == "table") - os << "Standard\n\n"; - else if (s == "lyxcode") - os << "LyX-Code\n\n"; - else if (s == "lyxlist") - os << "List\n\n"; - else if (s == "thebibliography") - os << "Bibliography\n\n"; - else - os << cap(s) << "\n\n"; -} - - -void handle_package(string const & name, string const & options) -{ - //cerr << "handle_package: '" << name << "'\n"; - if (name == "a4wide") { - h_papersize = "a4paper"; - h_paperpackage = "widemarginsa4"; - } else if (name == "ae") - h_fontscheme = "ae"; - else if (name == "aecompl") - h_fontscheme = "ae"; - else if (name == "amsmath") - h_use_amsmath = "1"; - else if (name == "amssymb") - h_use_amsmath = "1"; - else if (name == "babel") - ; // ignore this - else if (name == "fontenc") - ; // ignore this - else if (name == "inputenc") - h_inputencoding = options; - else if (name == "makeidx") - ; // ignore this - else if (name == "verbatim") - ; // ignore this - else if (is_known(name, known_languages)) { - h_language = name; - h_quotes_language = name; - } else { - if (options.size()) - h_preamble << "\\usepackage[" << options << "]{" << name << "}\n"; - else - h_preamble << "\\usepackage{" << name << "}\n"; - } -} - - -bool handle_colalign(Parser & p, vector & colinfo) -{ - if (p.getToken().cat() != catBegin) - cerr << "wrong syntax for table column alignment. 
'{' expected\n"; - - string nextalign = "block"; - bool leftline = false; - for (Token t = p.getToken(); p.good() && t.cat() != catEnd; t = p.getToken()){ -#ifdef FILEDEBUG - cerr << "t: " << t << " c: '" << t.character() << "'\n"; -#endif - - switch (t.character()) { - case 'c': - colinfo.push_back(ColInfo()); - colinfo.back().align = "center"; - break; - case 'l': - colinfo.push_back(ColInfo()); - colinfo.back().align = "left"; - break; - case 'r': - colinfo.push_back(ColInfo()); - colinfo.back().align = "right"; - break; - case 'p': - colinfo.push_back(ColInfo()); - colinfo.back().align = nextalign; - colinfo.back().width = p.verbatimItem(); - nextalign = "block"; - break; - case '|': - if (colinfo.empty()) - leftline = true; - else - colinfo.back().rightline = true; - break; - case '>': { - string s = p.verbatimItem(); - if (s == "\\raggedleft ") - nextalign = "left"; - else if (s == "\\raggedright ") - nextalign = "right"; - else - cerr << "unknown '>' column '" << s << "'\n"; - break; - } - default: - cerr << "ignoring special separator '" << t << "'\n"; - break; - } - } - return leftline; -} - - - -void handle_tabular(Parser & p, ostream & os, mode_type mode) -{ - begin_inset(os, "Tabular \n"); - string posopts = p.getOpt(); - if (posopts.size()) - cerr << "vertical tabular positioning '" << posopts << "' ignored\n"; - - vector colinfo; - - // handle column formatting - bool leftline = handle_colalign(p, colinfo); - - // handle initial hlines - - // first scan of cells - // use table mode to keep it minimal-invasive - // not exactly what's TeX doing... 
- vector lines; - ostringstream ss; - ss << read_hlines(p) << HLINE; // handle initial hlines - parse(p, ss, FLAG_END, TABLE_MODE, false); - split(ss.str(), lines, LINE); - - vector< vector > cellinfo(lines.size()); - vector rowinfo(lines.size()); - - // split into rows - //cerr << "// split into rows\n"; - for (size_t row = 0; row < rowinfo.size(); ++row) { - - // init row - vector & cellinfos = cellinfo[row]; - cellinfos.resize(colinfo.size()); - - // split row - vector dummy; - //cerr << "\n########### LINE: " << lines[row] << "########\n"; - split(lines[row], dummy, HLINE); - - // handle horizontal line fragments - if (dummy.size() != 3) { - if (dummy.size() != 1) - cerr << "unexpected dummy size: " << dummy.size() - << " content: " << lines[row] << "\n"; - dummy.resize(3); - } - lines[row] = dummy[1]; - - //cerr << "line: " << row << " above 0: " << dummy[0] << "\n"; - //cerr << "line: " << row << " below 2: " << dummy[2] << "\n"; - //cerr << "line: " << row << " cells 1: " << dummy[1] << "\n"; - - for (int i = 0; i <= 2; i += 2) { - //cerr << " reading from line string '" << dummy[i] << "'\n"; - Parser p1(dummy[i]); - while (p1.good()) { - Token t = p1.getToken(); - //cerr << "read token: " << t << "\n"; - if (t.cs() == "hline") { - if (i == 0) { - rowinfo[row].topline = true; - for (size_t col = 0; col < colinfo.size(); ++col) - cellinfos[col].topline = true; - } else { - rowinfo[row].bottomline = true; - for (size_t col = 0; col < colinfo.size(); ++col) - cellinfos[col].bottomline = true; - } - } else if (t.cs() == "cline") { - string arg = p1.verbatimItem(); - //cerr << "read cline arg: '" << arg << "'\n"; - vector t; - split(arg, t, '-'); - t.resize(2); - size_t from = string2int(t[0]); - size_t to = string2int(t[1]); - for (size_t col = from; col < to; ++col) { - if (i == 0) - cellinfos[col].topline = true; - else - cellinfos[col].bottomline = true; - } - } else { - cerr << "unexpected line token: " << t << endl; - } - } - } - - // split into cells - 
vector cells; - split(lines[row], cells, TAB); - for (size_t col = 0, cell = 0; cell < cells.size() && col < colinfo.size(); ++col, ++cell) { - //cerr << "cell content: " << cells[cell] << "\n"; - Parser p(cells[cell]); - p.skipSpaces(); - //cerr << "handling cell: " << p.nextToken().cs() << " '" << - //cells[cell] << "'\n"; - if (p.nextToken().cs() == "multicolumn") { - // how many cells? - p.getToken(); - size_t ncells = string2int(p.verbatimItem()); - - // special cell properties alignment - vector t; - bool leftline = handle_colalign(p, t); - CellInfo & ci = cellinfos[col]; - ci.multi = 1; - ci.align = t.front().align; - ci.content = parse(p, FLAG_ITEM, mode, false); - ci.leftline = leftline; - ci.rightline = t.front().rightline; - - // add dummy cells for multicol - for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) { - ++col; - cellinfos[col].multi = 2; - cellinfos[col].align = "center"; - } - } else { - cellinfos[col].content = parse(p, FLAG_ITEM, mode, false); - } - } - - cellinfo.push_back(cellinfos); - - //cerr << "// handle almost empty last row what we have\n"; - // handle almost empty last row - if (row && lines[row].empty() && row + 1 == rowinfo.size()) { - //cerr << "remove empty last line\n"; - if (rowinfo[row].topline); - rowinfo[row - 1].bottomline = true; - for (size_t col = 0; col < colinfo.size(); ++col) - if (cellinfo[row][col].topline) - cellinfo[row - 1][col].bottomline = true; - rowinfo.pop_back(); - } - - } - - //cerr << "// output what we have\n"; - // output what we have - os << "\n" - << "\n"; - - //cerr << "// after header\n"; - for (size_t col = 0; col < colinfo.size(); ++col) { - os << "\n"; - } - //cerr << "// after cols\n"; - - for (size_t row = 0; row < rowinfo.size(); ++row) { - os << "\n"; - for (size_t col = 0; col < colinfo.size(); ++col) { - CellInfo const & cell = cellinfo[row][col]; - os << ""; - begin_inset(os, "Text"); - os << "\n\n\\layout Standard\n\n"; - os << cell.content; - end_inset(os); - os << "\n"; - 
} - os << "\n"; - } - - os << "\n"; - end_inset(os); -} - - -void end_preamble(ostream & os) -{ - os << "# tex2lyx 0.0.2 created this file\n" - << "\\lyxformat 222\n" - << "\\textclass " << h_textclass << "\n" - << "\\begin_preamble\n" << h_preamble.str() << "\n\\end_preamble\n" - << "\\options " << h_options << "\n" - << "\\language " << h_language << "\n" - << "\\inputencoding " << h_inputencoding << "\n" - << "\\fontscheme " << h_fontscheme << "\n" - << "\\graphics " << h_graphics << "\n" - << "\\paperfontsize " << h_paperfontsize << "\n" - << "\\spacing " << h_spacing << "\n" - << "\\papersize " << h_papersize << "\n" - << "\\paperpackage " << h_paperpackage << "\n" - << "\\use_geometry " << h_use_geometry << "\n" - << "\\use_amsmath " << h_use_amsmath << "\n" - << "\\use_natbib " << h_use_natbib << "\n" - << "\\use_numerical_citations " << h_use_numerical_citations << "\n" - << "\\paperorientation " << h_paperorientation << "\n" - << "\\secnumdepth " << h_secnumdepth << "\n" - << "\\tocdepth " << h_tocdepth << "\n" - << "\\paragraph_separation " << h_paragraph_separation << "\n" - << "\\defskip " << h_defskip << "\n" - << "\\quotes_language " << h_quotes_language << "\n" - << "\\quotes_times " << h_quotes_times << "\n" - << "\\papercolumns " << h_papercolumns << "\n" - << "\\papersides " << h_papersides << "\n" - << "\\paperpagestyle " << h_paperpagestyle << "\n" - << "\\tracking_changes " << h_tracking_changes << "\n"; -} - - -void parse_preamble(Parser & p, ostream & os) -{ - while (p.good()) { - Token const & t = p.getToken(); - -#ifdef FILEDEBUG - cerr << "t: " << t << " flags: " << flags << "\n"; - //cell->dump(); -#endif - - // - // cat codes - // - if (t.cat() == catLetter || - t.cat() == catSpace || - t.cat() == catSuper || - t.cat() == catSub || - t.cat() == catOther || - t.cat() == catMath || - t.cat() == catActive || - t.cat() == catBegin || - t.cat() == catEnd || - t.cat() == catAlign || - t.cat() == catNewline || - t.cat() == catParameter) - 
h_preamble << t.character(); - - else if (t.cat() == catComment) { - string s; - while (p.good()) { - Token const & t = p.getToken(); - if (t.cat() == catNewline) - break; - s += t.asString(); - } - //cerr << "comment\n"; - p.skipSpaces(); - } - - else if (t.cs() == "pagestyle") - h_paperpagestyle == p.verbatimItem(); - - else if (t.cs() == "makeatletter") { - p.setCatCode('@', catLetter); - h_preamble << "\\makeatletter\n"; - } - - else if (t.cs() == "makeatother") { - p.setCatCode('@', catOther); - h_preamble << "\\makeatother\n"; - } - - else if (t.cs() == "newcommand" || t.cs() == "renewcommand" - || t.cs() == "providecommand") { - bool star = false; - if (p.nextToken().character() == '*') { - p.getToken(); - star = true; - } - string const name = p.verbatimItem(); - string const opts = p.getOpt(); - string const body = p.verbatimItem(); - // only non-lyxspecific stuff - if (name != "\\noun " - && name != "\\tabularnewline " - && name != "\\LyX " - && name != "\\lyxline " - && name != "\\lyxaddress " - && name != "\\lyxrightaddress " - && name != "\\boldsymbol " - && name != "\\lyxarrow ") { - ostringstream ss; - ss << '\\' << t.cs(); - if (star) - ss << '*'; - ss << '{' << name << '}' << opts << '{' << body << "}\n"; - h_preamble << ss.str(); -/* - ostream & out = in_preamble ? 
h_preamble : os; - out << "\\" << t.cs() << "{" << name << "}" - << opts << "{" << body << "}\n"; - if (!in_preamble) - end_inset(os); -*/ - } - } - - else if (t.cs() == "documentclass") { - vector opts; - split(p.getArg('[', ']'), opts, ','); - handle_opt(opts, known_languages, h_language); - handle_opt(opts, known_fontsizes, h_paperfontsize); - h_quotes_language = h_language; - h_options = join(opts, ","); - h_textclass = p.getArg('{', '}'); - } - - else if (t.cs() == "usepackage") { - string const options = p.getArg('[', ']'); - string const name = p.getArg('{', '}'); - if (options.empty() && name.find(',')) { - vector vecnames; - split(name, vecnames, ','); - vector::const_iterator it = vecnames.begin(); - vector::const_iterator end = vecnames.end(); - for (; it != end; ++it) - handle_package(trim(*it), string()); - } else { - handle_package(name, options); - } - } - - else if (t.cs() == "newenvironment") { - string const name = p.getArg('{', '}'); - ostringstream ss; - ss << "\\newenvironment{" << name << "}"; - ss << p.getOpt(); - ss << p.getOpt(); - ss << '{' << p.verbatimItem() << '}'; - ss << '{' << p.verbatimItem() << '}'; - ss << '\n'; - if (name != "lyxcode" && name != "lyxlist" - && name != "lyxrightadress" && name != "lyxaddress") - h_preamble << ss.str(); - } - - else if (t.cs() == "def") { - string name = p.getToken().cs(); - while (p.nextToken().cat() != catBegin) - name += p.getToken().asString(); - h_preamble << "\\def\\" << name << '{' << p.verbatimItem() << "}\n"; - } - - else if (t.cs() == "setcounter") { - string const name = p.getArg('{', '}'); - string const content = p.getArg('{', '}'); - if (name == "secnumdepth") - h_secnumdepth = content; - else if (name == "tocdepth") - h_tocdepth = content; - else - h_preamble << "\\setcounter{" << name << "}{" << content << "}\n"; - } - - else if (t.cs() == "setlength") { - string const name = p.verbatimItem(); - string const content = p.verbatimItem(); - if (name == "parskip") - 
h_paragraph_separation = "skip"; - else if (name == "parindent") - h_paragraph_separation = "skip"; - else - h_preamble << "\\setlength{" + name + "}{" + content + "}\n"; - } - - else if (t.cs() == "par") - h_preamble << '\n'; - - else if (t.cs() == "begin") { - string const name = p.getArg('{', '}'); - if (name == "document") { - end_preamble(os); - os << "\n\n\\layout Standard\n\n"; - return; - } - h_preamble << "\\begin{" << name << "}"; - } - - else if (t.cs().size()) - h_preamble << '\\' << t.cs() << ' '; - } -} - - -void parse(Parser & p, ostream & os, unsigned flags, const mode_type mode, -bool outer) -{ - string hlines; - - while (p.good()) { - Token const & t = p.getToken(); - -#ifdef FILEDEBUG - cerr << "t: " << t << " flags: " << flags << "\n"; -#endif - - if (flags & FLAG_ITEM) { - if (t.cat() == catSpace) - continue; - - flags &= ~FLAG_ITEM; - if (t.cat() == catBegin) { - // skip the brace and collect everything to the next matching - // closing brace - flags |= FLAG_BRACE_LAST; - continue; - } - - // handle only this single token, leave the loop if done - flags |= FLAG_LEAVE; - } - - - // - // cat codes - // - if (t.cat() == catMath) { - if (mode == TEXT_MODE || mode == MATHTEXT_MODE) { - // we are inside some text mode thingy, so opening new math is allowed - if (mode == TEXT_MODE) - begin_inset(os, "Formula "); - Token const & n = p.getToken(); - if (n.cat() == catMath && outer) { - // TeX's $$...$$ syntax for displayed math - os << "\\["; - parse(p, os, FLAG_SIMPLE, MATH_MODE, outer); - os << "\\]"; - p.getToken(); // skip the second '$' token - } else { - // simple $...$ stuff - p.putback(); - os << '$'; - parse(p, os, FLAG_SIMPLE, MATH_MODE, outer); - os << '$'; - } - if (mode == TEXT_MODE) - end_inset(os); - } - - else if (mode == TABLE_MODE) { - os << '$'; - } - - else if (flags & FLAG_SIMPLE) { - // this is the end of the formula - return; - } - - else { - cerr << "\nmode: " << mode << endl; - p.error("something strange in the parser\n"); - 
break; - } - } - - else if (t.cat() == catLetter || - t.cat() == catSpace || - t.cat() == catSuper || - t.cat() == catSub || - t.cat() == catOther || - t.cat() == catParameter) - os << t.character(); - - else if (t.cat() == catNewline) { - if (p.nextToken().cat() == catNewline) { - p.getToken(); - handle_par(os); - } else { - os << "\n "; // note the space - } - } - - else if (t.cat() == catActive) { - if (t.character() == '~') { - if (curr_env() == "lyxcode") - os << ' '; - else if (mode == TEXT_MODE) - os << "\\SpecialChar ~\n"; - else - os << '~'; - } else - os << t.character(); - } - - else if (t.cat() == catBegin) { - if (mode == TEXT_MODE) { - handle_ert(os, "{"); - parse(p, os, FLAG_BRACE_LAST, mode, outer); - handle_ert(os, "}"); - } else { - os << '{'; - } - } - - else if (t.cat() == catEnd) { - if (flags & FLAG_BRACE_LAST) - return; - if (mode == TEXT_MODE) - handle_ert(os, "}"); - else - os << '}'; - } - - else if (t.cat() == catAlign) { - if (mode == TABLE_MODE) - os << TAB; - else - os << t.character(); - } - - else if (t.cs() == "tabularnewline") { - if (mode == TABLE_MODE) { - // stuff before the line break - // and look ahead for stuff after the line break - os << HLINE << hlines << HLINE << LINE << read_hlines(p) << HLINE; - hlines.clear(); - } else { - os << t.asInput(); - } - } - - else if (t.cs() == "\\" && mode == MATH_MODE) - os << t.asInput(); - - else if (t.cs() == "\\" && mode == TEXT_MODE && curr_env() == "tabular") - os << LINE; - - else if (t.cat() == catOther) - os << string(1, t.character()); - - else if (t.cat() == catComment) { - string s; - while (p.good()) { - Token const & t = p.getToken(); - if (t.cat() == catNewline) - break; - s += t.asString(); - } - //cerr << "comment\n"; - p.skipSpaces(); - } - - // - // control sequences - // - - else if (t.cs() == "ldots" && mode == MATH_MODE) - os << "\n\\SpecialChar \\ldots{}\n"; - - else if (t.cs() == "lyxlock") - ; // ignored - - else if (t.cs() == "makeatletter") { - p.setCatCode('@', 
catLetter); - handle_ert(os, "\\makeatletter\n"); - } - - else if (t.cs() == "makeatother") { - p.setCatCode('@', catOther); - handle_ert(os, "\\makeatother\n"); - } - - else if (t.cs() == "newcommand" || t.cs() == "renewcommand" - || t.cs() == "providecommand") { - string const name = p.verbatimItem(); - string const opts = p.getOpt(); - string const body = p.verbatimItem(); - // only non-lyxspecific stuff - if (name != "\\noun " && name != "\\tabularnewline ") { - ostringstream ss; - ss << '\\' << t.cs() << '{' << name << '}' - << opts << '{' << body << "}\n"; - handle_ert(os, ss.str()); -/* - ostream & out = in_preamble ? h_preamble : os; - if (!in_preamble) - begin_inset(os, "FormulaMacro\n"); - out << "\\" << t.cs() << "{" << name << "}" - << opts << "{" << body << "}\n"; - if (!in_preamble) - end_inset(os); -*/ - } - } - - else if (t.cs() == "newtheorem") { - ostringstream ss; - ss << "\\newtheorem"; - ss << '{' << p.verbatimItem() << '}'; - ss << p.getOpt(); - ss << '{' << p.verbatimItem() << '}'; - ss << p.getOpt(); - ss << '\n'; - handle_ert(os, ss.str()); - } - - else if (t.cs() == "(") { - begin_inset(os, "Formula"); - os << " \\("; - parse(p, os, FLAG_SIMPLE2, MATH_MODE, outer); - os << "\\)"; - end_inset(os); - } - - else if (t.cs() == "[" && mode == TEXT_MODE) { - begin_inset(os, "Formula"); - os << " \\["; - parse(p, os, FLAG_EQUATION, MATH_MODE, outer); - os << "\\]"; - end_inset(os); - } - - else if (t.cs() == "protect") - // ignore \\protect, will hopefully be re-added during output - ; - - else if (t.cs() == "begin") { - string const name = p.getArg('{', '}'); - active_environments.push(name); - if (name == "abstract") { - handle_par(os); - parse(p, os, FLAG_END, mode, outer); - } else if (is_math_env(name)) { - begin_inset(os, "Formula "); - os << "\\begin{" << name << "}"; - parse(p, os, FLAG_END, MATH_MODE, outer); - os << "\\end{" << name << "}"; - end_inset(os); - } else if (name == "tabular") { - if (mode == TEXT_MODE) - handle_tabular(p, 
os, mode); - else { - os << "\\begin{" << name << "}"; - parse(p, os, FLAG_END, MATHTEXT_MODE, outer); - os << "\\end{" << name << "}"; - } - } else if (name == "table" || name == "figure") { - string opts = p.getOpt(); - begin_inset(os, "Float " + name + "\n"); - if (opts.size()) - os << "placement " << opts << '\n'; - os << "wide false\n" - << "collapsed false\n" - << "\n" - << "\\layout Standard\n"; - parse(p, os, FLAG_END, mode, outer); - end_inset(os); - } else if (name == "lyxlist") { - p.verbatimItem(); // swallow next arg - parse(p, os, FLAG_END, mode, outer); - os << "\n\\layout Bibliography\n\n"; - } else if (name == "thebibliography") { - p.verbatimItem(); // swallow next arg - parse(p, os, FLAG_END, mode, outer); - os << "\n\\layout Bibliography\n\n"; - } else if (mode == MATH_MODE || mode == MATHTEXT_MODE) { - os << "\\begin{" << name << "}"; - parse(p, os, FLAG_END, mode, outer); - os << "\\end{" << name << "}"; - } else { - parse(p, os, FLAG_END, mode, outer); - } - } - - else if (t.cs() == "end") { - if (flags & FLAG_END) { - // eat environment name - string const name = p.getArg('{', '}'); - if (name != curr_env()) - p.error("\\end{" + name + "} does not match \\begin{" - + curr_env() + "}"); - active_environments.pop(); - return; - } - p.error("found 'end' unexpectedly"); - } - - else if (t.cs() == "item") - handle_par(os); - - else if (t.cs() == ")") { - if (flags & FLAG_SIMPLE2) - return; - p.error("found '\\)' unexpectedly"); - } - - else if (t.cs() == "]") { - if (flags & FLAG_EQUATION) - return; - p.error("found '\\]' unexpectedly"); - } - - else if (t.cs() == "documentclass") { - vector opts; - split(p.getArg('[', ']'), opts, ','); - handle_opt(opts, known_languages, h_language); - handle_opt(opts, known_fontsizes, h_paperfontsize); - h_quotes_language = h_language; - h_options = join(opts, ","); - h_textclass = p.getArg('{', '}'); - } - - else if (t.cs() == "usepackage") { - string const options = p.getArg('[', ']'); - string const name = 
p.getArg('{', '}'); - if (options.empty() && name.find(',')) { - vector vecnames; - split(name, vecnames, ','); - vector::const_iterator it = vecnames.begin(); - vector::const_iterator end = vecnames.end(); - for (; it != end; ++it) - handle_package(trim(*it), string()); - } else { - handle_package(name, options); - } - } - - else if (t.cs() == "def") { - string name = p.getToken().cs(); - while (p.nextToken().cat() != catBegin) - name += p.getToken().asString(); - handle_ert(os, "\\def\\" + name + '{' + p.verbatimItem() + '}'); - } - - else if (t.cs() == "par") - handle_par(os); - - else if (is_known(t.cs(), known_headings)) { - string name = t.cs(); - if (p.nextToken().asInput() == "*") { - p.getToken(); - name += "*"; - } - os << "\n\n\\layout " << cap(name) << "\n\n"; - string opt = p.getOpt(); - if (opt.size()) { - begin_inset(os, "OptArg\n"); - os << "collapsed true\n\n\\layout Standard\n\n" << opt; - end_inset(os); - } - parse(p, os, FLAG_ITEM, mode, outer); - os << "\n\n\\layout Standard\n\n"; - } - - else if (t.cs() == "includegraphics") { - if (mode == TEXT_MODE) { - map opts = split_map(p.getArg('[', ']')); - string name = p.verbatimItem(); - begin_inset(os, "Graphics "); - os << "\n\tfilename " << name << '\n'; - if (opts.find("width") != opts.end()) - os << "\twidth " << opts["width"] << '\n'; - if (opts.find("height") != opts.end()) - os << "\theight " << opts["height"] << '\n'; - end_inset(os); - } else { - os << "\\includegraphics "; - } - } - - else if (t.cs() == "footnote") { - begin_inset(os, "Foot\n"); - os << "collapsed true\n\n\\layout Standard\n\n"; - parse(p, os, FLAG_ITEM, mode, false); - end_inset(os); - } - - else if (t.cs() == "makeindex" || t.cs() == "maketitle") - ; // swallow this - - else if (t.cs() == "tableofcontents") - p.verbatimItem(); // swallow this - - else if (t.cs() == "hline" && mode == TABLE_MODE) - hlines += "\\hline"; - - else if (t.cs() == "cline" && mode == TABLE_MODE) - hlines += "\\cline{" + p.verbatimItem() + '}'; 
- - else if (t.cs() == "tiny" && mode == TEXT_MODE) - os << "\n\\size tiny\n"; - - else if (t.cs() == "scriptsize" && mode == TEXT_MODE) - os << "\n\\size scriptsize\n"; - - else if (t.cs() == "Large" && mode == TEXT_MODE) - os << "\n\\size larger\n"; - - else if (t.cs() == "textrm") { - if (mode == TEXT_MODE) { - os << "\n\\family roman\n"; - parse(p, os, FLAG_ITEM, TEXT_MODE, outer); - os << "\n\\family default\n"; - } else { - os << '\\' << t.cs() << '{'; - parse(p, os, FLAG_ITEM, MATHTEXT_MODE, outer); - os << '}'; - } - } - - else if (t.cs() == "textsf") { - if (mode == TEXT_MODE) { - os << "\n\\family sans\n"; - parse(p, os, FLAG_ITEM, TEXT_MODE, outer); - os << "\n\\family default\n"; - } else { - os << '\\' << t.cs() << '{'; - parse(p, os, FLAG_ITEM, MATHTEXT_MODE, outer); - os << '}'; - } - } - - else if (t.cs() == "texttt") { - if (mode == TEXT_MODE) { - os << "\n\\family typewriter\n"; - parse(p, os, FLAG_ITEM, TEXT_MODE, outer); - os << "\n\\family default\n"; - } else { - os << '\\' << t.cs() << '{'; - parse(p, os, FLAG_ITEM, MATHTEXT_MODE, outer); - os << '}'; - } - } - - else if (t.cs() == "textsc") { - if (mode == TEXT_MODE) { - os << "\n\\noun on\n"; - parse(p, os, FLAG_ITEM, TEXT_MODE, outer); - os << "\n\\noun default\n"; - } else { - os << '\\' << t.cs() << '{'; - parse(p, os, FLAG_ITEM, MATHTEXT_MODE, outer); - os << '}'; - } - } - - else if (t.cs() == "textbf") { - if (mode == TEXT_MODE) { - os << "\n\\series bold\n"; - parse(p, os, FLAG_ITEM, TEXT_MODE, outer); - os << "\n\\series default\n"; - } else { - os << '\\' << t.cs() << '{'; - parse(p, os, FLAG_ITEM, MATHTEXT_MODE, outer); - os << '}'; - } - } - - else if (t.cs() == "underbar") { - if (mode == TEXT_MODE) { - os << "\n\\bar under\n"; - parse(p, os, FLAG_ITEM, TEXT_MODE, outer); - os << "\n\\bar default\n"; - } else { - os << '\\' << t.cs() << '{'; - parse(p, os, FLAG_ITEM, MATHTEXT_MODE, outer); - os << '}'; - } - } - - else if ((t.cs() == "emph" || t.cs() == "noun") && mode == 
TEXT_MODE) { - os << "\n\\" << t.cs() << " on\n"; - parse(p, os, FLAG_ITEM, mode, outer); - os << "\n\\" << t.cs() << " default\n"; - } - - else if (t.cs() == "mbox" && mode != TEXT_MODE) { - os << "\n\\mbox{"; - parse(p, os, FLAG_ITEM, MATHTEXT_MODE, outer); - os << '}'; - } - - else if (is_known(t.cs(), known_latex_commands) && mode == TEXT_MODE) { - begin_inset(os, "LatexCommand "); - os << '\\' << t.cs(); - os << p.getOpt(); - os << p.getOpt(); - os << '{' << p.verbatimItem() << '}'; - end_inset(os); - } - - else if (t.cs() == "bibitem") { - os << "\n\\layout Bibliography\n\\bibitem "; - os << p.getOpt(); - os << '{' << p.verbatimItem() << '}' << "\n\n"; - } - - else if (mode == TEXT_MODE && is_known(t.cs(), known_quotes)) { - char const ** where = is_known(t.cs(), known_quotes); - begin_inset(os, "Quotes "); - os << known_coded_quotes[where - known_quotes]; - end_inset(os); - } - - else if (t.cs() == "LyX" && mode == TEXT_MODE) { - p.verbatimItem(); // eat {} - os << "LyX"; - } - - else if (t.cs() == "TeX" && mode == TEXT_MODE) { - p.verbatimItem(); // eat {} - os << "TeX"; - } - - else if (t.cs() == "LaTeX" && mode == TEXT_MODE) { - p.verbatimItem(); // eat {} - os << "LaTeX"; - } - - else if (t.cs() == "LaTeXe" && mode == TEXT_MODE) { - p.verbatimItem(); // eat {} - os << "LaTeXe"; - } - - else if (t.cs() == "lyxarrow" && mode == TEXT_MODE) { - p.verbatimItem(); - os << "\\SpecialChar \\menuseparator\n"; - } - - else if (t.cs() == "ldots" && mode == TEXT_MODE) { - p.verbatimItem(); - os << "\\SpecialChar \\ldots{}\n"; - } - - else if (t.cs() == "@" && mode == TEXT_MODE) - os << "\\SpecialChar \\@"; - - else if (t.cs() == "textasciitilde" && mode == TEXT_MODE) - os << '~'; - - else if (t.cs() == "_" && mode == TEXT_MODE) - os << '_'; - - else if (t.cs() == "&" && mode == TEXT_MODE) - os << '&'; - - else if (t.cs() == "#" && mode == TEXT_MODE) - os << "#"; - - else if (t.cs() == "\"") { - string const name = p.verbatimItem(); - if (name == "a") os << 'ä'; - 
else if (name == "o") os << 'ö'; - else if (name == "u") os << 'ü'; - else if (name == "A") os << 'Ä'; - else if (name == "O") os << 'Ö'; - else if (name == "U") os << 'Ü'; - else handle_ert(os, "\"{" + name + "}"); - } - - else if (t.cs() == "ss") - os << "ß"; - - else if (t.cs() == "input") - handle_ert(os, "\\input{" + p.verbatimItem() + "}\n"); - - else if (t.cs() == "fancyhead") { - ostringstream ss; - ss << "\\fancyhead"; - ss << p.getOpt(); - ss << '{' << p.verbatimItem() << "}\n"; - handle_ert(os, ss.str()); - } - - else { - //cerr << "#: " << t << " mode: " << mode << endl; - if (mode == TEXT_MODE) { - // heuristic: read up to next non-nested space - /* - string s = t.asInput(); - string z = p.verbatimItem(); - while (p.good() && z != " " && z.size()) { - //cerr << "read: " << z << endl; - s += z; - z = p.verbatimItem(); - } - cerr << "found ERT: " << s << endl; - handle_ert(os, s + ' '); - */ - handle_ert(os, t.asInput() + ' '); - } else { - os << t.asInput(); - //cerr << "#: writing: '" << t.asInput() << "'\n"; - } - } - - if (flags & FLAG_LEAVE) { - flags &= ~FLAG_LEAVE; - break; - } - } -} - -} // anonymous namespace +//} // anonymous namespace int main(int argc, char * argv[]) @@ -1473,7 +143,7 @@ int main(int argc, char * argv[]) Parser p(is); parse_preamble(p, cout); active_environments.push("document"); - parse(p, cout, FLAG_END, TEXT_MODE, true); + parse_text(p, cout, FLAG_END, true); cout << "\n\\the_end"; return 0; diff --git a/src/tex2lyx/text.C b/src/tex2lyx/text.C new file mode 100644 index 0000000000..9ca5237f75 --- /dev/null +++ b/src/tex2lyx/text.C @@ -0,0 +1,495 @@ +/** The .tex to .lyx converter + \author André Pönitz (2003) + */ + +// {[( + +#include + +#include "Lsstream.h" +#include "tex2lyx.h" + +#include +#include +#include +#include + +using std::cerr; +using std::endl; +using std::map; +using std::ostream; +using std::ostringstream; +using std::string; +using std::vector; + + +namespace { + +char const * known_headings[] = { 
"caption", "title", "author", "date",
+"paragraph", "chapter", "section", "subsection", "subsubsection", 0 };
+
+// commands that are turned into a LatexCommand inset by parse_text()
+char const * known_latex_commands[] = { "ref", "cite", "label", "index",
+"printindex", "pageref", "url", 0 };
+
+// LaTeX names for quotes
+char const * known_quotes[] = { "glqq", "grqq", "quotedblbase",
+"textquotedblleft", 0};
+
+// the same as known_quotes with .lyx names
+// (must stay index-aligned with known_quotes)
+char const * known_coded_quotes[] = { "gld", "grd", "gld", "grd", 0};
+
+
+// Returns a copy of s with its first character upper-cased; used to turn
+// a command or environment name into the name written after "\layout".
+string cap(string s)
+{
+	if (!s.empty()) {
+		// toupper() is undefined for negative char values, so go
+		// through unsigned char first
+		s[0] = static_cast<char>(
+			toupper(static_cast<unsigned char>(s[0])));
+	}
+	return s;
+}
+
+
+// splits "x=z, y=b" into a map
+// An entry without '=' is stored with an empty value.
+map<string, string> split_map(string const & s)
+{
+	map<string, string> res;
+	vector<string> v;
+	split(s, v);
+	for (size_t i = 0; i < v.size(); ++i) {
+		size_t const pos = v[i].find('=');
+		string const index = v[i].substr(0, pos);
+		// without this check npos + 1 wraps around to 0 and the key
+		// would be copied into the value
+		string const value = (pos == string::npos)
+			? string() : v[i].substr(pos + 1);
+		res[trim(index)] = trim(value);
+	}
+	return res;
+}
+
+
+// Writes the opening line of an inset; the caller supplies any trailing
+// blank or newline as part of name.
+void begin_inset(ostream & os, string const & name)
+{
+	os << "\n\\begin_inset " << name;
+}
+
+
+// Writes the line that closes the inset opened by begin_inset().
+void end_inset(ostream & os)
+{
+	os << "\n\\end_inset\n\n";
+}
+
+
+// Wraps s into a collapsed ERT inset. Every backslash in s is written
+// as a "\backslash" line of its own, all other characters are copied.
+void handle_ert(ostream & os, string const & s)
+{
+	begin_inset(os, "ERT");
+	os << "\nstatus Collapsed\n\n\\layout Standard\n\n";
+	for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
+		if (*it == '\\')
+			os << "\n\\backslash\n";
+		else
+			os << *it;
+	}
+	end_inset(os);
+}
+
+
+// Starts a new paragraph by writing a "\layout" line whose name is
+// derived from the innermost active environment. Does nothing while no
+// environment is active.
+void handle_par(ostream & os)
+{
+	if (active_environments_empty())
+		return;
+	os << "\n\\layout ";
+	string s = curr_env();
+	if (s == "document" || s == "table")
+		os << "Standard\n\n";
+	else if (s == "lyxcode")
+		os << "LyX-Code\n\n";
+	else if (s == "lyxlist")
+		os << "List\n\n";
+	else if (s == "thebibliography")
+		os << "Bibliography\n\n";
+	else
+		os << cap(s) << "\n\n";
+}
+
+
+} // anonymous namespace
+
+
+// Translates text-mode tokens read from p and writes the result to os.
+//
+// flags selects where parsing stops:
+//   FLAG_ITEM        read a single item: leading spaces are skipped, a
+//                    brace group is read up to its closing brace, any
+//                    other token is handled alone
+//   FLAG_BRACE_LAST  return at the next unmatched closing brace
+//   FLAG_END         return at the next \end{...}
+// FLAG_LEAVE is set internally to leave the loop after one token.
+//
+// outer: "$$" is only taken as displayed math when this is true; the
+// footnote handler passes false.
+void parse_text(Parser & p, ostream & os, unsigned flags, bool outer)
+{
+	while (p.good()) {
+		Token const & t = p.getToken();
+
+#ifdef FILEDEBUG
+		cerr << "t: " << t << " flags: " << flags << "\n";
+#endif
+
+		if (flags & FLAG_ITEM) {
+			if (t.cat() == catSpace)
+				continue;
+
+			flags &= ~FLAG_ITEM;
+			if (t.cat() == catBegin) {
+				// skip the brace and collect everything to the next matching
+				// closing brace
+				flags |= FLAG_BRACE_LAST;
+				continue;
+			}
+
+			// handle only this single token, leave the loop if done
+			flags |= FLAG_LEAVE;
+		}
+
+
+		//
+		// cat codes
+		//
+		if (t.cat() == catMath) {
+			// we are inside some text mode thingy, so opening new math is allowed
+			begin_inset(os, "Formula ");
+			Token const & n = p.getToken();
+			if (n.cat() == catMath && outer) {
+				// TeX's $$...$$ syntax for displayed math
+				os << "\\[";
+				parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
+				os << "\\]";
+				p.getToken(); // skip the second '$' token
+			} else {
+				// simple $...$ stuff
+				p.putback();
+				os << '$';
+				parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
+				os << '$';
+			}
+			end_inset(os);
+		}
+
+		else if (t.cat() == catSuper || t.cat() == catSub)
+			cerr << "catcode " << t << " illegal in text mode\n";
+
+		// plain characters are copied as they are
+		else if (t.cat() == catLetter ||
+			       t.cat() == catSpace ||
+			       t.cat() == catOther ||
+			       t.cat() == catAlign ||
+			       t.cat() == catParameter)
+			os << t.character();
+
+		else if (t.cat() == catNewline) {
+			// two newlines in a row end the paragraph
+			if (p.nextToken().cat() == catNewline) {
+				p.getToken();
+				handle_par(os);
+			} else {
+				os << "\n "; // note the space
+			}
+		}
+
+		else if (t.cat() == catActive) {
+			if (t.character() == '~') {
+				if (curr_env() == "lyxcode")
+					os << ' ';
+				else
+					os << "\\SpecialChar ~\n";
+			} else
+				os << t.character();
+		}
+
+		else if (t.cat() == catBegin) {
+			// keep the braces of a brace group as ERT
+			handle_ert(os, "{");
+			parse_text(p, os, FLAG_BRACE_LAST, outer);
+			handle_ert(os, "}");
+		}
+
+		else if (t.cat() == catEnd) {
+			if (flags & FLAG_BRACE_LAST)
+				return;
+			cerr << "stray '}' in text\n";
+			handle_ert(os, "}");
+		}
+
+		else if (t.cat() == catComment)
+			handle_comment(p);
+
+		//
+		// control sequences
+		//
+
+		else if (t.cs() == "ldots")
+			os << "\n\\SpecialChar \\ldots{}\n";
+
+		else if (t.cs() == "(") {
+			begin_inset(os, "Formula");
+			os << " \\(";
+			parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
+			os << "\\)";
+			end_inset(os);
+		}
+
+		else if (t.cs() == "[") {
+			begin_inset(os, "Formula");
+			os << " \\[";
+			parse_math(p, os, FLAG_EQUATION, MATH_MODE);
+			os << "\\]";
+			end_inset(os);
+		}
+
+		else if (t.cs() == "begin") {
+			string const name = p.getArg('{', '}');
+			// popped again by the "end" branch of the nested call
+			active_environments_push(name);
+			if (name == "abstract") {
+				handle_par(os);
+				parse_text(p, os, FLAG_END, outer);
+			} else if (is_math_env(name)) {
+				begin_inset(os, "Formula ");
+				os << "\\begin{" << name << "}";
+				parse_math(p, os, FLAG_END, MATH_MODE);
+				os << "\\end{" << name << "}";
+				end_inset(os);
+			} else if (name == "tabular") {
+				begin_inset(os, "Tabular ");
+				handle_tabular(p, os);
+				end_inset(os);
+			} else if (name == "table" || name == "figure") {
+				string opts = p.getOpt();
+				begin_inset(os, "Float " + name + "\n");
+				if (opts.size())
+					os << "placement " << opts << '\n';
+				os << "wide false\n"
+				   << "collapsed false\n"
+				   << "\n"
+				   << "\\layout Standard\n";
+				parse_text(p, os, FLAG_END, outer);
+				end_inset(os);
+			} else if (name == "lyxlist") {
+				p.verbatimItem(); // swallow next arg
+				parse_text(p, os, FLAG_END, outer);
+				os << "\n\\layout Bibliography\n\n";
+			} else if (name == "thebibliography") {
+				p.verbatimItem(); // swallow next arg
+				parse_text(p, os, FLAG_END, outer);
+				os << "\n\\layout Bibliography\n\n";
+			} else {
+				parse_text(p, os, FLAG_END, outer);
+			}
+		}
+
+		else if (t.cs() == "end") {
+			if (flags & FLAG_END) {
+				// eat environment name
+				string const name = p.getArg('{', '}');
+				if (name != curr_env())
+					p.error("\\end{" + name + "} does not match \\begin{"
+						+ curr_env() + "}");
+				active_environments_pop();
+				return;
+			}
+			p.error("found 'end' unexpectedly");
+		}
+
+		else if (t.cs() == "item")
+			handle_par(os);
+
+		else if (t.cs() == "def") {
+			// the name is everything up to the opening brace of the body
+			string name = p.getToken().cs();
+			while (p.nextToken().cat() != catBegin)
+				name += p.getToken().asString();
+			handle_ert(os, "\\def\\" + name + '{' + p.verbatimItem() + '}');
+		}
+
+		else if (t.cs() == "par")
+			handle_par(os);
+
+		else if (is_known(t.cs(), known_headings)) {
+			string name = t.cs();
+			if (p.nextToken().asInput() == "*") {
+				p.getToken();
+				name += "*";
+			}
+			os << "\n\n\\layout " << cap(name) << "\n\n";
+			string opt = p.getOpt();
+			if (opt.size()) {
+				begin_inset(os, "OptArg\n");
+				os << "collapsed true\n\n\\layout Standard\n\n" << opt;
+				end_inset(os);
+			}
+			parse_text(p, os, FLAG_ITEM, outer);
+			os << "\n\n\\layout Standard\n\n";
+		}
+
+		else if (t.cs() == "includegraphics") {
+			map<string, string> opts = split_map(p.getArg('[', ']'));
+			string name = p.verbatimItem();
+			begin_inset(os, "Graphics ");
+			os << "\n\tfilename " << name << '\n';
+			// only width and height are carried over
+			if (opts.find("width") != opts.end())
+				os << "\twidth " << opts["width"] << '\n';
+			if (opts.find("height") != opts.end())
+				os << "\theight " << opts["height"] << '\n';
+			end_inset(os);
+		}
+
+		else if (t.cs() == "footnote") {
+			begin_inset(os, "Foot\n");
+			os << "collapsed true\n\n\\layout Standard\n\n";
+			parse_text(p, os, FLAG_ITEM, false);
+			end_inset(os);
+		}
+
+		else if (t.cs() == "makeindex" || t.cs() == "maketitle")
+			; // swallow this
+
+		else if (t.cs() == "tableofcontents")
+			p.verbatimItem(); // swallow this
+
+		else if (t.cs() == "tiny" || t.cs() == "scriptsize")
+			os << "\n\\size " << t.cs() << "\n";
+
+		else if (t.cs() == "Large")
+			os << "\n\\size larger\n";
+
+		else if (t.cs() == "textrm") {
+			os << "\n\\family roman\n";
+			parse_text(p, os, FLAG_ITEM, outer);
+			os << "\n\\family default\n";
+		}
+
+		else if (t.cs() == "textsf") {
+			os << "\n\\family sans\n";
+			parse_text(p, os, FLAG_ITEM, outer);
+			os << "\n\\family default\n";
+		}
+
+		else if (t.cs() == "texttt") {
+			os << "\n\\family typewriter\n";
+			parse_text(p, os, FLAG_ITEM, outer);
+			os << "\n\\family default\n";
+		}
+
+		else if (t.cs() == "textsc") {
+			os << "\n\\noun on\n";
+			parse_text(p, os, FLAG_ITEM, outer);
+			os << "\n\\noun default\n";
+		}
+
+		else if (t.cs() == "textbf") {
+			os << "\n\\series bold\n";
+			parse_text(p, os, FLAG_ITEM, outer);
+			os << "\n\\series default\n";
+		}
+
+		else if (t.cs() == "underbar") {
+			os << "\n\\bar under\n";
+			parse_text(p, os, FLAG_ITEM, outer);
+			os << "\n\\bar default\n";
+		}
+
+		else if (t.cs() == "emph" || t.cs() == "noun") {
+			os << "\n\\" << t.cs() << " on\n";
+			parse_text(p, os, FLAG_ITEM, outer);
+			os << "\n\\" << t.cs() << " default\n";
+		}
+
+		else if (t.cs() == "bibitem") {
+			os << "\n\\layout Bibliography\n\\bibitem ";
+			os << p.getOpt();
+			os << '{' << p.verbatimItem() << '}' << "\n\n";
+		}
+
+		else if (is_known(t.cs(), known_latex_commands)) {
+			begin_inset(os, "LatexCommand ");
+			os << '\\' << t.cs();
+			os << p.getOpt();
+			os << p.getOpt();
+			os << '{' << p.verbatimItem() << '}';
+			end_inset(os);
+		}
+
+		else if (char const ** where = is_known(t.cs(), known_quotes)) {
+			// translate via the index-aligned table of .lyx names
+			begin_inset(os, "Quotes ");
+			os << known_coded_quotes[where - known_quotes];
+			end_inset(os);
+		}
+
+		else if (t.cs() == "LyX" || t.cs() == "TeX"
+			|| t.cs() == "LaTeX" || t.cs() == "LaTeXe") {
+			p.verbatimItem(); // eat {}
+			// write the logo that was actually read
+			os << t.cs();
+		}
+
+		else if (t.cs() == "lyxarrow") {
+			p.verbatimItem();
+			os << "\\SpecialChar \\menuseparator\n";
+		}
+
+		else if (t.cs() == "@")
+			os << "\\SpecialChar \\@";
+
+		else if (t.cs() == "textasciitilde")
+			os << '~';
+
+		else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#")
+			os << t.cs();
+
+		else if (t.cs() == "\"") {
+			// string literals keep working if this file is stored in a
+			// multi-byte encoding
+			string const name = p.verbatimItem();
+			if (name == "a") os << "ä";
+			else if (name == "o") os << "ö";
+			else if (name == "u") os << "ü";
+			else if (name == "A") os << "Ä";
+			else if (name == "O") os << "Ö";
+			else if (name == "U") os << "Ü";
+			// keep the accent command itself, including its backslash
+			else handle_ert(os, "\\\"{" + name + "}");
+		}
+
+		else if (t.cs() == "ss")
+			os << "ß";
+
+		else if (t.cs() == "input")
+			handle_ert(os, "\\input{" + p.verbatimItem() + "}\n");
+
+		else if (t.cs() == "fancyhead") {
+			ostringstream ss;
+			ss << "\\fancyhead";
+			ss << p.getOpt();
+			ss << '{' << p.verbatimItem() << "}\n";
+			handle_ert(os, ss.str());
+		}
+
+		else {
+			// anything unknown is passed through as ERT
+			// heuristic: read up to next non-nested space
+			/*
+			string s = t.asInput();
+			string z = p.verbatimItem();
+			while (p.good() && z != " " && z.size()) {
+				//cerr << "read: " << z << endl;
+				s += z;
+				z = p.verbatimItem();
+			}
+			cerr << "found ERT: " << s << endl;
+			handle_ert(os, s + ' ');
+			*/
+			handle_ert(os, t.asInput() + ' ');
+		}
+
+		if (flags & FLAG_LEAVE) {
+			flags &= ~FLAG_LEAVE;
+			break;
+		}
+	}
+}
+
+
+// Convenience overload: runs parse_text() on a string stream and returns
+// what was written.
+string parse_text(Parser & p, unsigned flags, const bool outer)
+{
+	ostringstream os;
+	parse_text(p, os, flags, outer);
+	return os.str();
+}
+
+
+// }])
-- 
2.39.2