1 /** The .tex to .lyx converter
2 \author André Pönitz (2003)
11 #include "FloatList.h"
12 #include "support/lstrings.h"
13 #include "support/tostr.h"
24 using std::ostringstream;
25 using std::istringstream;
29 using lyx::support::rtrim;
30 using lyx::support::suffixIs;
33 // thin wrapper around parse_text using a string
// Forwards p, flags, outer and context unchanged to the ostream-based
// parse_text overload, writing into a stream named `os`.
// NOTE(review): the lines that declare `os` and return the result are not
// visible in this excerpt -- presumably the output is collected in a string
// stream and returned as a string; confirm against the full file.
34 string parse_text(Parser & p, unsigned flags, const bool outer,
38 parse_text(p, os, flags, outer, context);
42 // parses a subdocument, usually useful in insets (whence the name)
// The text is not parsed in the caller's context: a fresh Context is built
// from the caller's text class (first constructor argument `true`), the text
// is parsed into `os` with it, and any layout still open in that fresh
// context is closed before returning.
// NOTE(review): the meaning of the boolean Context constructor argument is
// not visible here -- presumably "a layout must be opened first"; confirm.
43 void parse_text_in_inset(Parser & p, ostream & os, unsigned flags, bool outer,
// independent context, so the inset content gets its own layout handling
46 Context newcontext(true, context.textclass);
47 parse_text(p, os, flags, outer, newcontext);
// close whatever layout the subdocument left open
48 newcontext.check_end_layout(os);
52 // parses a paragraph snippet, useful for example for \emph{...}
// Same shape as parse_text_in_inset, but the fresh Context is constructed
// with `false` as its first argument, so the snippet is parsed with its own
// context built from the caller's text class.
// NOTE(review): presumably `false` means no new layout has to be started for
// the snippet -- confirm against the Context class.
53 void parse_text_snippet(Parser & p, ostream & os, unsigned flags, bool outer,
56 Context newcontext(false, context.textclass);
57 parse_text(p, os, flags, outer, newcontext);
58 // should not be needed
59 newcontext.check_end_layout(os);
// Names (without the leading backslash) of commands that parse_text turns
// into a "LatexCommand" inset; the command name is looked up here with
// is_known(). The list is terminated by a 0 sentinel.
65 char const * known_latex_commands[] = { "ref", "cite", "label", "index",
66 "printindex", "pageref", "url", "vref", "vpageref", "prettyref", "eqref", 0 };
68 // LaTeX names for quotes
// Index-parallel to known_coded_quotes: parse_text takes the position of a
// match in this array (where - known_quotes) and uses it as the index into
// known_coded_quotes, so both tables must keep the same order and length.
// Terminated by a 0 sentinel.
69 char const * known_quotes[] = { "glqq", "grqq", "quotedblbase",
70 "textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0};
// The same as known_quotes, but with the .lyx quote codes.
// The two tables are index-parallel: parse_text finds a command in
// known_quotes and uses its position as the index into this table, so the
// order and number of entries must match known_quotes exactly.
// In each code the last letter tells single (s) from double (d) quotes.
// \guilsinglright is the *single* right guillemet, so it maps to "frs"
// (mirroring "fls" for \guilsinglleft), not to the double variant "frd".
// Terminated by a 0 sentinel.
char const * known_coded_quotes[] = { "gld", "grd", "gld",
"grd", "gls", "fls", "frs", 0};
// LaTeX font size command names (without the leading backslash).
// Index-parallel to known_coded_sizes: parse_text uses the position of a
// match in this array (where - known_sizes) as the index into
// known_coded_sizes. Terminated by a 0 sentinel.
76 char const * known_sizes[] = { "tiny", "scriptsize", "footnotesize",
77 "small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0};
// The names written after "\size" in the output for the commands in
// known_sizes, entry by entry (same order, same length, 0-terminated).
79 char const * known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize",
80 "small", "normal", "large", "larger", "largest", "huge", "giant", 0};
82 // splits "x=z, y=b" into a map
// Each piece in `v` is cut at its first '='; the trimmed left part becomes
// the key and the trimmed right part the value. A later piece with the same
// key overwrites an earlier one (operator[] assignment).
// NOTE(review): the declaration and filling of `v` and the return statement
// are not visible in this excerpt -- presumably `v` holds the comma-separated
// pieces of `s` and `res` is returned; confirm against the full file.
83 map<string, string> split_map(string const & s)
85 map<string, string> res;
88 for (size_t i = 0; i < v.size(); ++i) {
89 size_t const pos = v[i].find('=');
// If the piece has no '=', pos is npos: substr(0, npos) is the whole piece...
90 string const index = v[i].substr(0, pos);
// ...and pos + 1 wraps around to 0, so the value is the whole piece as well
// (a bare option "foo" ends up stored as foo -> foo).
91 string const value = v[i].substr(pos + 1, string::npos);
92 res[trim(index)] = trim(value);
97 // A simple function to translate a latex length to something lyx can
98 // understand. Not perfect, but rather best-effort.
// Visible behaviour: a length written as "<number>\<macro>" is rewritten to
// "<value><unit>%", where the macro selects the unit:
//   \textwidth -> text%      \columnwidth -> col%     \paperwidth  -> page%
//   \linewidth -> line%      \paperheight -> pheight% \textheight  -> theight%
// Anything following the first space after the macro is appended unchanged.
// NOTE(review): several lines are not visible in this excerpt: what is
// returned when the early-exit test below is true (presumably `len` itself),
// how `valstring` is computed from the parsed number, and what happens for
// macros not listed above. Confirm against the full file.
99 string translate_len(string const & len)
// i = position of the first character that is not a space, '-', digit or
// '.', i.e. where the leading numeric part ends
101 const string::size_type i = len.find_first_not_of(" -01234567890.");
// only lengths whose unit starts with a backslash (a LaTeX macro) are
// translated below
103 if (i == string::npos || len[i] != '\\')
// the numeric prefix len[0, i) is read through a stream
105 istringstream iss(string(len, 0, i));
111 string const valstring = oss.str();
// the unit runs from the backslash up to the next space; if there is no
// space, i2 is npos and the count (i2 - i) is clamped to the end of len
112 const string::size_type i2 = len.find(" ", i);
113 string const unit = string(len, i, i2 - i);
// everything from that space onwards is kept and re-appended verbatim
114 string const endlen = (i2 == string::npos) ? string() : string(len, i2);
115 if (unit == "\\textwidth")
116 return valstring + "text%" + endlen;
117 else if (unit == "\\columnwidth")
118 return valstring + "col%" + endlen;
119 else if (unit == "\\paperwidth")
120 return valstring + "page%" + endlen;
121 else if (unit == "\\linewidth")
122 return valstring + "line%" + endlen;
123 else if (unit == "\\paperheight")
124 return valstring + "pheight%" + endlen;
125 else if (unit == "\\textheight")
126 return valstring + "theight%" + endlen;
// Writes the opening marker of an inset: a newline, "\begin_inset " and the
// given name. Nothing is written after `name`, so callers that need a line
// break include it in `name` (e.g. "Foot\n") or write it themselves.
132 void begin_inset(ostream & os, string const & name)
134 os << "\n\\begin_inset " << name;
// Writes the closing marker of an inset: a newline, "\end_inset " (with a
// trailing space) and two newlines.
138 void end_inset(ostream & os)
140 os << "\n\\end_inset \n\n";
// Called after commands such as \LaTeX or \tableofcontents, where the call
// sites are annotated "eat {}" / "swallow this".
// Visible logic: first tests whether the next token is a group opener
// (catBegin), later whether the next token is a group closer (catEnd).
// NOTE(review): the statements between and after these tests are not
// visible in this excerpt -- presumably an empty "{}" pair is consumed and
// anything else is left untouched; confirm against the full file.
144 void skip_braces(Parser & p)
146 if (p.next_token().cat() != catBegin)
149 if (p.next_token().cat() == catEnd) {
// Writes the string `s` to `os` wrapped in an "ERT" inset with status
// "Collapsed". The inset content is laid out with a fresh Context built from
// the caller's text class, so the caller's own context is not modified
// (it is taken by const reference).
// NOTE(review): only part of the per-character loop is visible -- a
// "\backslash" token is written in one branch; the condition selecting it
// (presumably *it == '\\') and the handling of all other characters are not
// shown. The matching end_inset call is not visible either.
157 void handle_ert(ostream & os, string const & s, Context const & context)
159 Context newcontext(true, context.textclass);
160 begin_inset(os, "ERT");
161 os << "\nstatus Collapsed\n";
// open a layout inside the inset before any content is written
162 newcontext.check_layout(os);
163 for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
165 os << "\n\\backslash \n";
// close the layout opened above
169 newcontext.check_end_layout(os);
// Remembers the LaTeX name that layouts will be compared against.
175 isLayout(string const name) : name_(name) {}
// Predicate: true when `ptr` is non-null and the layout it points to has a
// latexname() equal to the stored name.
176 bool operator()(LyXLayout_ptr const & ptr) {
177 return ptr.get() && ptr->latexname() == name_;
// Searches the layouts of `textclass` for the first one whose LaTeX name
// equals `name` (using the isLayout predicate). Returns that layout, or a
// default-constructed (null) LyXLayout_ptr when none matches.
// NOTE(review): the line declaring the `name` parameter is not visible in
// this excerpt.
184 LyXLayout_ptr findLayout(LyXTextClass const & textclass,
187 LyXTextClass::const_iterator it = textclass.begin();
188 LyXTextClass::const_iterator end = textclass.end();
189 it = std::find_if(it, end, isLayout(name));
190 return (it == end) ? LyXLayout_ptr() : *it;
// Emits a command that maps to a LyX layout (parse_text calls this when
// findLayout() yields a layout for which isCommand() is true).
// Steps, in order: close the layout open in the parent context; create a
// Context for `newlayout` (remembering the parent's layout); open it; if the
// layout accepts optional arguments and a '[' follows, convert the bracketed
// text to a collapsed "OptArg" inset; parse the command's argument as a
// snippet; close the layout again.
// NOTE(review): some lines (braces and presumably the end_inset call for the
// OptArg inset) are not visible in this excerpt.
194 void output_command_layout(ostream & os, Parser & p, bool outer,
195 Context & parent_context,
196 LyXLayout_ptr newlayout)
// the command starts a new paragraph, so finish the current one first
198 parent_context.check_end_layout(os);
199 Context context(true, parent_context.textclass, newlayout,
200 parent_context.layout);
201 context.check_deeper(os);
202 context.check_layout(os);
// optional argument, only looked for when the layout declares one
203 if (context.layout->optionalargs > 0) {
205 if (p.next_token().character() == '[') {
206 p.get_token(); // eat '['
207 begin_inset(os, "OptArg\n");
208 os << "collapsed true\n";
// FLAG_BRACK_LAST: parse up to the closing ']'
209 parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
// the argument of the command, parsed as a single item
213 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
214 context.check_end_layout(os);
215 context.check_end_deeper(os);
// Converts one LaTeX environment to LyX output. parse_text calls this from
// its "begin" branch; the environment name is read here from the following
// {...} argument and selects the handling:
//   - math environments (is_math_env)        -> "Formula" inset via parse_math
//   - tabular                                -> "Tabular" inset via handle_tabular
//   - float types known to the text class    -> "Float" inset
//   - minipage                               -> "Minipage" inset
//   - center                                 -> contents parsed in the parent context
//   - environment layouts of the text class  -> parsed in a new Context
//   - otherwise (last four statements)       -> \begin{..}/\end{..} kept as ERT
// NOTE(review): a number of lines (braces, end_inset calls, the declaration
// of `height`, default/break statements in the switches and the `else`
// introducing the fallback) are not visible in this excerpt.
219 void parse_environment(Parser & p, ostream & os, bool outer,
220 Context & parent_context)
222 LyXLayout_ptr newlayout;
223 string const name = p.getArg('{', '}');
// a trailing '*' marks the starred variant; floats report it as "wide"
224 const bool is_starred = suffixIs(name, '*');
225 string const unstarred_name = rtrim(name, "*");
// remember the open environment; the "end" branch of parse_text compares
// the closing name against this stack and pops it
226 active_environments.push_back(name);
227 if (is_math_env(name)) {
228 parent_context.check_layout(os);
229 begin_inset(os, "Formula ");
// the \begin{..}/\end{..} pair is written back around the math content
230 os << "\\begin{" << name << "}";
231 parse_math(p, os, FLAG_END, MATH_MODE);
232 os << "\\end{" << name << "}";
236 else if (name == "tabular") {
237 parent_context.check_layout(os);
238 begin_inset(os, "Tabular ");
239 handle_tabular(p, os, parent_context);
// float environments are recognised by their unstarred name
243 else if (parent_context.textclass.floats().typeExist(unstarred_name)) {
244 parent_context.check_layout(os);
245 begin_inset(os, "Float " + unstarred_name + "\n");
// optional [placement] argument
246 if (p.next_token().asInput() == "[") {
247 os << "placement " << p.getArg('[', ']') << '\n';
249 os << "wide " << tostr(is_starred)
250 << "\ncollapsed false\n";
251 parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
255 else if (name == "minipage") {
256 parent_context.check_layout(os);
257 begin_inset(os, "Minipage\n");
// values used when the corresponding optional argument is absent
258 string position = "1";
259 string inner_pos = "0";
// 1st optional argument: position, t/c/b -> 0/1/2
261 if (p.next_token().asInput() == "[") {
262 switch(p.getArg('[', ']')[0]) {
263 case 't': position = "0"; break;
264 case 'c': position = "1"; break;
265 case 'b': position = "2"; break;
267 cerr << "invalid position for minipage"
// 2nd optional argument (only looked for inside the first): height
271 if (p.next_token().asInput() == "[") {
272 height = translate_len(p.getArg('[', ']'));
// 3rd optional argument: inner position, t/c/b/s -> 0/1/2/3
274 if (p.next_token().asInput() == "[") {
275 switch(p.getArg('[', ']')[0]) {
276 case 't': inner_pos = "0"; break;
277 case 'c': inner_pos = "1"; break;
278 case 'b': inner_pos = "2"; break;
279 case 's': inner_pos = "3"; break;
281 cerr << "invalid inner_pos for minipage"
288 os << "position " << position << '\n';
289 os << "inner_position " << inner_pos << '\n';
290 os << "height \"" << height << "\"\n";
// the mandatory width argument is read here, after the optional ones
291 os << "width \"" << translate_len(p.verbatim_item()) << "\"\n";
292 os << "collapsed false\n";
293 parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
// "center" produces no inset: its contents are parsed directly in the
// parent context
298 else if (name == "center") {
299 parse_text(p, os, FLAG_END, outer, parent_context);
300 // The single '=' is meant here.
// (the remark above refers to the assignment to newlayout in the condition
// that follows)
303 else if ((newlayout = findLayout(parent_context.textclass, name)).get() &&
304 newlayout->isEnvironment()) {
305 Context context(true, parent_context.textclass, newlayout,
306 parent_context.layout);
307 parent_context.check_end_layout(os);
308 switch (context.layout->latextype) {
// list environments take one argument, stored as the label width string
309 case LATEX_LIST_ENVIRONMENT:
310 context.extra_stuff = "\\labelwidthstring "
311 + p.verbatim_item() + '\n';
// bibliography environments take one argument that is discarded
313 case LATEX_BIB_ENVIRONMENT:
314 p.verbatim_item(); // swallow next arg
319 context.check_deeper(os);
320 parse_text(p, os, FLAG_END, outer, context);
321 context.check_end_layout(os);
322 context.check_end_deeper(os);
// unknown environment: keep \begin{..} and \end{..} as ERT and parse the
// contents in between as ordinary text
326 parent_context.check_layout(os);
327 handle_ert(os, "\\begin{" + name + "}", parent_context);
328 parse_text_snippet(p, os, FLAG_END, outer, parent_context);
329 handle_ert(os, "\\end{" + name + "}", parent_context);
333 } // anonymous namespace
338 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
341 LyXLayout_ptr newlayout;
343 Token const & t = p.get_token();
346 cerr << "t: " << t << " flags: " << flags << "\n";
349 if (flags & FLAG_ITEM) {
350 if (t.cat() == catSpace)
354 if (t.cat() == catBegin) {
355 // skip the brace and collect everything to the next matching
357 flags |= FLAG_BRACE_LAST;
361 // handle only this single token, leave the loop if done
365 if (t.character() == ']' && (flags & FLAG_BRACK_LAST))
371 if (t.cat() == catMath) {
372 // we are inside some text mode thingy, so opening new math is allowed
373 context.check_layout(os);
374 begin_inset(os, "Formula ");
375 Token const & n = p.get_token();
376 if (n.cat() == catMath && outer) {
377 // TeX's $$...$$ syntax for displayed math
379 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
381 p.get_token(); // skip the second '$' token
383 // simple $...$ stuff
386 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
392 else if (t.cat() == catSuper || t.cat() == catSub)
393 cerr << "catcode " << t << " illegal in text mode\n";
395 // Basic support for english quotes. This should be
396 // extended to other quotes, but is not so easy (a
397 // left english quote is the same as a right german
399 else if (t.asInput() == "`"
400 && p.next_token().asInput() == "`") {
401 context.check_layout(os);
402 begin_inset(os, "Quotes ");
408 else if (t.asInput() == "'"
409 && p.next_token().asInput() == "'") {
410 context.check_layout(os);
411 begin_inset(os, "Quotes ");
419 else if (t.cat() == catLetter ||
420 t.cat() == catSpace ||
421 t.cat() == catOther ||
422 t.cat() == catAlign ||
423 t.cat() == catParameter) {
424 context.check_layout(os);
428 else if (t.cat() == catNewline) {
429 if (p.next_token().cat() == catNewline) {
430 // this should have been done by
431 // the parser already
432 cerr << "what are we doing here?" << endl;
434 context.need_layout = true;
436 os << " "; // note the space
440 else if (t.cat() == catActive) {
441 context.check_layout(os);
442 if (t.character() == '~') {
443 if (context.layout->free_spacing)
446 os << "\\InsetSpace ~\n";
451 else if (t.cat() == catBegin) {
453 // special handling of size changes
454 context.check_layout(os);
455 bool const is_size = is_known(p.next_token().cs(), known_sizes);
456 Context newcontext(false, context.textclass);
457 // need_end_layout = false;
458 string const s = parse_text(p, FLAG_BRACE_LAST, outer, newcontext);
459 // need_end_layout = true;
460 if (s.empty() && p.next_token().character() == '`')
461 ; // ignore it in {}``
462 else if (is_size || s == "[" || s == "]" || s == "*")
465 handle_ert(os, "{", context);
467 handle_ert(os, "}", context);
471 else if (t.cat() == catEnd) {
472 if (flags & FLAG_BRACE_LAST) {
473 context.check_end_layout(os);
476 cerr << "stray '}' in text\n";
477 handle_ert(os, "}", context);
480 else if (t.cat() == catComment)
487 else if (t.cs() == "(") {
488 context.check_layout(os);
489 begin_inset(os, "Formula");
491 parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
496 else if (t.cs() == "[") {
497 context.check_layout(os);
498 begin_inset(os, "Formula");
500 parse_math(p, os, FLAG_EQUATION, MATH_MODE);
505 else if (t.cs() == "begin")
506 parse_environment(p, os, outer, context);
508 else if (t.cs() == "end") {
509 if (flags & FLAG_END) {
510 // eat environment name
511 string const name = p.getArg('{', '}');
512 if (name != active_environment())
513 cerr << "\\end{" + name + "} does not match \\begin{"
514 + active_environment() + "}\n";
515 active_environments.pop_back();
518 p.error("found 'end' unexpectedly");
521 else if (t.cs() == "item") {
522 // should be done automatically by Parser::tokenize
525 if (p.next_token().character() == '[') {
526 p.get_token(); // eat '['
527 Context newcontext(false, context.textclass);
528 s = parse_text(p, FLAG_BRACK_LAST, outer, newcontext);
530 context.need_layout = true;
531 context.has_item = true;
532 context.check_layout(os);
537 else if (t.cs() == "bibitem") {
538 context.need_layout = true;
539 context.has_item = true;
540 context.check_layout(os);
543 os << '{' << p.verbatim_item() << '}' << "\n";
546 else if (t.cs() == "def") {
547 string name = p.get_token().cs();
548 while (p.next_token().cat() != catBegin)
549 name += p.get_token().asString();
550 handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}', context);
553 else if (t.cs() == "par") {
555 context.check_end_layout(os);
556 context.need_layout = true;
559 // Must attempt to parse "Section*" before "Section".
560 else if ((p.next_token().asInput() == "*") &&
561 // The single '=' is meant here.
562 (newlayout = findLayout(context.textclass,
563 t.cs() + '*')).get() &&
564 newlayout->isCommand()) {
566 output_command_layout(os, p, outer, context, newlayout);
569 // The single '=' is meant here.
570 else if ((newlayout = findLayout(context.textclass, t.cs())).get() &&
571 newlayout->isCommand()) {
572 output_command_layout(os, p, outer, context, newlayout);
575 else if (t.cs() == "includegraphics") {
576 map<string, string> opts = split_map(p.getArg('[', ']'));
577 string name = p.verbatim_item();
579 context.check_layout(os);
580 begin_inset(os, "Graphics ");
581 os << "\n\tfilename " << name << '\n';
582 if (opts.find("width") != opts.end())
584 << translate_len(opts["width"]) << '\n';
585 if (opts.find("height") != opts.end())
587 << translate_len(opts["height"]) << '\n';
591 else if (t.cs() == "footnote") {
592 context.check_layout(os);
593 begin_inset(os, "Foot\n");
594 os << "collapsed true\n";
595 parse_text_in_inset(p, os, FLAG_ITEM, false, context);
599 else if (t.cs() == "marginpar") {
600 context.check_layout(os);
601 begin_inset(os, "Marginal\n");
602 os << "collapsed true\n";
603 parse_text_in_inset(p, os, FLAG_ITEM, false, context);
607 else if (t.cs() == "ensuremath") {
608 context.check_layout(os);
609 Context newcontext(false, context.textclass);
610 string s = parse_text(p, FLAG_ITEM, false, newcontext);
611 if (s == "±" || s == "³" || s == "²" || s == "µ")
614 handle_ert(os, "\\ensuremath{" + s + "}",
618 else if (t.cs() == "hfill") {
619 context.check_layout(os);
624 else if (t.cs() == "makeindex" || t.cs() == "maketitle")
625 skip_braces(p); // swallow this
627 else if (t.cs() == "tableofcontents") {
628 context.check_layout(os);
629 begin_inset(os, "LatexCommand \\tableofcontents\n");
631 skip_braces(p); // swallow this
634 else if (t.cs() == "listoffigures") {
635 context.check_layout(os);
636 begin_inset(os, "FloatList figure\n");
638 skip_braces(p); // swallow this
641 else if (t.cs() == "listoftables") {
642 context.check_layout(os);
643 begin_inset(os, "FloatList table\n");
645 skip_braces(p); // swallow this
648 else if (t.cs() == "listof") {
649 string const name = p.get_token().asString();
650 if (context.textclass.floats().typeExist(name)) {
651 context.check_layout(os);
652 begin_inset(os, "FloatList ");
655 p.get_token(); // swallow second arg
657 handle_ert(os, "\\listof{" + name + "}", context);
660 else if (t.cs() == "textrm") {
661 context.check_layout(os);
662 os << "\n\\family roman \n";
663 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
664 os << "\n\\family default \n";
667 else if (t.cs() == "textsf") {
668 context.check_layout(os);
669 os << "\n\\family sans \n";
670 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
671 os << "\n\\family default \n";
674 else if (t.cs() == "texttt") {
675 context.check_layout(os);
676 os << "\n\\family typewriter \n";
677 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
678 os << "\n\\family default \n";
681 else if (t.cs() == "textit") {
682 context.check_layout(os);
683 os << "\n\\shape italic \n";
684 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
685 os << "\n\\shape default \n";
688 else if (t.cs() == "textsc") {
689 context.check_layout(os);
690 os << "\n\\noun on \n";
691 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
692 os << "\n\\noun default \n";
695 else if (t.cs() == "textbf") {
696 context.check_layout(os);
697 os << "\n\\series bold \n";
698 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
699 os << "\n\\series default \n";
702 else if (t.cs() == "underbar") {
703 context.check_layout(os);
704 os << "\n\\bar under \n";
705 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
706 os << "\n\\bar default \n";
709 else if (t.cs() == "emph" || t.cs() == "noun") {
710 context.check_layout(os);
711 os << "\n\\" << t.cs() << " on \n";
712 parse_text_snippet(p, os, FLAG_ITEM, outer, context);
713 os << "\n\\" << t.cs() << " default \n";
716 else if (is_known(t.cs(), known_latex_commands)) {
717 context.check_layout(os);
718 begin_inset(os, "LatexCommand ");
719 os << '\\' << t.cs();
722 os << '{' << p.verbatim_item() << "}\n";
726 else if (is_known(t.cs(), known_quotes)) {
727 char const ** where = is_known(t.cs(), known_quotes);
728 begin_inset(os, "Quotes ");
729 os << known_coded_quotes[where - known_quotes];
734 else if (is_known(t.cs(), known_sizes)) {
735 char const ** where = is_known(t.cs(), known_sizes);
736 context.check_layout(os);
737 os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
740 else if (t.cs() == "LyX" || t.cs() == "TeX"
741 || t.cs() == "LaTeX") {
742 context.check_layout(os);
744 skip_braces(p); // eat {}
747 else if (t.cs() == "LaTeXe") {
748 context.check_layout(os);
750 skip_braces(p); // eat {}
753 else if (t.cs() == "ldots") {
754 context.check_layout(os);
756 os << "\\SpecialChar \\ldots{}\n";
759 else if (t.cs() == "lyxarrow") {
760 context.check_layout(os);
761 os << "\\SpecialChar \\menuseparator\n";
765 else if (t.cs() == "ldots") {
766 context.check_layout(os);
767 os << "\\SpecialChar \\ldots{}\n";
771 else if (t.cs() == "textcompwordmark") {
772 context.check_layout(os);
773 os << "\\SpecialChar \\textcompwordmark{}\n";
777 else if (t.cs() == "@" && p.next_token().asInput() == ".") {
778 context.check_layout(os);
779 os << "\\SpecialChar \\@.\n";
783 else if (t.cs() == "-") {
784 context.check_layout(os);
785 os << "\\SpecialChar \\-\n";
788 else if (t.cs() == "textasciitilde") {
789 context.check_layout(os);
794 else if (t.cs() == "textasciicircum") {
795 context.check_layout(os);
800 else if (t.cs() == "textbackslash") {
801 context.check_layout(os);
802 os << "\n\\backslash \n";
806 else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#"
807 || t.cs() == "$" || t.cs() == "{" || t.cs() == "}"
809 context.check_layout(os);
813 else if (t.cs() == "char") {
814 context.check_layout(os);
815 if (p.next_token().character() == '`') {
817 if (p.next_token().cs() == "\"") {
822 handle_ert(os, "\\char`", context);
825 handle_ert(os, "\\char", context);
829 else if (t.cs() == "\"") {
830 context.check_layout(os);
831 string const name = p.verbatim_item();
832 if (name == "a") os << 'ä';
833 else if (name == "o") os << 'ö';
834 else if (name == "u") os << 'ü';
835 else if (name == "A") os << 'Ä';
836 else if (name == "O") os << 'Ö';
837 else if (name == "U") os << 'Ü';
838 else handle_ert(os, "\"{" + name + "}", context);
841 else if (t.cs() == "=" || t.cs() == "H" || t.cs() == "c"
842 || t.cs() == "^" || t.cs() == "'" || t.cs() == "~") {
843 // we need the trim as the LyX parser chokes on such spaces
844 context.check_layout(os);
845 os << "\n\\i \\" << t.cs() << "{"
846 << trim(parse_text(p, FLAG_ITEM, outer, context), " ") << "}\n";
849 else if (t.cs() == "ss") {
850 context.check_layout(os);
854 else if (t.cs() == "i" || t.cs() == "j") {
855 context.check_layout(os);
856 os << "\\" << t.cs() << ' ';
859 else if (t.cs() == "\\") {
860 string const next = p.next_token().asInput();
862 handle_ert(os, "\\\\" + p.getOpt(), context);
863 else if (next == "*") {
865 handle_ert(os, "\\\\*" + p.getOpt(), context);
868 context.check_layout(os);
869 os << "\n\\newline \n";
873 else if (t.cs() == "input" || t.cs() == "include"
874 || t.cs() == "verbatiminput") {
875 string name = '\\' + t.cs();
876 if (t.cs() == "verbatiminput"
877 && p.next_token().asInput() == "*")
878 name += p.get_token().asInput();
879 context.check_layout(os);
880 begin_inset(os, "Include ");
881 os << name << '{' << p.getArg('{', '}') << "}\n";
882 os << "preview false\n";
885 else if (t.cs() == "fancyhead") {
886 context.check_layout(os);
890 ss << '{' << p.verbatim_item() << "}\n";
891 handle_ert(os, ss.str(), context);
895 //cerr << "#: " << t << " mode: " << mode << endl;
896 // heuristic: read up to next non-nested space
898 string s = t.asInput();
899 string z = p.verbatim_item();
900 while (p.good() && z != " " && z.size()) {
901 //cerr << "read: " << z << endl;
903 z = p.verbatim_item();
905 cerr << "found ERT: " << s << endl;
906 handle_ert(os, s + ' ', context);
908 context.check_layout(os);
909 handle_ert(os, t.asInput() + ' ', context);
912 if (flags & FLAG_LEAVE) {
913 flags &= ~FLAG_LEAVE;