]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/text.C
* text.C (parse_text): read environment from layout file too. Now,
[lyx.git] / src / tex2lyx / text.C
1 /** The .tex to .lyx converter
2     \author André Pönitz (2003)
3  */
4
5 // {[(
6
7 #include <config.h>
8
9 #include "tex2lyx.h"
10 #include "FloatList.h"
11 #include "lyxtextclass.h"
12 #include "support/lstrings.h"
13 #include "support/tostr.h"
14
15 #include <iostream>
16 #include <map>
17 #include <sstream>
18 #include <vector>
19
20 using std::cerr;
21 using std::endl;
22 using std::map;
23 using std::ostream;
24 using std::ostringstream;
25 using std::string;
26 using std::vector;
27
28 using lyx::support::rtrim;
29 using lyx::support::suffixIs;
30
31 namespace {
32
// LaTeX commands that are passed through as LatexCommand insets.
// All tables below are terminated by a null pointer.
char const * known_latex_commands[] = { "ref", "cite", "label", "index",
"printindex", "pageref", "url", 0 };

// LaTeX names for quotes
char const * known_quotes[] = { "glqq", "grqq", "quotedblbase",
"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0};

// the same as known_quotes with .lyx names; entry i here belongs to
// entry i of known_quotes. "guilsinglright" is a single quote and
// therefore maps to "frs", mirroring "guilsinglleft" -> "fls".
char const * known_coded_quotes[] = { "gld", "grd", "gld",
"grd", "gls", "fls", "frs", 0};

// LaTeX font size commands
char const * known_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0};

// the same as known_sizes with .lyx names (parallel to known_sizes)
char const * known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normal", "large", "larger", "largest",  "huge", "giant", 0};
49
50
// Returns a copy of s with its first character converted to upper case.
// An empty string is returned unchanged.
std::string cap(std::string s)
{
        if (!s.empty()) {
                // toupper() is only defined for values representable as
                // unsigned char (or EOF), so a plain char that may be
                // negative must be converted first.
                s[0] = static_cast<char>(
                        toupper(static_cast<unsigned char>(s[0])));
        }
        return s;
}
57
58
59 // splits "x=z, y=b" into a map
60 map<string, string> split_map(string const & s)
61 {
62         map<string, string> res;
63         vector<string> v;
64         split(s, v);
65         for (size_t i = 0; i < v.size(); ++i) {
66                 size_t const pos   = v[i].find('=');
67                 string const index = v[i].substr(0, pos);
68                 string const value = v[i].substr(pos + 1, string::npos);
69                 res[trim(index)] = trim(value);
70         }
71         return res;
72 }
73
74
// Writes the line that opens an inset, followed by the inset's name.
// No newline is written after the name.
void begin_inset(std::ostream & os, std::string const & name)
{
        os << "\n\\begin_inset ";
        os << name;
}
79
80
// Writes the line that closes an inset, followed by an empty line.
void end_inset(std::ostream & os)
{
        static char const * const closing = "\n\\end_inset \n\n";
        os << closing;
}
85
86
87 void skip_braces(Parser & p)
88 {
89         if (p.next_token().cat() != catBegin)
90                 return;
91         p.get_token();
92         if (p.next_token().cat() == catEnd) {
93                 p.get_token();
94                 return;
95         }
96         p.putback();
97 }
98
99
100 void handle_ert(ostream & os, string const & s)
101 {
102         begin_inset(os, "ERT");
103         os << "\nstatus Collapsed\n\n\\layout Standard\n\n";
104         for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
105                 if (*it == '\\')
106                         os << "\n\\backslash \n";
107                 else
108                         os << *it;
109         }
110         end_inset(os);
111 }
112
113
114 void handle_par(ostream & os)
115 {
116         if (active_environments.empty())
117                 return;
118         os << "\n\\layout ";
119         string s = active_environment();
120         if (s == "document" || s == "table")
121                 os << "Standard\n\n";
122         else if (s == "center")
123                 os << "Standard\n\n\\align center\n";
124         else if (s == "lyxcode")
125                 os << "LyX-Code\n\n";
126         else if (s == "lyxlist")
127                 os << "List\n\n";
128         else if (s == "thebibliography")
129                 os << "Bibliography\n\n";
130         else
131                 os << cap(s) << "\n\n";
132 }
133
134
135 struct isLayout {
136         isLayout(string const name) : name_(name) {}
137         bool operator()(LyXLayout_ptr const & ptr) {
138                 return ptr.get() && ptr->latexname() == name_;
139         }
140 private:
141         string const name_;
142 };
143
144
145 LyXLayout_ptr findLayout(LyXTextClass const & textclass,
146                          string const & name) 
147 {
148         LyXTextClass::const_iterator it  = textclass.begin();
149         LyXTextClass::const_iterator end = textclass.end();
150         it = std::find_if(it, end, isLayout(name));
151         return (it == end) ? LyXLayout_ptr() : *it;
152 }
153
154
155 void output_layout(ostream & os, LyXLayout_ptr const & layout_ptr,
156                   Parser & p, bool outer, LyXTextClass const & textclass)
157 {
158         string name = layout_ptr->name();
159         os << "\n\n\\layout " << name << "\n\n";
160         if (layout_ptr->optionalargs > 0) {
161                 string s; 
162                 if (p.next_token().character() == '[') {
163                         p.get_token(); // eat '['
164                         begin_inset(os, "OptArg\n");
165                         os << "collapsed true\n\n\\layout Standard\n\n";
166                         parse_text(p, os, FLAG_BRACK_LAST, outer, textclass);
167                         end_inset(os);
168                 }
169         }
170         parse_text(p, os, FLAG_ITEM, outer, textclass);
171         os << "\n\n\\layout Standard\n\n";
172 }
173
174 } // anonymous namespace
175
176
177 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
178                 LyXTextClass const & textclass)
179 {
180         while (p.good()) {
181                 LyXLayout_ptr layout_ptr;
182                 Token const & t = p.get_token();
183
184 #ifdef FILEDEBUG
185                 cerr << "t: " << t << " flags: " << flags << "\n";
186 #endif
187
188                 if (flags & FLAG_ITEM) {
189                         if (t.cat() == catSpace)
190                                 continue;
191
192                         flags &= ~FLAG_ITEM;
193                         if (t.cat() == catBegin) {
194                                 // skip the brace and collect everything to the next matching
195                                 // closing brace
196                                 flags |= FLAG_BRACE_LAST;
197                                 continue;
198                         }
199
200                         // handle only this single token, leave the loop if done
201                         flags |= FLAG_LEAVE;
202                 }
203
204                 if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) 
205                         return;
206
207                 //
208                 // cat codes
209                 //
210                 if (t.cat() == catMath) {
211                         // we are inside some text mode thingy, so opening new math is allowed
212                         begin_inset(os, "Formula ");
213                         Token const & n = p.get_token();
214                         if (n.cat() == catMath && outer) {
215                                 // TeX's $$...$$ syntax for displayed math
216                                 os << "\\[";
217                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
218                                 os << "\\]";
219                                 p.get_token(); // skip the second '$' token
220                         } else {
221                                 // simple $...$  stuff
222                                 p.putback();
223                                 os << '$';
224                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
225                                 os << '$';
226                         }
227                         end_inset(os);
228                 }
229
230                 else if (t.cat() == catSuper || t.cat() == catSub)
231                         cerr << "catcode " << t << " illegal in text mode\n";
232
233                 else if (t.cat() == catLetter ||
234                                t.cat() == catSpace ||
235                                t.cat() == catOther ||
236                                t.cat() == catAlign ||
237                                t.cat() == catParameter)
238                         os << t.character();
239
240                 else if (t.cat() == catNewline) {
241                         if (p.next_token().cat() == catNewline) {
242                                 p.get_token();
243                                 handle_par(os);
244                         } else {
245                                 os << " "; // note the space
246                         }
247                 }
248
249                 else if (t.cat() == catActive) {
250                         if (t.character() == '~') {
251                                 if (active_environment() == "lyxcode")
252                                         os << ' ';
253                                 else 
254                                         os << "\\InsetSpace ~\n";
255                         } else
256                                 os << t.character();
257                 }
258
259                 else if (t.cat() == catBegin) {
260                         // special handling of size changes
261                         bool const is_size = is_known(p.next_token().cs(), known_sizes);
262                         string const s = parse_text(p, FLAG_BRACE_LAST, outer, textclass);
263                         if (s.empty() && p.next_token().character() == '`')
264                                 ; // ignore it in  {}``
265                         else if (is_size || s == "[" || s == "]" || s == "*")
266                                 os << s;
267                         else {
268                                 handle_ert(os, "{");
269                                 os << s;
270                                 handle_ert(os, "}");
271                         }
272                 }
273
274                 else if (t.cat() == catEnd) {
275                         if (flags & FLAG_BRACE_LAST)
276                                 return;
277                         cerr << "stray '}' in text\n";
278                         handle_ert(os, "}");
279                 }
280
281                 else if (t.cat() == catOther)
282                         os << string(1, t.character());
283
284                 else if (t.cat() == catComment)
285                         handle_comment(p);
286
287                 //
288                 // control sequences
289                 //
290
291                 else if (t.cs() == "(") {
292                         begin_inset(os, "Formula");
293                         os << " \\(";
294                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
295                         os << "\\)";
296                         end_inset(os);
297                 }
298
299                 else if (t.cs() == "[") {
300                         begin_inset(os, "Formula");
301                         os << " \\[";
302                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
303                         os << "\\]";
304                         end_inset(os);
305                 }
306
307                 else if (t.cs() == "begin") {
308                         string const name = p.getArg('{', '}');
309                         const bool is_starred = suffixIs(name, '*');
310                         string const unstarred_name = rtrim(name, "*");
311                         active_environments.push_back(name);
312                         if (is_math_env(name)) {
313                                 begin_inset(os, "Formula ");
314                                 os << "\\begin{" << name << "}";
315                                 parse_math(p, os, FLAG_END, MATH_MODE);
316                                 os << "\\end{" << name << "}";
317                                 end_inset(os);
318                         } else if (name == "tabular") {
319                                 begin_inset(os, "Tabular ");
320                                 handle_tabular(p, os, textclass);
321                                 end_inset(os);
322                         } else if (textclass.floats().typeExist(unstarred_name)) {
323                                 begin_inset(os, "Float " + unstarred_name + "\n");
324                                 if (p.next_token().asInput() == "[") {
325                                         os << "placement " 
326                                            << p.getArg('[', ']') << '\n';
327                                 }
328                                 os << "wide " << tostr(is_starred)
329                                    << "\ncollapsed false\n\n"
330                                    << "\\layout Standard\n";
331                                 parse_text(p, os, FLAG_END, outer,
332                                            textclass);
333                                 end_inset(os);
334                         } else if (name == "center") {
335                                 handle_par(os);
336                                 parse_text(p, os, FLAG_END, outer,
337                                            textclass);
338                                 // The single '=' is meant here.
339                         } else if ((layout_ptr = findLayout(textclass, t.cs())).get() &&
340                                    layout_ptr->isEnvironment()) {
341                                 size_t const n = active_environments.size();
342                                 string const s = active_environments[n - 2];
343                                 bool const deeper = s == "enumerate" || s == "itemize"
344                                         || s == "lyxlist";
345                                 if (deeper)
346                                         os << "\n\\begin_deeper";
347                                 os << "\n\\layout " << layout_ptr->name() 
348                                    << "\n\n";
349                                 switch (layout_ptr->latextype) {
350                                 case  LATEX_LIST_ENVIRONMENT:
351                                         os << "\\labelwidthstring "
352                                            << p.verbatim_item() << '\n';
353                                         break;
354                                 case  LATEX_BIB_ENVIRONMENT:
355                                         p.verbatim_item(); // swallow next arg
356                                         break;
357                                 default:
358                                         break;
359                                 }
360                                 parse_text(p, os, FLAG_END, outer, textclass);
361                                 if (deeper)
362                                         os << "\n\\end_deeper\n";
363                                 handle_par(os);
364                         } else {
365                                 handle_par(os);
366                                 parse_text(p, os, FLAG_END, outer, textclass);
367                         }
368                 }
369
370                 else if (t.cs() == "end") {
371                         if (flags & FLAG_END) {
372                                 // eat environment name
373                                 string const name = p.getArg('{', '}');
374                                 if (name != active_environment())
375                                         cerr << "\\end{" + name + "} does not match \\begin{"
376                                                 + active_environment() + "}\n";
377                                 active_environments.pop_back();
378                                 handle_par(os);
379                                 return;
380                         }
381                         p.error("found 'end' unexpectedly");
382                 }
383
384                 else if (t.cs() == "item") {
385                         // should be done automatically by Parser::tokenize
386                         //p.skip_spaces();
387                         string s; 
388                         if (p.next_token().character() == '[') {
389                                 p.get_token(); // eat '['
390                                 s = parse_text(p, FLAG_BRACK_LAST, outer, textclass);
391                         }
392                         handle_par(os);
393                         if (s.size())
394                                 os << s << ' ';
395                 }
396
397                 else if (t.cs() == "def") {
398                         string name = p.get_token().cs();
399                         while (p.next_token().cat() != catBegin)
400                                 name += p.get_token().asString();
401                         handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}');
402                 }
403
404                 else if (t.cs() == "par") {
405                         p.skip_spaces();
406                         if (p.next_token().cs() != "\\begin")
407                                 handle_par(os);
408                         //cerr << "next token: '" << p.next_token().cs() << "'\n";
409                 }
410
411                 // Must attempt to parse "Section*" before "Section".
412                 else if ((p.next_token().asInput() == "*") &&
413                          // The single '=' is meant here.
414                          (layout_ptr = findLayout(textclass,
415                                                   t.cs() + '*')).get() &&
416                          layout_ptr->isCommand()) {
417                         p.get_token();
418                         output_layout(os, layout_ptr, p, outer, textclass);
419                 }
420
421                 // The single '=' is meant here.
422                 else if ((layout_ptr = findLayout(textclass, t.cs())).get() &&
423                          layout_ptr->isCommand()) {
424                         output_layout(os, layout_ptr, p, outer, textclass);
425                 }
426
427                 else if (t.cs() == "includegraphics") {
428                         map<string, string> opts = split_map(p.getArg('[', ']'));
429                         string name = p.verbatim_item();
430                         begin_inset(os, "Graphics ");
431                         os << "\n\tfilename " << name << '\n';
432                         if (opts.find("width") != opts.end())
433                                 os << "\twidth " << opts["width"] << '\n';
434                         if (opts.find("height") != opts.end())
435                                 os << "\theight " << opts["height"] << '\n';
436                         end_inset(os);
437                 }
438                 
439                 else if (t.cs() == "footnote") {
440                         begin_inset(os, "Foot\n");
441                         os << "collapsed true\n\n\\layout Standard\n\n";
442                         parse_text(p, os, FLAG_ITEM, false, textclass);
443                         end_inset(os);
444                 }
445
446                 else if (t.cs() == "ensuremath") {
447                         string s = parse_text(p, FLAG_ITEM, false, textclass);
448                         if (s == "±" || s == "³" || s == "²" || s == "µ")
449                                 os << s;
450                         else
451                                 handle_ert(os, "\\ensuremath{" + s + "}");
452                 }
453
454                 else if (t.cs() == "marginpar") {
455                         begin_inset(os, "Marginal\n");
456                         os << "collapsed true\n\n\\layout Standard\n\n";
457                         parse_text(p, os, FLAG_ITEM, false, textclass);
458                         end_inset(os);
459                 }
460
461                 else if (t.cs() == "hfill") {
462                         os << "\n\\hfill\n";
463                         skip_braces(p);
464                 }
465
466                 else if (t.cs() == "makeindex" || t.cs() == "maketitle")
467                         skip_braces(p); // swallow this
468
469                 else if (t.cs() == "tableofcontents") {
470                         begin_inset(os, "LatexCommand ");
471                         os << '\\' << t.cs() << "{}\n";
472                         end_inset(os);
473                         skip_braces(p); // swallow this
474                 }
475
476
477                 else if (t.cs() == "textrm") {
478                         os << "\n\\family roman \n";
479                         parse_text(p, os, FLAG_ITEM, outer, textclass);
480                         os << "\n\\family default \n";
481                 }
482
483                 else if (t.cs() == "textsf") {
484                         os << "\n\\family sans \n";
485                         parse_text(p, os, FLAG_ITEM, outer, textclass);
486                         os << "\n\\family default \n";
487                 }
488
489                 else if (t.cs() == "texttt") {
490                         os << "\n\\family typewriter \n";
491                         parse_text(p, os, FLAG_ITEM, outer, textclass);
492                         os << "\n\\family default \n";
493                 }
494
495                 else if (t.cs() == "textit") {
496                         os << "\n\\shape italic \n";
497                         parse_text(p, os, FLAG_ITEM, outer, textclass);
498                         os << "\n\\shape default \n";
499                 }
500
501                 else if (t.cs() == "textsc") {
502                         os << "\n\\noun on \n";
503                         parse_text(p, os, FLAG_ITEM, outer, textclass);
504                         os << "\n\\noun default \n";
505                 }
506
507                 else if (t.cs() == "textbf") {
508                         os << "\n\\series bold \n";
509                         parse_text(p, os, FLAG_ITEM, outer, textclass);
510                         os << "\n\\series default \n";
511                 }
512
513                 else if (t.cs() == "underbar") {
514                         os << "\n\\bar under \n";
515                         parse_text(p, os, FLAG_ITEM, outer, textclass);
516                         os << "\n\\bar default \n";
517                 }
518
519                 else if (t.cs() == "emph" || t.cs() == "noun") {
520                         os << "\n\\" << t.cs() << " on \n";
521                         parse_text(p, os, FLAG_ITEM, outer, textclass);
522                         os << "\n\\" << t.cs() << " default \n";
523                 }
524
525                 else if (t.cs() == "bibitem") {
526                         os << "\n\\layout Bibliography\n\\bibitem ";
527                         os << p.getOpt();
528                         os << '{' << p.verbatim_item() << '}' << "\n";
529                 }
530
531                 else if (is_known(t.cs(), known_latex_commands)) {
532                         begin_inset(os, "LatexCommand ");
533                         os << '\\' << t.cs();
534                         os << p.getOpt();
535                         os << p.getOpt();
536                         os << '{' << p.verbatim_item() << "}\n";
537                         end_inset(os);
538                 }
539
540                 else if (is_known(t.cs(), known_quotes)) {
541                   char const ** where = is_known(t.cs(), known_quotes);
542                         begin_inset(os, "Quotes ");
543                         os << known_coded_quotes[where - known_quotes];
544                         end_inset(os);
545                         skip_braces(p);
546                 }
547
548                 else if (is_known(t.cs(), known_sizes)) {
549                   char const ** where = is_known(t.cs(), known_sizes);
550                         os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
551                 }
552
553                 else if (t.cs() == "LyX" || t.cs() == "TeX" 
554                          || t.cs() == "LaTeX") {
555                         os << t.cs();
556                         skip_braces(p); // eat {}
557                 }
558
559                 else if (t.cs() == "LaTeXe") {
560                         os << "LaTeX2e";
561                         skip_braces(p); // eat {}
562                 }
563
564                 else if (t.cs() == "ldots") {
565                         skip_braces(p);
566                         os << "\\SpecialChar \\ldots{}\n";
567                 }
568
569                 else if (t.cs() == "lyxarrow") {
570                         os << "\\SpecialChar \\menuseparator\n";
571                         skip_braces(p);
572                 }
573
574                 else if (t.cs() == "ldots") {
575                         os << "\\SpecialChar \\ldots{}\n";
576                         skip_braces(p);
577                 }
578
579                 else if (t.cs() == "@" && p.next_token().asInput() == ".") {
580                         os << "\\SpecialChar \\@.\n";
581                         p.get_token();
582                 }
583
584                 else if (t.cs() == "-")
585                         os << "\\SpecialChar \\-\n";
586
587                 else if (t.cs() == "textasciitilde") {
588                         os << '~';
589                         skip_braces(p);
590                 }
591
592                 else if (t.cs() == "textasciicircum") {
593                         os << '^';
594                         skip_braces(p);
595                 }
596
597                 else if (t.cs() == "textbackslash") {
598                         os << "\n\\backslash \n";
599                         skip_braces(p);
600                 }
601
602                 else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" 
603                             || t.cs() == "$" || t.cs() == "{" || t.cs() == "}" 
604                             || t.cs() == "%")
605                         os << t.cs();
606
607                 else if (t.cs() == "char") {
608                         if (p.next_token().character() == '`') {
609                                 p.get_token();
610                                 if (p.next_token().cs() == "\"") {
611                                         p.get_token();
612                                         os << '"';
613                                         skip_braces(p);
614                                 } else {
615                                         handle_ert(os, "\\char`");
616                                 }
617                         } else {
618                                 handle_ert(os, "\\char");
619                         }
620                 }
621
622                 else if (t.cs() == "\"") {
623                         string const name = p.verbatim_item();
624                              if (name == "a") os << 'ä';
625                         else if (name == "o") os << 'ö';
626                         else if (name == "u") os << 'ü';
627                         else if (name == "A") os << 'Ä';
628                         else if (name == "O") os << 'Ö';
629                         else if (name == "U") os << 'Ü';
630                         else handle_ert(os, "\"{" + name + "}");
631                 }
632
633                 else if (t.cs() == "=" || t.cs() == "H" || t.cs() == "c"
634                       || t.cs() == "^" || t.cs() == "'" || t.cs() == "~") {
635                         // we need the trim as the LyX parser chokes on such spaces
636                         os << "\n\\i \\" << t.cs() << "{"
637                            << trim(parse_text(p, FLAG_ITEM, outer, textclass), " ") << "}\n";
638                 }
639
640                 else if (t.cs() == "ss")
641                         os << "ß";
642
643                 else if (t.cs() == "i" || t.cs() == "j")
644                         os << "\\" << t.cs() << ' ';
645
646                 else if (t.cs() == "\\")
647                         os << "\n\\newline \n";
648         
649                 else if (t.cs() == "input")
650                         handle_ert(os, "\\input{" + p.verbatim_item() + "}\n");
651
652                 else if (t.cs() == "fancyhead") {
653                         ostringstream ss;
654                         ss << "\\fancyhead";
655                         ss << p.getOpt();
656                         ss << '{' << p.verbatim_item() << "}\n";
657                         handle_ert(os, ss.str());
658                 }
659
660                 else {
661                         //cerr << "#: " << t << " mode: " << mode << endl;
662                         // heuristic: read up to next non-nested space
663                         /*
664                         string s = t.asInput();
665                         string z = p.verbatim_item();
666                         while (p.good() && z != " " && z.size()) {
667                                 //cerr << "read: " << z << endl;
668                                 s += z;
669                                 z = p.verbatim_item();
670                         }
671                         cerr << "found ERT: " << s << endl;
672                         handle_ert(os, s + ' ');
673                         */
674                         handle_ert(os, t.asInput() + ' ');
675                 }
676
677                 if (flags & FLAG_LEAVE) {
678                         flags &= ~FLAG_LEAVE;
679                         break;
680                 }
681         }
682 }
683
684
685 string parse_text(Parser & p, unsigned flags, const bool outer,
686                   LyXTextClass const & textclass)
687 {
688         ostringstream os;
689         parse_text(p, os, flags, outer, textclass);
690         return os.str();
691 }
692
693
694 // }])