]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/text.C
524605b529ad7b3f4fe090b2919fc969a9bd894b
[lyx.git] / src / tex2lyx / text.C
1 /** The .tex to .lyx converter
2     \author André Pönitz (2003)
3  */
4
5 // {[(
6
7 #include <config.h>
8
9 #include "tex2lyx.h"
10 #include "FloatList.h"
11 #include "lyxtextclass.h"
12 #include "support/lstrings.h"
13 #include "support/tostr.h"
14
15 #include <iostream>
16 #include <map>
17 #include <sstream>
18 #include <vector>
19
20 using std::cerr;
21 using std::endl;
22 using std::map;
23 using std::ostream;
24 using std::ostringstream;
25 using std::string;
26 using std::vector;
27
28 using lyx::support::rtrim;
29 using lyx::support::suffixIs;
30
31 namespace {
32
// Control sequences that start a layout of their own; the layout name is
// the capitalised command name. All tables below are 0-terminated.
char const * known_headings[] = { "caption", "title", "author", "date",
"paragraph", "chapter", "section", "subsection", "subsubsection", 0 };

// Control sequences that are written out as a LatexCommand inset
char const * known_latex_commands[] = { "ref", "cite", "label", "index",
"printindex", "pageref", "url", 0 };

// LaTeX names for quotes
char const * known_quotes[] = { "glqq", "grqq", "quotedblbase",
"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0};

// the same as known_quotes with .lyx names; entry i belongs to
// known_quotes[i], so both tables must keep the same length and order.
// "guilsinglright" is a single quote and therefore pairs with "fls" as
// "frs" (it used to be coded as the double quote "frd").
char const * known_coded_quotes[] = { "gld", "grd", "gld",
"grd", "gls", "fls", "frs", 0};

// LaTeX font size switches
char const * known_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0};

// the same as known_sizes with the names written after "\size" in the
// .lyx output; entry i belongs to known_sizes[i]
char const * known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normal", "large", "larger", "largest",  "huge", "giant", 0};
52
53
// Returns a copy of s whose first character has been upper-cased.
// An empty string is returned unchanged.
string cap(string s)
{
        if (!s.empty()) {
                // toupper() is only defined for values representable as
                // unsigned char (or EOF), so a plain char that may be
                // negative must be converted first.
                s[0] = static_cast<char>(
                        toupper(static_cast<unsigned char>(s[0])));
        }
        return s;
}
60
61
62 // splits "x=z, y=b" into a map
63 map<string, string> split_map(string const & s)
64 {
65         map<string, string> res;
66         vector<string> v;
67         split(s, v);
68         for (size_t i = 0; i < v.size(); ++i) {
69                 size_t const pos   = v[i].find('=');
70                 string const index = v[i].substr(0, pos);
71                 string const value = v[i].substr(pos + 1, string::npos);
72                 res[trim(index)] = trim(value);
73         }
74         return res;
75 }
76
77
// Writes the opening line of a LyX inset; name is appended verbatim, so
// callers supply any trailing space or newline themselves.
void begin_inset(ostream & os, string const & name)
{
        static char const opener[] = "\n\\begin_inset ";
        os << opener;
        os << name;
}
82
83
// Writes the closing line of a LyX inset followed by an empty line.
void end_inset(ostream & os)
{
        static char const closer[] = "\n\\end_inset\n\n";
        os << closer;
}
88
89
90 void skip_braces(Parser & p)
91 {
92         if (p.next_token().cat() != catBegin)
93                 return;
94         p.get_token();
95         if (p.next_token().cat() == catEnd) {
96                 p.get_token();
97                 return;
98         }
99         p.putback();
100 }
101
102
103 void handle_ert(ostream & os, string const & s)
104 {
105         begin_inset(os, "ERT");
106         os << "\nstatus Collapsed\n\n\\layout Standard\n\n";
107         for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
108                 if (*it == '\\')
109                         os << "\n\\backslash\n";
110                 else
111                         os << *it;
112         }
113         end_inset(os);
114 }
115
116
117 void handle_par(ostream & os)
118 {
119         if (active_environments.empty())
120                 return;
121         os << "\n\\layout ";
122         string s = active_environment();
123         if (s == "document" || s == "table")
124                 os << "Standard\n\n";
125         else if (s == "center")
126                 os << "Standard\n\n\\align center\n";
127         else if (s == "lyxcode")
128                 os << "LyX-Code\n\n";
129         else if (s == "lyxlist")
130                 os << "List\n\n";
131         else if (s == "thebibliography")
132                 os << "Bibliography\n\n";
133         else
134                 os << cap(s) << "\n\n";
135 }
136
137
138 } // anonymous namespace
139
140
141 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
142                 LyXTextClass const & textclass)
143 {
144         while (p.good()) {
145                 Token const & t = p.get_token();
146
147 #ifdef FILEDEBUG
148                 cerr << "t: " << t << " flags: " << flags << "\n";
149 #endif
150
151                 if (flags & FLAG_ITEM) {
152                         if (t.cat() == catSpace)
153                                 continue;
154
155                         flags &= ~FLAG_ITEM;
156                         if (t.cat() == catBegin) {
157                                 // skip the brace and collect everything to the next matching
158                                 // closing brace
159                                 flags |= FLAG_BRACE_LAST;
160                                 continue;
161                         }
162
163                         // handle only this single token, leave the loop if done
164                         flags |= FLAG_LEAVE;
165                 }
166
167                 if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) 
168                         return;
169
170                 //
171                 // cat codes
172                 //
173                 if (t.cat() == catMath) {
174                         // we are inside some text mode thingy, so opening new math is allowed
175                         begin_inset(os, "Formula ");
176                         Token const & n = p.get_token();
177                         if (n.cat() == catMath && outer) {
178                                 // TeX's $$...$$ syntax for displayed math
179                                 os << "\\[";
180                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
181                                 os << "\\]";
182                                 p.get_token(); // skip the second '$' token
183                         } else {
184                                 // simple $...$  stuff
185                                 p.putback();
186                                 os << '$';
187                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
188                                 os << '$';
189                         }
190                         end_inset(os);
191                 }
192
193                 else if (t.cat() == catSuper || t.cat() == catSub)
194                         cerr << "catcode " << t << " illegal in text mode\n";
195
196                 else if (t.cat() == catLetter ||
197                                t.cat() == catSpace ||
198                                t.cat() == catOther ||
199                                t.cat() == catAlign ||
200                                t.cat() == catParameter)
201                         os << t.character();
202
203                 else if (t.cat() == catNewline) {
204                         if (p.next_token().cat() == catNewline) {
205                                 p.get_token();
206                                 handle_par(os);
207                         } else {
208                                 os << " "; // note the space
209                         }
210                 }
211
212                 else if (t.cat() == catActive) {
213                         if (t.character() == '~') {
214                                 if (active_environment() == "lyxcode")
215                                         os << ' ';
216                                 else
217                                         os << "\\SpecialChar ~\n";
218                         } else
219                                 os << t.character();
220                 }
221
222                 else if (t.cat() == catBegin) {
223                         // special handling of size changes
224                         bool const is_size = is_known(p.next_token().cs(), known_sizes);
225                         string const s = parse_text(p, FLAG_BRACE_LAST, outer, textclass);
226                         if (s.empty() && p.next_token().character() == '`')
227                                 ; // ignore it in  {}``
228                         else if (is_size || s == "[" || s == "]" || s == "*")
229                                 os << s;
230                         else {
231                                 handle_ert(os, "{");
232                                 os << s;
233                                 handle_ert(os, "}");
234                         }
235                 }
236
237                 else if (t.cat() == catEnd) {
238                         if (flags & FLAG_BRACE_LAST)
239                                 return;
240                         cerr << "stray '}' in text\n";
241                         handle_ert(os, "}");
242                 }
243
244                 else if (t.cat() == catOther)
245                         os << string(1, t.character());
246
247                 else if (t.cat() == catComment)
248                         handle_comment(p);
249
250                 //
251                 // control sequences
252                 //
253
254                 else if (t.cs() == "ldots") {
255                         skip_braces(p);
256                         os << "\n\\SpecialChar \\ldots{}\n";
257                 }
258
259                 else if (t.cs() == "(") {
260                         begin_inset(os, "Formula");
261                         os << " \\(";
262                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
263                         os << "\\)";
264                         end_inset(os);
265                 }
266
267                 else if (t.cs() == "[") {
268                         begin_inset(os, "Formula");
269                         os << " \\[";
270                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
271                         os << "\\]";
272                         end_inset(os);
273                 }
274
275                 else if (t.cs() == "begin") {
276                         string const name = p.getArg('{', '}');
277                         active_environments.push_back(name);
278                         if (is_math_env(name)) {
279                                 begin_inset(os, "Formula ");
280                                 os << "\\begin{" << name << "}";
281                                 parse_math(p, os, FLAG_END, MATH_MODE);
282                                 os << "\\end{" << name << "}";
283                                 end_inset(os);
284                                 continue;
285                         }
286
287                         if (name == "tabular") {
288                                 begin_inset(os, "Tabular ");
289                                 handle_tabular(p, os, textclass);
290                                 end_inset(os);
291                                 continue;
292                         }
293
294                         bool is_starred = suffixIs(name, '*');
295                         string unstarred_name = rtrim(name, "*");
296                         if (textclass.floats().typeExist(unstarred_name)) {
297                                 string opts = p.getOpt();
298                                 begin_inset(os, "Float " + unstarred_name + "\n");
299                                 if (opts.size())
300                                         os << "placement " << opts << '\n';
301                                 os << "wide " << tostr(is_starred)
302                                    << "\ncollapsed false\n\n"
303                                    << "\\layout Standard\n";
304                                 parse_text(p, os, FLAG_END, outer,
305                                            textclass);
306                                 end_inset(os);
307                         } else if (name == "center") {
308                                 handle_par(os);
309                                 parse_text(p, os, FLAG_END, outer,
310                                            textclass);
311                         } else if (name == "enumerate" || name == "itemize"
312                                         || name == "lyxlist") {
313                                 size_t const n = active_environments.size();
314                                 string const s = active_environments[n - 2];
315                                 bool const deeper = s == "enumerate" || s == "itemize"
316                                         || s == "lyxlist";
317                                 if (deeper)
318                                         os << "\n\\begin_deeper";
319                                 os << "\n\\layout " << cap(name) << "\n\n";
320                                 if (name == "lyxlist")
321                                         p.verbatim_item(); // swallow next arg
322                                 parse_text(p, os, FLAG_END, outer, textclass);
323                                 if (deeper)
324                                         os << "\n\\end_deeper\n";
325                                 handle_par(os);
326                         } else if (name == "thebibliography") {
327                                 p.verbatim_item(); // swallow next arg
328                                 parse_text(p, os, FLAG_END, outer, textclass);
329                                 os << "\n\\layout Bibliography\n\n";
330                         } else {
331                                 handle_par(os);
332                                 parse_text(p, os, FLAG_END, outer, textclass);
333                         }
334                 }
335
336                 else if (t.cs() == "end") {
337                         if (flags & FLAG_END) {
338                                 // eat environment name
339                                 string const name = p.getArg('{', '}');
340                                 if (name != active_environment())
341                                         cerr << "\\end{" + name + "} does not match \\begin{"
342                                                 + active_environment() + "}\n";
343                                 active_environments.pop_back();
344                                 handle_par(os);
345                                 return;
346                         }
347                         p.error("found 'end' unexpectedly");
348                 }
349
350                 else if (t.cs() == "item") {
351                         p.skip_spaces();
352                         string s; 
353                         if (p.next_token().character() == '[') {
354                                 p.get_token(); // eat '['
355                                 s = parse_text(p, FLAG_BRACK_LAST, outer, textclass);
356                         }
357                         handle_par(os);
358                         os << s << ' ';
359                 }
360
361                 else if (t.cs() == "def") {
362                         string name = p.get_token().cs();
363                         while (p.next_token().cat() != catBegin)
364                                 name += p.get_token().asString();
365                         handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}');
366                 }
367
368                 else if (t.cs() == "par") {
369                         p.skip_spaces();
370                         if (p.next_token().cs() != "\\begin")
371                                 handle_par(os);
372                         //cerr << "next token: '" << p.next_token().cs() << "'\n";
373                 }
374
375                 else if (is_known(t.cs(), known_headings)) {
376                         string name = t.cs();
377                         if (p.next_token().asInput() == "*") {
378                                 p.get_token();
379                                 name += "*";
380                         }
381                         os << "\n\n\\layout " << cap(name) << "\n\n";
382                         string opt = p.getOpt();
383                         if (opt.size()) {
384                                 begin_inset(os, "OptArg\n");
385                                 os << "collapsed true\n\n\\layout Standard\n\n" << opt;
386                                 end_inset(os);
387                         }
388                         parse_text(p, os, FLAG_ITEM, outer, textclass);
389                         os << "\n\n\\layout Standard\n\n";
390                 }
391
392                 else if (t.cs() == "includegraphics") {
393                         map<string, string> opts = split_map(p.getArg('[', ']'));
394                         string name = p.verbatim_item();
395                         begin_inset(os, "Graphics ");
396                         os << "\n\tfilename " << name << '\n';
397                         if (opts.find("width") != opts.end())
398                                 os << "\twidth " << opts["width"] << '\n';
399                         if (opts.find("height") != opts.end())
400                                 os << "\theight " << opts["height"] << '\n';
401                         end_inset(os);
402                 }
403                 
404                 else if (t.cs() == "footnote") {
405                         begin_inset(os, "Foot\n");
406                         os << "collapsed true\n\n\\layout Standard\n\n";
407                         parse_text(p, os, FLAG_ITEM, false, textclass);
408                         end_inset(os);
409                 }
410
411                 else if (t.cs() == "ensuremath") {
412                         string s = parse_text(p, FLAG_ITEM, false, textclass);
413                         if (s == "±" || s == "³" || s == "²" || s == "µ")
414                                 os << s;
415                         else
416                                 handle_ert(os, "\\ensuremath{" + s + "}");
417                 }
418
419                 else if (t.cs() == "marginpar") {
420                         begin_inset(os, "Marginal\n");
421                         os << "collapsed true\n\n\\layout Standard\n\n";
422                         parse_text(p, os, FLAG_ITEM, false, textclass);
423                         end_inset(os);
424                 }
425
426                 else if (t.cs() == "hfill") {
427                         os << "\n\\hfill\n";
428                         skip_braces(p);
429                 }
430
431                 else if (t.cs() == "makeindex" || t.cs() == "maketitle")
432                         skip_braces(p); // swallow this
433
434                 else if (t.cs() == "tableofcontents")
435                         skip_braces(p); // swallow this
436
437                 else if (t.cs() == "textrm") {
438                         os << "\n\\family roman \n";
439                         parse_text(p, os, FLAG_ITEM, outer, textclass);
440                         os << "\n\\family default \n";
441                 }
442
443                 else if (t.cs() == "textsf") {
444                         os << "\n\\family sans \n";
445                         parse_text(p, os, FLAG_ITEM, outer, textclass);
446                         os << "\n\\family default \n";
447                 }
448
449                 else if (t.cs() == "texttt") {
450                         os << "\n\\family typewriter \n";
451                         parse_text(p, os, FLAG_ITEM, outer, textclass);
452                         os << "\n\\family default \n";
453                 }
454
455                 else if (t.cs() == "textit") {
456                         os << "\n\\shape italic \n";
457                         parse_text(p, os, FLAG_ITEM, outer, textclass);
458                         os << "\n\\shape default \n";
459                 }
460
461                 else if (t.cs() == "textsc") {
462                         os << "\n\\noun on \n";
463                         parse_text(p, os, FLAG_ITEM, outer, textclass);
464                         os << "\n\\noun default \n";
465                 }
466
467                 else if (t.cs() == "textbf") {
468                         os << "\n\\series bold \n";
469                         parse_text(p, os, FLAG_ITEM, outer, textclass);
470                         os << "\n\\series default \n";
471                 }
472
473                 else if (t.cs() == "underbar") {
474                         os << "\n\\bar under \n";
475                         parse_text(p, os, FLAG_ITEM, outer, textclass);
476                         os << "\n\\bar default \n";
477                 }
478
479                 else if (t.cs() == "emph" || t.cs() == "noun") {
480                         os << "\n\\" << t.cs() << " on \n";
481                         parse_text(p, os, FLAG_ITEM, outer, textclass);
482                         os << "\n\\" << t.cs() << " default \n";
483                 }
484
485                 else if (t.cs() == "bibitem") {
486                         os << "\n\\layout Bibliography\n\\bibitem ";
487                         os << p.getOpt();
488                         os << '{' << p.verbatim_item() << '}' << "\n\n";
489                 }
490
491                 else if (is_known(t.cs(), known_latex_commands)) {
492                         begin_inset(os, "LatexCommand ");
493                         os << '\\' << t.cs();
494                         os << p.getOpt();
495                         os << p.getOpt();
496                         os << '{' << p.verbatim_item() << '}';
497                         end_inset(os);
498                 }
499
500                 else if (is_known(t.cs(), known_quotes)) {
501                   char const ** where = is_known(t.cs(), known_quotes);
502                         begin_inset(os, "Quotes ");
503                         os << known_coded_quotes[where - known_quotes];
504                         end_inset(os);
505                         skip_braces(p);
506                 }
507
508                 else if (is_known(t.cs(), known_sizes)) {
509                   char const ** where = is_known(t.cs(), known_sizes);
510                         os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
511                 }
512
513                 else if (t.cs() == "LyX" || t.cs() == "TeX"
514                       || t.cs() == "LaTeX" || t.cs() == "LaTeXe") {
515                         os << t.cs();
516                         skip_braces(p); // eat {}
517                 }
518
519                 else if (t.cs() == "lyxarrow") {
520                         os << "\\SpecialChar \\menuseparator\n";
521                         skip_braces(p);
522                 }
523
524                 else if (t.cs() == "ldots") {
525                         os << "\\SpecialChar \\ldots{}\n";
526                         skip_braces(p);
527                 }
528
529                 else if (t.cs() == "@") {
530                         os << "\\SpecialChar \\@";
531                         skip_braces(p);
532                 }
533
534                 else if (t.cs() == "textasciitilde") {
535                         os << '~';
536                         skip_braces(p);
537                 }
538
539                 else if (t.cs() == "textasciicircum") {
540                         os << '^';
541                         skip_braces(p);
542                 }
543
544                 else if (t.cs() == "textbackslash") {
545                         os << "\n\\backslash\n";
546                         skip_braces(p);
547                 }
548
549                 else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" || t.cs() == "$"
550                             || t.cs() == "{" || t.cs() == "}" || t.cs() == "%")
551                         os << t.cs();
552
553                 else if (t.cs() == "char") {
554                         if (p.next_token().character() == '`') {
555                                 p.get_token();
556                                 if (p.next_token().cs() == "\"") {
557                                         p.get_token();
558                                         os << '"';
559                                         skip_braces(p);
560                                 } else {
561                                         handle_ert(os, "\\char`");
562                                 }
563                         } else {
564                                 handle_ert(os, "\\char");
565                         }
566                 }
567
568                 else if (t.cs() == "\"") {
569                         string const name = p.verbatim_item();
570                              if (name == "a") os << 'ä';
571                         else if (name == "o") os << 'ö';
572                         else if (name == "u") os << 'ü';
573                         else if (name == "A") os << 'Ä';
574                         else if (name == "O") os << 'Ö';
575                         else if (name == "U") os << 'Ü';
576                         else handle_ert(os, "\"{" + name + "}");
577                 }
578
579                 else if (t.cs() == "=" || t.cs() == "H" || t.cs() == "c"
580                       || t.cs() == "^" || t.cs() == "'" || t.cs() == "~") {
581                         // we need the trim as the LyX parser chokes on such spaces
582                         os << "\n\\i \\" << t.cs() << "{"
583                            << trim(parse_text(p, FLAG_ITEM, outer, textclass), " ") << "}\n";
584                 }
585
586                 else if (t.cs() == "ss")
587                         os << "ß";
588
589                 else if (t.cs() == "i" || t.cs() == "j")
590                         os << "\\" << t.cs() << ' ';
591
592                 else if (t.cs() == "-")
593                         os << "\\SpecialChar \\-\n";
594
595                 else if (t.cs() == "\\")
596                         os << "\n\\newline\n";
597         
598                 else if (t.cs() == "lyxrightaddress") {
599                         os << "\n\\layout Right Address\n";
600                         parse_text(p, os, FLAG_ITEM, outer, textclass);
601                         os << "\n\\layout Standard\n";
602                 }
603
604                 else if (t.cs() == "lyxaddress") {
605                         os << "\n\\layout Address\n";
606                         parse_text(p, os, FLAG_ITEM, outer, textclass);
607                         os << "\n\\layout Standard\n";
608                 }
609
610                 else if (t.cs() == "input")
611                         handle_ert(os, "\\input{" + p.verbatim_item() + "}\n");
612
613                 else if (t.cs() == "fancyhead") {
614                         ostringstream ss;
615                         ss << "\\fancyhead";
616                         ss << p.getOpt();
617                         ss << '{' << p.verbatim_item() << "}\n";
618                         handle_ert(os, ss.str());
619                 }
620
621                 else {
622                         //cerr << "#: " << t << " mode: " << mode << endl;
623                         // heuristic: read up to next non-nested space
624                         /*
625                         string s = t.asInput();
626                         string z = p.verbatim_item();
627                         while (p.good() && z != " " && z.size()) {
628                                 //cerr << "read: " << z << endl;
629                                 s += z;
630                                 z = p.verbatim_item();
631                         }
632                         cerr << "found ERT: " << s << endl;
633                         handle_ert(os, s + ' ');
634                         */
635                         handle_ert(os, t.asInput() + ' ');
636                 }
637
638                 if (flags & FLAG_LEAVE) {
639                         flags &= ~FLAG_LEAVE;
640                         break;
641                 }
642         }
643 }
644
645
646 string parse_text(Parser & p, unsigned flags, const bool outer,
647                   LyXTextClass const & textclass)
648 {
649         ostringstream os;
650         parse_text(p, os, flags, outer, textclass);
651         return os.str();
652 }
653
654
655 // }])