]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/text.C
ec10d5cd976d7f93a255e2d762e6f5a29ee53ebf
[lyx.git] / src / tex2lyx / text.C
1 /** The .tex to .lyx converter
2     \author André Pönitz (2003)
3  */
4
5 // {[(
6
#include <config.h>

#include "Lsstream.h"
#include "tex2lyx.h"

#include <cctype>
#include <iostream>
#include <map>
#include <string>
#include <vector>
16
17 using std::cerr;
18 using std::endl;
19 using std::map;
20 using std::ostream;
21 using std::ostringstream;
22 using std::string;
23 using std::vector;
24
25
26 namespace {
27
// All tables below are 0-terminated so that they can be searched with
// is_known().

// Sectioning-like commands that are turned into a LyX layout of the
// same (capitalized) name.
char const * known_headings[] = { "caption", "title", "author", "date",
"paragraph", "chapter", "section", "subsection", "subsubsection", 0 };

// Commands that are written out as a LatexCommand inset.
char const * known_latex_commands[] = { "ref", "cite", "label", "index",
"printindex", "pageref", "url", 0 };

// LaTeX names for quotes
char const * known_quotes[] = { "glqq", "grqq", "quotedblbase",
"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0};

// the same as known_quotes with .lyx names; entry i belongs to
// known_quotes[i], so both tables must keep the same length and order.
// \guilsinglright is a *single* guillemet and therefore maps to "frs",
// mirroring "fls" for \guilsinglleft.
char const * known_coded_quotes[] = { "gld", "grd", "gld",
"grd", "gls", "fls", "frs", 0};

// LaTeX font size switches
char const * known_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0};

// the same as known_sizes with .lyx names; entry i belongs to
// known_sizes[i].
char const * known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normal", "large", "larger", "largest",  "huge", "giant", 0};
47
48
// Returns a copy of s whose first character has been converted to upper
// case; an empty string is returned unchanged.
std::string cap(std::string s)
{
	if (!s.empty()) {
		// toupper() is only defined for values representable as
		// unsigned char (or EOF), so 8-bit characters must not be
		// passed as a possibly negative plain char.
		unsigned char const first = static_cast<unsigned char>(s[0]);
		s[0] = static_cast<char>(std::toupper(first));
	}
	return s;
}
55
56
57 // splits "x=z, y=b" into a map
58 map<string, string> split_map(string const & s)
59 {
60         map<string, string> res;
61         vector<string> v;
62         split(s, v);
63         for (size_t i = 0; i < v.size(); ++i) {
64                 size_t const pos   = v[i].find('=');
65                 string const index = v[i].substr(0, pos);
66                 string const value = v[i].substr(pos + 1, string::npos);
67                 res[trim(index)] = trim(value);
68         }
69         return res;
70 }
71
72
// Starts a new inset in the LyX output: writes a line break followed by
// the \begin_inset keyword and the inset name (plus whatever arguments
// the caller appended to it).
void begin_inset(std::ostream & os, std::string const & name)
{
	static char const * const opening = "\n\\begin_inset ";
	os << opening;
	os << name;
}
77
78
// Closes the inset opened by begin_inset(): writes the \end_inset
// keyword on a line of its own, followed by an empty line.
void end_inset(std::ostream & os)
{
	static char const * const closing = "\n\\end_inset\n\n";
	os << closing;
}
83
84
85 void skip_braces(Parser & p)
86 {
87         if (p.next_token().cat() != catBegin)
88                 return;
89         p.get_token();
90         if (p.next_token().cat() == catEnd) {
91                 p.get_token();
92                 return;
93         }
94         p.putback();
95 }
96
97
98 void handle_ert(ostream & os, string const & s)
99 {
100         begin_inset(os, "ERT");
101         os << "\nstatus Collapsed\n\n\\layout Standard\n\n";
102         for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
103                 if (*it == '\\')
104                         os << "\n\\backslash\n";
105                 else
106                         os << *it;
107         }
108         end_inset(os);
109 }
110
111
112 void handle_par(ostream & os)
113 {
114         if (active_environments.empty())
115                 return;
116         os << "\n\\layout ";
117         string s = active_environment();
118         if (s == "document" || s == "table" || s == "center")
119                 os << "Standard\n\n";
120         else if (s == "lyxcode")
121                 os << "LyX-Code\n\n";
122         else if (s == "lyxlist")
123                 os << "List\n\n";
124         else if (s == "thebibliography")
125                 os << "Bibliography\n\n";
126         else
127                 os << cap(s) << "\n\n";
128 }
129
130
131 } // anonymous namespace
132
133
134 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer)
135 {
136         while (p.good()) {
137                 Token const & t = p.get_token();
138
139 #ifdef FILEDEBUG
140                 cerr << "t: " << t << " flags: " << flags << "\n";
141 #endif
142
143                 if (flags & FLAG_ITEM) {
144                         if (t.cat() == catSpace)
145                                 continue;
146
147                         flags &= ~FLAG_ITEM;
148                         if (t.cat() == catBegin) {
149                                 // skip the brace and collect everything to the next matching
150                                 // closing brace
151                                 flags |= FLAG_BRACE_LAST;
152                                 continue;
153                         }
154
155                         // handle only this single token, leave the loop if done
156                         flags |= FLAG_LEAVE;
157                 }
158
159                 if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) 
160                         return;
161
162                 //
163                 // cat codes
164                 //
165                 if (t.cat() == catMath) {
166                         // we are inside some text mode thingy, so opening new math is allowed
167                         begin_inset(os, "Formula ");
168                         Token const & n = p.get_token();
169                         if (n.cat() == catMath && outer) {
170                                 // TeX's $$...$$ syntax for displayed math
171                                 os << "\\[";
172                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
173                                 os << "\\]";
174                                 p.get_token(); // skip the second '$' token
175                         } else {
176                                 // simple $...$  stuff
177                                 p.putback();
178                                 os << '$';
179                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
180                                 os << '$';
181                         }
182                         end_inset(os);
183                 }
184
185                 else if (t.cat() == catSuper || t.cat() == catSub)
186                         cerr << "catcode " << t << " illegal in text mode\n";
187
188                 else if (t.cat() == catLetter ||
189                                t.cat() == catSpace ||
190                                t.cat() == catOther ||
191                                t.cat() == catAlign ||
192                                t.cat() == catParameter)
193                         os << t.character();
194
195                 else if (t.cat() == catNewline) {
196                         if (p.next_token().cat() == catNewline) {
197                                 p.get_token();
198                                 handle_par(os);
199                         } else {
200                                 os << " "; // note the space
201                         }
202                 }
203
204                 else if (t.cat() == catActive) {
205                         if (t.character() == '~') {
206                                 if (active_environment() == "lyxcode")
207                                         os << ' ';
208                                 else
209                                         os << "\\SpecialChar ~\n";
210                         } else
211                                 os << t.character();
212                 }
213
214                 else if (t.cat() == catBegin) {
215                         // special handling of size changes
216                         bool const is_size = is_known(p.next_token().cs(), known_sizes);
217                         string const s = parse_text(p, FLAG_BRACE_LAST, outer);
218                         if (s.empty() && p.next_token().character() == '`')
219                                 ; // ignore it in  {}``
220                         else if (is_size || s == "[" || s == "]" || s == "*")
221                                 os << s;
222                         else {
223                                 handle_ert(os, "{");
224                                 os << s;
225                                 handle_ert(os, "}");
226                         }
227                 }
228
229                 else if (t.cat() == catEnd) {
230                         if (flags & FLAG_BRACE_LAST)
231                                 return;
232                         cerr << "stray '}' in text\n";
233                         handle_ert(os, "}");
234                 }
235
236                 else if (t.cat() == catOther)
237                         os << string(1, t.character());
238
239                 else if (t.cat() == catComment)
240                         handle_comment(p);
241
242                 //
243                 // control sequences
244                 //
245
246                 else if (t.cs() == "ldots") {
247                         skip_braces(p);
248                         os << "\n\\SpecialChar \\ldots{}\n";
249                 }
250
251                 else if (t.cs() == "(") {
252                         begin_inset(os, "Formula");
253                         os << " \\(";
254                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
255                         os << "\\)";
256                         end_inset(os);
257                 }
258
259                 else if (t.cs() == "[") {
260                         begin_inset(os, "Formula");
261                         os << " \\[";
262                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
263                         os << "\\]";
264                         end_inset(os);
265                 }
266
267                 else if (t.cs() == "begin") {
268                         string const name = p.getArg('{', '}');
269                         active_environments.push_back(name);
270                         if (is_math_env(name)) {
271                                 begin_inset(os, "Formula ");
272                                 os << "\\begin{" << name << "}";
273                                 parse_math(p, os, FLAG_END, MATH_MODE);
274                                 os << "\\end{" << name << "}";
275                                 end_inset(os);
276                         } else if (name == "tabular") {
277                                 begin_inset(os, "Tabular ");
278                                 handle_tabular(p, os);
279                                 end_inset(os);
280                         } else if (name == "table" || name == "figure") {
281                                 string opts = p.getOpt();
282                                 begin_inset(os, "Float " + name + "\n");
283                                 if (opts.size())
284                                         os << "placement " << opts << '\n';
285                                 os << "wide false\ncollapsed false\n\n"
286                                          << "\\layout Standard\n";
287                                 parse_text(p, os, FLAG_END, outer);
288                                 end_inset(os);
289                         } else if (name == "center") {
290                                 active_environments.pop_back();
291                                 handle_par(os); 
292                                 active_environments.push_back(name);
293                                 os << "\\align center\n";
294                                 parse_text(p, os, FLAG_END, outer);
295                         } else if (name == "enumerate" || name == "itemize"
296                                         || name == "lyxlist") {
297                                 size_t const n = active_environments.size();
298                                 string const s = active_environments[n - 2];
299                                 bool const deeper = s == "enumerate" || s == "itemize"
300                                         || s == "lyxlist";
301                                 if (deeper)
302                                         os << "\n\\begin_deeper";
303                                 os << "\n\\layout " << cap(name) << "\n\n";
304                                 if (name == "lyxlist")
305                                         p.verbatim_item(); // swallow next arg
306                                 parse_text(p, os, FLAG_END, outer);
307                                 if (deeper)
308                                         os << "\n\\end_deeper\n";
309                                 handle_par(os);
310                         } else if (name == "thebibliography") {
311                                 p.verbatim_item(); // swallow next arg
312                                 parse_text(p, os, FLAG_END, outer);
313                                 os << "\n\\layout Bibliography\n\n";
314                         } else {
315                                 handle_par(os);
316                                 parse_text(p, os, FLAG_END, outer);
317                         }
318                 }
319
320                 else if (t.cs() == "end") {
321                         if (flags & FLAG_END) {
322                                 // eat environment name
323                                 string const name = p.getArg('{', '}');
324                                 if (name != active_environment())
325                                         cerr << "\\end{" + name + "} does not match \\begin{"
326                                                 + active_environment() + "}\n";
327                                 active_environments.pop_back();
328                                 handle_par(os);
329                                 return;
330                         }
331                         p.error("found 'end' unexpectedly");
332                 }
333
334                 else if (t.cs() == "item") {
335                         p.skip_spaces();
336                         string s; 
337                         if (p.next_token().character() == '[') {
338                                 p.get_token(); // eat '['
339                                 s = parse_text(p, FLAG_BRACK_LAST, outer);
340                         }
341                         handle_par(os);
342                         os << s << ' ';
343                 }
344
345                 else if (t.cs() == "def") {
346                         string name = p.get_token().cs();
347                         while (p.next_token().cat() != catBegin)
348                                 name += p.get_token().asString();
349                         handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}');
350                 }
351
352                 else if (t.cs() == "par") {
353                         p.skip_spaces();
354                         if (p.next_token().cs() != "\\begin")
355                                 handle_par(os);
356                         //cerr << "next token: '" << p.next_token().cs() << "'\n";
357                 }
358
359                 else if (is_known(t.cs(), known_headings)) {
360                         string name = t.cs();
361                         if (p.next_token().asInput() == "*") {
362                                 p.get_token();
363                                 name += "*";
364                         }
365                         os << "\n\n\\layout " << cap(name) << "\n\n";
366                         string opt = p.getOpt();
367                         if (opt.size()) {
368                                 begin_inset(os, "OptArg\n");
369                                 os << "collapsed true\n\n\\layout Standard\n\n" << opt;
370                                 end_inset(os);
371                         }
372                         parse_text(p, os, FLAG_ITEM, outer);
373                         os << "\n\n\\layout Standard\n\n";
374                 }
375
376                 else if (t.cs() == "includegraphics") {
377                         map<string, string> opts = split_map(p.getArg('[', ']'));
378                         string name = p.verbatim_item();
379                         begin_inset(os, "Graphics ");
380                         os << "\n\tfilename " << name << '\n';
381                         if (opts.find("width") != opts.end())
382                                 os << "\twidth " << opts["width"] << '\n';
383                         if (opts.find("height") != opts.end())
384                                 os << "\theight " << opts["height"] << '\n';
385                         end_inset(os);
386                 }
387                 
388                 else if (t.cs() == "footnote") {
389                         begin_inset(os, "Foot\n");
390                         os << "collapsed true\n\n\\layout Standard\n\n";
391                         parse_text(p, os, FLAG_ITEM, false);
392                         end_inset(os);
393                 }
394
395                 else if (t.cs() == "ensuremath") {
396                         string s = parse_text(p, FLAG_ITEM, false);
397                         if (s == "±" || s == "³" || s == "²" || s == "µ")
398                                 os << s;
399                         else
400                                 handle_ert(os, "\\ensuremath{" + s + "}");
401                 }
402
403                 else if (t.cs() == "marginpar") {
404                         begin_inset(os, "Marginal\n");
405                         os << "collapsed true\n\n\\layout Standard\n\n";
406                         parse_text(p, os, FLAG_ITEM, false);
407                         end_inset(os);
408                 }
409
410                 else if (t.cs() == "hfill") {
411                         os << "\n\\hfill\n";
412                         skip_braces(p);
413                 }
414
415                 else if (t.cs() == "makeindex" || t.cs() == "maketitle")
416                         skip_braces(p); // swallow this
417
418                 else if (t.cs() == "tableofcontents")
419                         skip_braces(p); // swallow this
420
421                 else if (t.cs() == "textrm") {
422                         os << "\n\\family roman \n";
423                         parse_text(p, os, FLAG_ITEM, outer);
424                         os << "\n\\family default \n";
425                 }
426
427                 else if (t.cs() == "textsf") {
428                         os << "\n\\family sans \n";
429                         parse_text(p, os, FLAG_ITEM, outer);
430                         os << "\n\\family default \n";
431                 }
432
433                 else if (t.cs() == "texttt") {
434                         os << "\n\\family typewriter \n";
435                         parse_text(p, os, FLAG_ITEM, outer);
436                         os << "\n\\family default \n";
437                 }
438
439                 else if (t.cs() == "textit") {
440                         os << "\n\\shape italic \n";
441                         parse_text(p, os, FLAG_ITEM, outer);
442                         os << "\n\\shape default \n";
443                 }
444
445                 else if (t.cs() == "textsc") {
446                         os << "\n\\noun on \n";
447                         parse_text(p, os, FLAG_ITEM, outer);
448                         os << "\n\\noun default \n";
449                 }
450
451                 else if (t.cs() == "textbf") {
452                         os << "\n\\series bold \n";
453                         parse_text(p, os, FLAG_ITEM, outer);
454                         os << "\n\\series default \n";
455                 }
456
457                 else if (t.cs() == "underbar") {
458                         os << "\n\\bar under \n";
459                         parse_text(p, os, FLAG_ITEM, outer);
460                         os << "\n\\bar default \n";
461                 }
462
463                 else if (t.cs() == "emph" || t.cs() == "noun") {
464                         os << "\n\\" << t.cs() << " on \n";
465                         parse_text(p, os, FLAG_ITEM, outer);
466                         os << "\n\\" << t.cs() << " default \n";
467                 }
468
469                 else if (t.cs() == "bibitem") {
470                         os << "\n\\layout Bibliography\n\\bibitem ";
471                         os << p.getOpt();
472                         os << '{' << p.verbatim_item() << '}' << "\n\n";
473                 }
474
475                 else if (is_known(t.cs(), known_latex_commands)) {
476                         begin_inset(os, "LatexCommand ");
477                         os << '\\' << t.cs();
478                         os << p.getOpt();
479                         os << p.getOpt();
480                         os << '{' << p.verbatim_item() << '}';
481                         end_inset(os);
482                 }
483
484                 else if (is_known(t.cs(), known_quotes)) {
485                   char const ** where = is_known(t.cs(), known_quotes);
486                         begin_inset(os, "Quotes ");
487                         os << known_coded_quotes[where - known_quotes];
488                         end_inset(os);
489                         skip_braces(p);
490                 }
491
492                 else if (is_known(t.cs(), known_sizes)) {
493                   char const ** where = is_known(t.cs(), known_sizes);
494                         os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
495                 }
496
497                 else if (t.cs() == "LyX" || t.cs() == "TeX"
498                       || t.cs() == "LaTeX" || t.cs() == "LaTeXe") {
499                         os << t.cs();
500                         skip_braces(p); // eat {}
501                 }
502
503                 else if (t.cs() == "lyxarrow") {
504                         os << "\\SpecialChar \\menuseparator\n";
505                         skip_braces(p);
506                 }
507
508                 else if (t.cs() == "ldots") {
509                         os << "\\SpecialChar \\ldots{}\n";
510                         skip_braces(p);
511                 }
512
513                 else if (t.cs() == "@") {
514                         os << "\\SpecialChar \\@";
515                         skip_braces(p);
516                 }
517
518                 else if (t.cs() == "textasciitilde") {
519                         os << '~';
520                         skip_braces(p);
521                 }
522
523                 else if (t.cs() == "textasciicircum") {
524                         os << '^';
525                         skip_braces(p);
526                 }
527
528                 else if (t.cs() == "textbackslash") {
529                         os << "\n\\backslash\n";
530                         skip_braces(p);
531                 }
532
533                 else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" || t.cs() == "$"
534                             || t.cs() == "{" || t.cs() == "}" || t.cs() == "%")
535                         os << t.cs();
536
537                 else if (t.cs() == "char") {
538                         if (p.next_token().character() == '`') {
539                                 p.get_token();
540                                 if (p.next_token().cs() == "\"") {
541                                         p.get_token();
542                                         os << '"';
543                                         skip_braces(p);
544                                 } else {
545                                         handle_ert(os, "\\char`");
546                                 }
547                         } else {
548                                 handle_ert(os, "\\char");
549                         }
550                 }
551
552                 else if (t.cs() == "\"") {
553                         string const name = p.verbatim_item();
554                              if (name == "a") os << 'ä';
555                         else if (name == "o") os << 'ö';
556                         else if (name == "u") os << 'ü';
557                         else if (name == "A") os << 'Ä';
558                         else if (name == "O") os << 'Ö';
559                         else if (name == "U") os << 'Ü';
560                         else handle_ert(os, "\"{" + name + "}");
561                 }
562
563                 else if (t.cs() == "=" || t.cs() == "H" || t.cs() == "c"
564                       || t.cs() == "^" || t.cs() == "'" || t.cs() == "~") {
565                         // we need the trim as the LyX parser chokes on such spaces
566                         os << "\n\\i \\" << t.cs() << "{"
567                            << trim(parse_text(p, FLAG_ITEM, outer), " ") << "}\n";
568                 }
569
570                 else if (t.cs() == "ss")
571                         os << "ß";
572
573                 else if (t.cs() == "i" || t.cs() == "j")
574                         os << "\\" << t.cs() << ' ';
575
576                 else if (t.cs() == "-")
577                         os << "\\SpecialChar \\-\n";
578
579                 else if (t.cs() == "\\")
580                         os << "\n\\newline\n";
581         
582                 else if (t.cs() == "lyxrightaddress") {
583                         os << "\n\\layout Right Address\n";
584                         parse_text(p, os, FLAG_ITEM, outer);
585                         os << "\n\\layout Standard\n";
586                 }
587
588                 else if (t.cs() == "lyxaddress") {
589                         os << "\n\\layout Address\n";
590                         parse_text(p, os, FLAG_ITEM, outer);
591                         os << "\n\\layout Standard\n";
592                 }
593
594                 else if (t.cs() == "input")
595                         handle_ert(os, "\\input{" + p.verbatim_item() + "}\n");
596
597                 else if (t.cs() == "fancyhead") {
598                         ostringstream ss;
599                         ss << "\\fancyhead";
600                         ss << p.getOpt();
601                         ss << '{' << p.verbatim_item() << "}\n";
602                         handle_ert(os, ss.str());
603                 }
604
605                 else {
606                         //cerr << "#: " << t << " mode: " << mode << endl;
607                         // heuristic: read up to next non-nested space
608                         /*
609                         string s = t.asInput();
610                         string z = p.verbatim_item();
611                         while (p.good() && z != " " && z.size()) {
612                                 //cerr << "read: " << z << endl;
613                                 s += z;
614                                 z = p.verbatim_item();
615                         }
616                         cerr << "found ERT: " << s << endl;
617                         handle_ert(os, s + ' ');
618                         */
619                         handle_ert(os, t.asInput() + ' ');
620                 }
621
622                 if (flags & FLAG_LEAVE) {
623                         flags &= ~FLAG_LEAVE;
624                         break;
625                 }
626         }
627 }
628
629
630 string parse_text(Parser & p, unsigned flags, const bool outer)
631 {
632         ostringstream os;
633         parse_text(p, os, flags, outer);
634         return os.str();
635 }
636
637
638 // }])