]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/text.C
9becc23b55691cf75b6b93fea89bcb9ecd30b186
[lyx.git] / src / tex2lyx / text.C
1 /** The .tex to .lyx converter
2     \author André Pönitz (2003)
3  */
4
5 // {[(
6
7 #include <config.h>
8
9 #include "Lsstream.h"
10 #include "tex2lyx.h"
11
12 #include <iostream>
13 #include <map>
14 #include <vector>
15
16 using std::cerr;
17 using std::endl;
18 using std::map;
19 using std::ostream;
20 using std::ostringstream;
21 using std::vector;
22
23
24 namespace {
25
// All tables below are null-terminated so they can be searched with
// is_known().

// Commands that parse_text() turns into a "\layout <Name>" line, where
// <Name> is the command name with its first letter capitalised.
char const * known_headings[] = { "caption", "title", "author", "date",
"paragraph", "chapter", "section", "subsection", "subsubsection", 0 };

// Commands that parse_text() passes through inside a LatexCommand inset.
char const * known_latex_commands[] = { "ref", "cite", "label", "index",
"printindex", "pageref", "url", 0 };

// LaTeX names for quotes
char const * known_quotes[] = { "glqq", "grqq", "quotedblbase",
"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0};

// The same as known_quotes with .lyx names.  This array is indexed with the
// position found in known_quotes, so both must keep the same length and
// order.  "guilsinglright" is a *single* guillemet, hence "frs" to pair up
// with "fls" for "guilsinglleft".
char const * known_coded_quotes[] = { "gld", "grd", "gld",
"grd", "gls", "fls", "frs", 0};

// LaTeX font size switches
char const * known_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0};

// The same as known_sizes with the names written after "\size" in the
// output; indexed in parallel with known_sizes.
char const * known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normal", "large", "larger", "largest",  "huge", "giant", 0};
45
46
// Returns a copy of s with its first character converted to upper case.
// An empty string is returned unchanged.
string cap(string s)
{
	if (!s.empty()) {
		// toupper() is only defined for values representable as
		// unsigned char (or EOF); a plain char may be negative for
		// non-ASCII bytes, so convert it first.
		unsigned char const first = static_cast<unsigned char>(s[0]);
		s[0] = static_cast<char>(toupper(first));
	}
	return s;
}
53
54
55 // splits "x=z, y=b" into a map
56 map<string, string> split_map(string const & s)
57 {
58         map<string, string> res;
59         vector<string> v;
60         split(s, v);
61         for (size_t i = 0; i < v.size(); ++i) {
62                 size_t const pos   = v[i].find('=');
63                 string const index = v[i].substr(0, pos);
64                 string const value = v[i].substr(pos + 1, string::npos);
65                 res[trim(index)] = trim(value);
66         }
67         return res;
68 }
69
70
// Writes the opening line of an inset: a newline, "\begin_inset " and the
// inset name exactly as given (callers append any trailing space or newline
// to the name themselves).
void begin_inset(ostream & os, string const & name)
{
	os << "\n\\begin_inset ";
	os << name;
}
75
76
// Writes the closing line of an inset followed by an empty line.
void end_inset(ostream & os)
{
	static char const * const closing = "\n\\end_inset\n\n";
	os << closing;
}
81
82
83 void skip_braces(Parser & p)
84 {
85         if (p.next_token().cat() != catBegin)
86                 return;
87         p.get_token();
88         if (p.next_token().cat() == catEnd) {
89                 p.get_token();
90                 return;
91         }
92         p.putback();
93 }
94
95
96 void handle_ert(ostream & os, string const & s)
97 {
98         begin_inset(os, "ERT");
99         os << "\nstatus Collapsed\n\n\\layout Standard\n\n";
100         for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
101                 if (*it == '\\')
102                         os << "\n\\backslash\n";
103                 else
104                         os << *it;
105         }
106         end_inset(os);
107 }
108
109
110 void handle_par(ostream & os)
111 {
112         if (active_environments.empty())
113                 return;
114         os << "\n\\layout ";
115         string s = active_environment();
116         if (s == "document" || s == "table" || s == "center")
117                 os << "Standard\n\n";
118         else if (s == "lyxcode")
119                 os << "LyX-Code\n\n";
120         else if (s == "lyxlist")
121                 os << "List\n\n";
122         else if (s == "thebibliography")
123                 os << "Bibliography\n\n";
124         else
125                 os << cap(s) << "\n\n";
126 }
127
128
129 } // anonymous namespace
130
131
132 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer)
133 {
134         while (p.good()) {
135                 Token const & t = p.get_token();
136
137 #ifdef FILEDEBUG
138                 cerr << "t: " << t << " flags: " << flags << "\n";
139 #endif
140
141                 if (flags & FLAG_ITEM) {
142                         if (t.cat() == catSpace)
143                                 continue;
144
145                         flags &= ~FLAG_ITEM;
146                         if (t.cat() == catBegin) {
147                                 // skip the brace and collect everything to the next matching
148                                 // closing brace
149                                 flags |= FLAG_BRACE_LAST;
150                                 continue;
151                         }
152
153                         // handle only this single token, leave the loop if done
154                         flags |= FLAG_LEAVE;
155                 }
156
157                 if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) 
158                         return;
159
160                 //
161                 // cat codes
162                 //
163                 if (t.cat() == catMath) {
164                         // we are inside some text mode thingy, so opening new math is allowed
165                         begin_inset(os, "Formula ");
166                         Token const & n = p.get_token();
167                         if (n.cat() == catMath && outer) {
168                                 // TeX's $$...$$ syntax for displayed math
169                                 os << "\\[";
170                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
171                                 os << "\\]";
172                                 p.get_token(); // skip the second '$' token
173                         } else {
174                                 // simple $...$  stuff
175                                 p.putback();
176                                 os << '$';
177                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
178                                 os << '$';
179                         }
180                         end_inset(os);
181                 }
182
183                 else if (t.cat() == catSuper || t.cat() == catSub)
184                         cerr << "catcode " << t << " illegal in text mode\n";
185
186                 else if (t.cat() == catLetter ||
187                                t.cat() == catSpace ||
188                                t.cat() == catOther ||
189                                t.cat() == catAlign ||
190                                t.cat() == catParameter)
191                         os << t.character();
192
193                 else if (t.cat() == catNewline) {
194                         if (p.next_token().cat() == catNewline) {
195                                 p.get_token();
196                                 handle_par(os);
197                         } else {
198                                 os << " "; // note the space
199                         }
200                 }
201
202                 else if (t.cat() == catActive) {
203                         if (t.character() == '~') {
204                                 if (active_environment() == "lyxcode")
205                                         os << ' ';
206                                 else
207                                         os << "\\SpecialChar ~\n";
208                         } else
209                                 os << t.character();
210                 }
211
212                 else if (t.cat() == catBegin) {
213                         // special handling of size changes
214                         bool const is_size = is_known(p.next_token().cs(), known_sizes);
215                         string const s = parse_text(p, FLAG_BRACE_LAST, outer);
216                         if (s.empty() && p.next_token().character() == '`')
217                                 ; // ignore it in  {}``
218                         else if (is_size || s == "[" || s == "]" || s == "*")
219                                 os << s;
220                         else {
221                                 handle_ert(os, "{");
222                                 os << s;
223                                 handle_ert(os, "}");
224                         }
225                 }
226
227                 else if (t.cat() == catEnd) {
228                         if (flags & FLAG_BRACE_LAST)
229                                 return;
230                         cerr << "stray '}' in text\n";
231                         handle_ert(os, "}");
232                 }
233
234                 else if (t.cat() == catOther)
235                         os << string(1, t.character());
236
237                 else if (t.cat() == catComment)
238                         handle_comment(p);
239
240                 //
241                 // control sequences
242                 //
243
244                 else if (t.cs() == "ldots") {
245                         skip_braces(p);
246                         os << "\n\\SpecialChar \\ldots{}\n";
247                 }
248
249                 else if (t.cs() == "(") {
250                         begin_inset(os, "Formula");
251                         os << " \\(";
252                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
253                         os << "\\)";
254                         end_inset(os);
255                 }
256
257                 else if (t.cs() == "[") {
258                         begin_inset(os, "Formula");
259                         os << " \\[";
260                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
261                         os << "\\]";
262                         end_inset(os);
263                 }
264
265                 else if (t.cs() == "begin") {
266                         string const name = p.getArg('{', '}');
267                         active_environments.push_back(name);
268                         if (is_math_env(name)) {
269                                 begin_inset(os, "Formula ");
270                                 os << "\\begin{" << name << "}";
271                                 parse_math(p, os, FLAG_END, MATH_MODE);
272                                 os << "\\end{" << name << "}";
273                                 end_inset(os);
274                         } else if (name == "tabular") {
275                                 begin_inset(os, "Tabular ");
276                                 handle_tabular(p, os);
277                                 end_inset(os);
278                         } else if (name == "table" || name == "figure") {
279                                 string opts = p.getOpt();
280                                 begin_inset(os, "Float " + name + "\n");
281                                 if (opts.size())
282                                         os << "placement " << opts << '\n';
283                                 os << "wide false\ncollapsed false\n\n"
284                                          << "\\layout Standard\n";
285                                 parse_text(p, os, FLAG_END, outer);
286                                 end_inset(os);
287                         } else if (name == "center") {
288                                 active_environments.pop_back();
289                                 handle_par(os); 
290                                 active_environments.push_back(name);
291                                 os << "\\align center\n";
292                                 parse_text(p, os, FLAG_END, outer);
293                         } else if (name == "enumerate" || name == "itemize"
294                                         || name == "lyxlist") {
295                                 size_t const n = active_environments.size();
296                                 string const s = active_environments[n - 2];
297                                 bool const deeper = s == "enumerate" || s == "itemize"
298                                         || s == "lyxlist";
299                                 if (deeper)
300                                         os << "\n\\begin_deeper";
301                                 os << "\n\\layout " << cap(name) << "\n\n";
302                                 if (name == "lyxlist")
303                                         p.verbatim_item(); // swallow next arg
304                                 parse_text(p, os, FLAG_END, outer);
305                                 if (deeper)
306                                         os << "\n\\end_deeper\n";
307                                 handle_par(os);
308                         } else if (name == "thebibliography") {
309                                 p.verbatim_item(); // swallow next arg
310                                 parse_text(p, os, FLAG_END, outer);
311                                 os << "\n\\layout Bibliography\n\n";
312                         } else {
313                                 handle_par(os);
314                                 parse_text(p, os, FLAG_END, outer);
315                         }
316                 }
317
318                 else if (t.cs() == "end") {
319                         if (flags & FLAG_END) {
320                                 // eat environment name
321                                 string const name = p.getArg('{', '}');
322                                 if (name != active_environment())
323                                         cerr << "\\end{" + name + "} does not match \\begin{"
324                                                 + active_environment() + "}\n";
325                                 active_environments.pop_back();
326                                 handle_par(os);
327                                 return;
328                         }
329                         p.error("found 'end' unexpectedly");
330                 }
331
332                 else if (t.cs() == "item") {
333                         p.skip_spaces();
334                         string s; 
335                         if (p.next_token().character() == '[') {
336                                 p.get_token(); // eat '['
337                                 s = parse_text(p, FLAG_BRACK_LAST, outer);
338                         }
339                         handle_par(os);
340                         os << s << ' ';
341                 }
342
343                 else if (t.cs() == "def") {
344                         string name = p.get_token().cs();
345                         while (p.next_token().cat() != catBegin)
346                                 name += p.get_token().asString();
347                         handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}');
348                 }
349
350                 else if (t.cs() == "par") {
351                         p.skip_spaces();
352                         if (p.next_token().cs() != "\\begin")
353                                 handle_par(os);
354                         //cerr << "next token: '" << p.next_token().cs() << "'\n";
355                 }
356
357                 else if (is_known(t.cs(), known_headings)) {
358                         string name = t.cs();
359                         if (p.next_token().asInput() == "*") {
360                                 p.get_token();
361                                 name += "*";
362                         }
363                         os << "\n\n\\layout " << cap(name) << "\n\n";
364                         string opt = p.getOpt();
365                         if (opt.size()) {
366                                 begin_inset(os, "OptArg\n");
367                                 os << "collapsed true\n\n\\layout Standard\n\n" << opt;
368                                 end_inset(os);
369                         }
370                         parse_text(p, os, FLAG_ITEM, outer);
371                         os << "\n\n\\layout Standard\n\n";
372                 }
373
374                 else if (t.cs() == "includegraphics") {
375                         map<string, string> opts = split_map(p.getArg('[', ']'));
376                         string name = p.verbatim_item();
377                         begin_inset(os, "Graphics ");
378                         os << "\n\tfilename " << name << '\n';
379                         if (opts.find("width") != opts.end())
380                                 os << "\twidth " << opts["width"] << '\n';
381                         if (opts.find("height") != opts.end())
382                                 os << "\theight " << opts["height"] << '\n';
383                         end_inset(os);
384                 }
385                 
386                 else if (t.cs() == "footnote") {
387                         begin_inset(os, "Foot\n");
388                         os << "collapsed true\n\n\\layout Standard\n\n";
389                         parse_text(p, os, FLAG_ITEM, false);
390                         end_inset(os);
391                 }
392
393                 else if (t.cs() == "ensuremath") {
394                         string s = parse_text(p, FLAG_ITEM, false);
395                         if (s == "±" || s == "³" || s == "²" || s == "µ")
396                                 os << s;
397                         else
398                                 handle_ert(os, "\\ensuremath{" + s + "}");
399                 }
400
401                 else if (t.cs() == "marginpar") {
402                         begin_inset(os, "Marginal\n");
403                         os << "collapsed true\n\n\\layout Standard\n\n";
404                         parse_text(p, os, FLAG_ITEM, false);
405                         end_inset(os);
406                 }
407
408                 else if (t.cs() == "hfill") {
409                         os << "\n\\hfill\n";
410                         skip_braces(p);
411                 }
412
413                 else if (t.cs() == "makeindex" || t.cs() == "maketitle")
414                         skip_braces(p); // swallow this
415
416                 else if (t.cs() == "tableofcontents")
417                         skip_braces(p); // swallow this
418
419                 else if (t.cs() == "textrm") {
420                         os << "\n\\family roman \n";
421                         parse_text(p, os, FLAG_ITEM, outer);
422                         os << "\n\\family default \n";
423                 }
424
425                 else if (t.cs() == "textsf") {
426                         os << "\n\\family sans \n";
427                         parse_text(p, os, FLAG_ITEM, outer);
428                         os << "\n\\family default \n";
429                 }
430
431                 else if (t.cs() == "texttt") {
432                         os << "\n\\family typewriter \n";
433                         parse_text(p, os, FLAG_ITEM, outer);
434                         os << "\n\\family default \n";
435                 }
436
437                 else if (t.cs() == "textit") {
438                         os << "\n\\shape italic \n";
439                         parse_text(p, os, FLAG_ITEM, outer);
440                         os << "\n\\shape default \n";
441                 }
442
443                 else if (t.cs() == "textsc") {
444                         os << "\n\\noun on \n";
445                         parse_text(p, os, FLAG_ITEM, outer);
446                         os << "\n\\noun default \n";
447                 }
448
449                 else if (t.cs() == "textbf") {
450                         os << "\n\\series bold \n";
451                         parse_text(p, os, FLAG_ITEM, outer);
452                         os << "\n\\series default \n";
453                 }
454
455                 else if (t.cs() == "underbar") {
456                         os << "\n\\bar under \n";
457                         parse_text(p, os, FLAG_ITEM, outer);
458                         os << "\n\\bar default \n";
459                 }
460
461                 else if (t.cs() == "emph" || t.cs() == "noun") {
462                         os << "\n\\" << t.cs() << " on \n";
463                         parse_text(p, os, FLAG_ITEM, outer);
464                         os << "\n\\" << t.cs() << " default \n";
465                 }
466
467                 else if (t.cs() == "bibitem") {
468                         os << "\n\\layout Bibliography\n\\bibitem ";
469                         os << p.getOpt();
470                         os << '{' << p.verbatim_item() << '}' << "\n\n";
471                 }
472
473                 else if (is_known(t.cs(), known_latex_commands)) {
474                         begin_inset(os, "LatexCommand ");
475                         os << '\\' << t.cs();
476                         os << p.getOpt();
477                         os << p.getOpt();
478                         os << '{' << p.verbatim_item() << '}';
479                         end_inset(os);
480                 }
481
482                 else if (is_known(t.cs(), known_quotes)) {
483                   char const ** where = is_known(t.cs(), known_quotes);
484                         begin_inset(os, "Quotes ");
485                         os << known_coded_quotes[where - known_quotes];
486                         end_inset(os);
487                         skip_braces(p);
488                 }
489
490                 else if (is_known(t.cs(), known_sizes)) {
491                   char const ** where = is_known(t.cs(), known_sizes);
492                         os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
493                 }
494
495                 else if (t.cs() == "LyX" || t.cs() == "TeX"
496                       || t.cs() == "LaTeX" || t.cs() == "LaTeXe") {
497                         os << t.cs();
498                         skip_braces(p); // eat {}
499                 }
500
501                 else if (t.cs() == "lyxarrow") {
502                         os << "\\SpecialChar \\menuseparator\n";
503                         skip_braces(p);
504                 }
505
506                 else if (t.cs() == "ldots") {
507                         os << "\\SpecialChar \\ldots{}\n";
508                         skip_braces(p);
509                 }
510
511                 else if (t.cs() == "@") {
512                         os << "\\SpecialChar \\@";
513                         skip_braces(p);
514                 }
515
516                 else if (t.cs() == "textasciitilde") {
517                         os << '~';
518                         skip_braces(p);
519                 }
520
521                 else if (t.cs() == "textasciicircum") {
522                         os << '^';
523                         skip_braces(p);
524                 }
525
526                 else if (t.cs() == "textbackslash") {
527                         os << "\n\\backslash\n";
528                         skip_braces(p);
529                 }
530
531                 else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" || t.cs() == "$"
532                             || t.cs() == "{" || t.cs() == "}" || t.cs() == "%")
533                         os << t.cs();
534
535                 else if (t.cs() == "char") {
536                         if (p.next_token().character() == '`') {
537                                 p.get_token();
538                                 if (p.next_token().cs() == "\"") {
539                                         p.get_token();
540                                         os << '"';
541                                         skip_braces(p);
542                                 } else {
543                                         handle_ert(os, "\\char`");
544                                 }
545                         } else {
546                                 handle_ert(os, "\\char");
547                         }
548                 }
549
550                 else if (t.cs() == "\"") {
551                         string const name = p.verbatim_item();
552                              if (name == "a") os << 'ä';
553                         else if (name == "o") os << 'ö';
554                         else if (name == "u") os << 'ü';
555                         else if (name == "A") os << 'Ä';
556                         else if (name == "O") os << 'Ö';
557                         else if (name == "U") os << 'Ü';
558                         else handle_ert(os, "\"{" + name + "}");
559                 }
560
561                 else if (t.cs() == "=" || t.cs() == "H" || t.cs() == "c"
562                       || t.cs() == "^" || t.cs() == "'" || t.cs() == "~") {
563                         // we need the trim as the LyX parser chokes on such spaces
564                         os << "\n\\i \\" << t.cs() << "{"
565                            << trim(parse_text(p, FLAG_ITEM, outer), " ") << "}\n";
566                 }
567
568                 else if (t.cs() == "ss")
569                         os << "ß";
570
571                 else if (t.cs() == "i" || t.cs() == "j")
572                         os << "\\" << t.cs() << ' ';
573
574                 else if (t.cs() == "-")
575                         os << "\\SpecialChar \\-\n";
576
577                 else if (t.cs() == "\\")
578                         os << "\n\\newline\n";
579         
580                 else if (t.cs() == "lyxrightaddress") {
581                         os << "\n\\layout Right Address\n";
582                         parse_text(p, os, FLAG_ITEM, outer);
583                         os << "\n\\layout Standard\n";
584                 }
585
586                 else if (t.cs() == "lyxaddress") {
587                         os << "\n\\layout Address\n";
588                         parse_text(p, os, FLAG_ITEM, outer);
589                         os << "\n\\layout Standard\n";
590                 }
591
592                 else if (t.cs() == "input")
593                         handle_ert(os, "\\input{" + p.verbatim_item() + "}\n");
594
595                 else if (t.cs() == "fancyhead") {
596                         ostringstream ss;
597                         ss << "\\fancyhead";
598                         ss << p.getOpt();
599                         ss << '{' << p.verbatim_item() << "}\n";
600                         handle_ert(os, STRCONV(ss.str()));
601                 }
602
603                 else {
604                         //cerr << "#: " << t << " mode: " << mode << endl;
605                         // heuristic: read up to next non-nested space
606                         /*
607                         string s = t.asInput();
608                         string z = p.verbatim_item();
609                         while (p.good() && z != " " && z.size()) {
610                                 //cerr << "read: " << z << endl;
611                                 s += z;
612                                 z = p.verbatim_item();
613                         }
614                         cerr << "found ERT: " << s << endl;
615                         handle_ert(os, s + ' ');
616                         */
617                         handle_ert(os, t.asInput() + ' ');
618                 }
619
620                 if (flags & FLAG_LEAVE) {
621                         flags &= ~FLAG_LEAVE;
622                         break;
623                 }
624         }
625 }
626
627
628 string parse_text(Parser & p, unsigned flags, const bool outer)
629 {
630         ostringstream os;
631         parse_text(p, os, flags, outer);
632         return STRCONV(os.str());
633 }
634
635
636 // }])