]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/text.C
ffb9dc8f56937f080417bdf1092781f92af85673
[lyx.git] / src / tex2lyx / text.C
1 /** The .tex to .lyx converter
2     \author André Pönitz (2003)
3  */
4
5 // {[(
6
7 #include <config.h>
8
9 #include "tex2lyx.h"
10 #include "FloatList.h"
11 #include "lyxtextclass.h"
12 #include "support/lstrings.h"
13 #include "support/tostr.h"
14
15 #include <iostream>
16 #include <map>
17 #include <sstream>
18 #include <vector>
19
20 using std::cerr;
21 using std::endl;
22 using std::map;
23 using std::ostream;
24 using std::ostringstream;
25 using std::string;
26 using std::vector;
27
28 using lyx::support::rtrim;
29 using lyx::support::suffixIs;
30
31 namespace {
32
// LaTeX commands that are written out verbatim as a LatexCommand inset.
// Like all tables below, the list is terminated by a null pointer and is
// searched with is_known().
char const * known_latex_commands[] = { "ref", "cite", "label", "index",
"printindex", "pageref", "url", 0 };

// LaTeX names for quotes
char const * known_quotes[] = { "glqq", "grqq", "quotedblbase",
"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0};

// the same as known_quotes with .lyx names; entry i is the code written
// for known_quotes[i], so both tables must keep the same length and order.
// "guilsinglright" is a single quote and therefore maps to "frs", matching
// "guilsinglleft" -> "fls".
char const * known_coded_quotes[] = { "gld", "grd", "gld",
"grd", "gls", "fls", "frs", 0};

// LaTeX font size switches
char const * known_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normalsize", "large", "Large", "LARGE", "huge", "Huge", 0};

// the same as known_sizes with the names written after "\size" in the
// output; entry i corresponds to known_sizes[i].
char const * known_coded_sizes[] = { "tiny", "scriptsize", "footnotesize",
"small", "normal", "large", "larger", "largest",  "huge", "giant", 0};
49
50
// Returns a copy of s whose first character has been passed through
// toupper(); an empty string is returned unchanged.
string cap(string s)
{
        if (!s.empty()) {
                // toupper() is only defined for arguments representable as
                // unsigned char (or EOF). A plain char holding a non-ASCII
                // byte may be negative, so convert before the call.
                unsigned char const first = static_cast<unsigned char>(s[0]);
                s[0] = static_cast<char>(toupper(first));
        }
        return s;
}
57
58
59 // splits "x=z, y=b" into a map
60 map<string, string> split_map(string const & s)
61 {
62         map<string, string> res;
63         vector<string> v;
64         split(s, v);
65         for (size_t i = 0; i < v.size(); ++i) {
66                 size_t const pos   = v[i].find('=');
67                 string const index = v[i].substr(0, pos);
68                 string const value = v[i].substr(pos + 1, string::npos);
69                 res[trim(index)] = trim(value);
70         }
71         return res;
72 }
73
74
// Writes the opening line of an inset to os: a newline, the keyword
// "\begin_inset" and the inset name (plus whatever arguments the caller
// appended to it).
void begin_inset(ostream & os, string const & name)
{
        char const * const opener = "\n\\begin_inset ";
        os << opener;
        os << name;
}
79
80
// Writes the closing line of an inset to os, followed by an empty line.
void end_inset(ostream & os)
{
        char const * const closer = "\n\\end_inset \n\n";
        os << closer;
}
85
86
87 void skip_braces(Parser & p)
88 {
89         if (p.next_token().cat() != catBegin)
90                 return;
91         p.get_token();
92         if (p.next_token().cat() == catEnd) {
93                 p.get_token();
94                 return;
95         }
96         p.putback();
97 }
98
99
100 void handle_ert(ostream & os, string const & s)
101 {
102         begin_inset(os, "ERT");
103         os << "\nstatus Collapsed\n\n\\layout Standard\n\n";
104         for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
105                 if (*it == '\\')
106                         os << "\n\\backslash \n";
107                 else
108                         os << *it;
109         }
110         end_inset(os);
111 }
112
113
114 void handle_par(ostream & os)
115 {
116         if (active_environments.empty())
117                 return;
118         os << "\n\\layout ";
119         string s = active_environment();
120         if (s == "document" || s == "table")
121                 os << "Standard\n\n";
122         else if (s == "center")
123                 os << "Standard\n\n\\align center\n";
124         else if (s == "lyxcode")
125                 os << "LyX-Code\n\n";
126         else if (s == "lyxlist")
127                 os << "List\n\n";
128         else if (s == "thebibliography")
129                 os << "Bibliography\n\n";
130         else
131                 os << cap(s) << "\n\n";
132 }
133
134
135 struct isLayout {
136         isLayout(string const name) : name_(name) {}
137         bool operator()(LyXLayout_ptr const & ptr) {
138                 return ptr.get() && ptr->latexname() == name_;
139         }
140 private:
141         string const name_;
142 };
143
144
145 LyXLayout_ptr findLayout(LyXTextClass const & textclass,
146                          string const & name) 
147 {
148         LyXTextClass::const_iterator it  = textclass.begin();
149         LyXTextClass::const_iterator end = textclass.end();
150         it = std::find_if(it, end, isLayout(name));
151         return (it == end) ? LyXLayout_ptr() : *it;
152 }
153
154
155 void output_layout(ostream & os, LyXLayout_ptr const & layout_ptr,
156                   Parser & p, bool outer, LyXTextClass const & textclass)
157 {
158         string name = layout_ptr->name();
159         os << "\n\n\\layout " << name << "\n\n";
160         if (layout_ptr->optionalargs > 0) {
161                 string s; 
162                 if (p.next_token().character() == '[') {
163                         p.get_token(); // eat '['
164                         begin_inset(os, "OptArg\n");
165                         os << "collapsed true\n\n\\layout Standard\n\n";
166                         parse_text(p, os, FLAG_BRACK_LAST, outer, textclass);
167                         end_inset(os);
168                 }
169         }
170         parse_text(p, os, FLAG_ITEM, outer, textclass);
171         os << "\n\n\\layout Standard\n\n";
172 }
173
174 } // anonymous namespace
175
176
177 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
178                 LyXTextClass const & textclass)
179 {
180         while (p.good()) {
181                 LyXLayout_ptr layout_ptr;
182                 Token const & t = p.get_token();
183
184 #ifdef FILEDEBUG
185                 cerr << "t: " << t << " flags: " << flags << "\n";
186 #endif
187
188                 if (flags & FLAG_ITEM) {
189                         if (t.cat() == catSpace)
190                                 continue;
191
192                         flags &= ~FLAG_ITEM;
193                         if (t.cat() == catBegin) {
194                                 // skip the brace and collect everything to the next matching
195                                 // closing brace
196                                 flags |= FLAG_BRACE_LAST;
197                                 continue;
198                         }
199
200                         // handle only this single token, leave the loop if done
201                         flags |= FLAG_LEAVE;
202                 }
203
204                 if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) 
205                         return;
206
207                 //
208                 // cat codes
209                 //
210                 if (t.cat() == catMath) {
211                         // we are inside some text mode thingy, so opening new math is allowed
212                         begin_inset(os, "Formula ");
213                         Token const & n = p.get_token();
214                         if (n.cat() == catMath && outer) {
215                                 // TeX's $$...$$ syntax for displayed math
216                                 os << "\\[";
217                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
218                                 os << "\\]";
219                                 p.get_token(); // skip the second '$' token
220                         } else {
221                                 // simple $...$  stuff
222                                 p.putback();
223                                 os << '$';
224                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
225                                 os << '$';
226                         }
227                         end_inset(os);
228                 }
229
230                 else if (t.cat() == catSuper || t.cat() == catSub)
231                         cerr << "catcode " << t << " illegal in text mode\n";
232
233                 // Basic support for english quotes. This should be
234                 // extended to other quotes, but is not so easy (a
235                 // left english quote is the same as a right german
236                 // quote...)
237                 else if (t.asInput() == "`" 
238                          && p.next_token().asInput() == "`") {
239                         begin_inset(os, "Quotes ");
240                         os << "eld";
241                         end_inset(os);
242                         p.get_token();
243                         skip_braces(p);
244                 }       
245                 else if (t.asInput() == "'" 
246                          && p.next_token().asInput() == "'") {
247                         begin_inset(os, "Quotes ");
248                         os << "erd";
249                         end_inset(os);
250                         p.get_token();
251                         skip_braces(p);
252                 }       
253
254
255                 else if (t.cat() == catLetter ||
256                                t.cat() == catSpace ||
257                                t.cat() == catOther ||
258                                t.cat() == catAlign ||
259                                t.cat() == catParameter)
260                         os << t.character();
261
262                 else if (t.cat() == catNewline) {
263                         if (p.next_token().cat() == catNewline) {
264                                 p.get_token();
265                                 handle_par(os);
266                         } else {
267                                 os << " "; // note the space
268                         }
269                 }
270
271                 else if (t.cat() == catActive) {
272                         if (t.character() == '~') {
273                                 if (active_environment() == "lyxcode")
274                                         os << ' ';
275                                 else 
276                                         os << "\\InsetSpace ~\n";
277                         } else
278                                 os << t.character();
279                 }
280
281                 else if (t.cat() == catBegin) {
282                         // special handling of size changes
283                         bool const is_size = is_known(p.next_token().cs(), known_sizes);
284                         string const s = parse_text(p, FLAG_BRACE_LAST, outer, textclass);
285                         if (s.empty() && p.next_token().character() == '`')
286                                 ; // ignore it in  {}``
287                         else if (is_size || s == "[" || s == "]" || s == "*")
288                                 os << s;
289                         else {
290                                 handle_ert(os, "{");
291                                 os << s;
292                                 handle_ert(os, "}");
293                         }
294                 }
295
296                 else if (t.cat() == catEnd) {
297                         if (flags & FLAG_BRACE_LAST)
298                                 return;
299                         cerr << "stray '}' in text\n";
300                         handle_ert(os, "}");
301                 }
302
303                 else if (t.cat() == catOther)
304                         os << string(1, t.character());
305
306                 else if (t.cat() == catComment)
307                         handle_comment(p);
308
309                 //
310                 // control sequences
311                 //
312
313                 else if (t.cs() == "(") {
314                         begin_inset(os, "Formula");
315                         os << " \\(";
316                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
317                         os << "\\)";
318                         end_inset(os);
319                 }
320
321                 else if (t.cs() == "[") {
322                         begin_inset(os, "Formula");
323                         os << " \\[";
324                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
325                         os << "\\]";
326                         end_inset(os);
327                 }
328
329                 else if (t.cs() == "begin") {
330                         string const name = p.getArg('{', '}');
331                         const bool is_starred = suffixIs(name, '*');
332                         string const unstarred_name = rtrim(name, "*");
333                         active_environments.push_back(name);
334                         if (is_math_env(name)) {
335                                 begin_inset(os, "Formula ");
336                                 os << "\\begin{" << name << "}";
337                                 parse_math(p, os, FLAG_END, MATH_MODE);
338                                 os << "\\end{" << name << "}";
339                                 end_inset(os);
340                         } else if (name == "tabular") {
341                                 begin_inset(os, "Tabular ");
342                                 handle_tabular(p, os, textclass);
343                                 end_inset(os);
344                         } else if (textclass.floats().typeExist(unstarred_name)) {
345                                 begin_inset(os, "Float " + unstarred_name + "\n");
346                                 if (p.next_token().asInput() == "[") {
347                                         os << "placement " 
348                                            << p.getArg('[', ']') << '\n';
349                                 }
350                                 os << "wide " << tostr(is_starred)
351                                    << "\ncollapsed false\n\n"
352                                    << "\\layout Standard\n";
353                                 parse_text(p, os, FLAG_END, outer,
354                                            textclass);
355                                 end_inset(os);
356                         } else if (name == "center") {
357                                 handle_par(os);
358                                 parse_text(p, os, FLAG_END, outer,
359                                            textclass);
360                                 // The single '=' is meant here.
361                         } else if ((layout_ptr = findLayout(textclass, t.cs())).get() &&
362                                    layout_ptr->isEnvironment()) {
363                                 size_t const n = active_environments.size();
364                                 string const s = active_environments[n - 2];
365                                 bool const deeper = s == "enumerate" || s == "itemize"
366                                         || s == "lyxlist";
367                                 if (deeper)
368                                         os << "\n\\begin_deeper";
369                                 os << "\n\\layout " << layout_ptr->name() 
370                                    << "\n\n";
371                                 switch (layout_ptr->latextype) {
372                                 case  LATEX_LIST_ENVIRONMENT:
373                                         os << "\\labelwidthstring "
374                                            << p.verbatim_item() << '\n';
375                                         break;
376                                 case  LATEX_BIB_ENVIRONMENT:
377                                         p.verbatim_item(); // swallow next arg
378                                         break;
379                                 default:
380                                         break;
381                                 }
382                                 parse_text(p, os, FLAG_END, outer, textclass);
383                                 if (deeper)
384                                         os << "\n\\end_deeper\n";
385                                 handle_par(os);
386                         } else {
387                                 handle_par(os);
388                                 parse_text(p, os, FLAG_END, outer, textclass);
389                         }
390                 }
391
392                 else if (t.cs() == "end") {
393                         if (flags & FLAG_END) {
394                                 // eat environment name
395                                 string const name = p.getArg('{', '}');
396                                 if (name != active_environment())
397                                         cerr << "\\end{" + name + "} does not match \\begin{"
398                                                 + active_environment() + "}\n";
399                                 active_environments.pop_back();
400                                 handle_par(os);
401                                 return;
402                         }
403                         p.error("found 'end' unexpectedly");
404                 }
405
406                 else if (t.cs() == "item") {
407                         // should be done automatically by Parser::tokenize
408                         //p.skip_spaces();
409                         string s; 
410                         if (p.next_token().character() == '[') {
411                                 p.get_token(); // eat '['
412                                 s = parse_text(p, FLAG_BRACK_LAST, outer, textclass);
413                         }
414                         handle_par(os);
415                         if (s.size())
416                                 os << s << ' ';
417                 }
418
419                 else if (t.cs() == "def") {
420                         string name = p.get_token().cs();
421                         while (p.next_token().cat() != catBegin)
422                                 name += p.get_token().asString();
423                         handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}');
424                 }
425
426                 else if (t.cs() == "par") {
427                         p.skip_spaces();
428                         if (p.next_token().cs() != "\\begin")
429                                 handle_par(os);
430                         //cerr << "next token: '" << p.next_token().cs() << "'\n";
431                 }
432
433                 // Must attempt to parse "Section*" before "Section".
434                 else if ((p.next_token().asInput() == "*") &&
435                          // The single '=' is meant here.
436                          (layout_ptr = findLayout(textclass,
437                                                   t.cs() + '*')).get() &&
438                          layout_ptr->isCommand()) {
439                         p.get_token();
440                         output_layout(os, layout_ptr, p, outer, textclass);
441                 }
442
443                 // The single '=' is meant here.
444                 else if ((layout_ptr = findLayout(textclass, t.cs())).get() &&
445                          layout_ptr->isCommand()) {
446                         output_layout(os, layout_ptr, p, outer, textclass);
447                 }
448
449                 else if (t.cs() == "includegraphics") {
450                         map<string, string> opts = split_map(p.getArg('[', ']'));
451                         string name = p.verbatim_item();
452                         begin_inset(os, "Graphics ");
453                         os << "\n\tfilename " << name << '\n';
454                         if (opts.find("width") != opts.end())
455                                 os << "\twidth " << opts["width"] << '\n';
456                         if (opts.find("height") != opts.end())
457                                 os << "\theight " << opts["height"] << '\n';
458                         end_inset(os);
459                 }
460                 
461                 else if (t.cs() == "footnote") {
462                         begin_inset(os, "Foot\n");
463                         os << "collapsed true\n\n\\layout Standard\n\n";
464                         parse_text(p, os, FLAG_ITEM, false, textclass);
465                         end_inset(os);
466                 }
467
468                 else if (t.cs() == "ensuremath") {
469                         string s = parse_text(p, FLAG_ITEM, false, textclass);
470                         if (s == "±" || s == "³" || s == "²" || s == "µ")
471                                 os << s;
472                         else
473                                 handle_ert(os, "\\ensuremath{" + s + "}");
474                 }
475
476                 else if (t.cs() == "marginpar") {
477                         begin_inset(os, "Marginal\n");
478                         os << "collapsed true\n\n\\layout Standard\n\n";
479                         parse_text(p, os, FLAG_ITEM, false, textclass);
480                         end_inset(os);
481                 }
482
483                 else if (t.cs() == "hfill") {
484                         os << "\n\\hfill\n";
485                         skip_braces(p);
486                 }
487
488                 else if (t.cs() == "makeindex" || t.cs() == "maketitle")
489                         skip_braces(p); // swallow this
490
491                 else if (t.cs() == "tableofcontents") {
492                         begin_inset(os, "LatexCommand ");
493                         os << '\\' << t.cs() << "{}\n";
494                         end_inset(os);
495                         skip_braces(p); // swallow this
496                 }
497
498
499                 else if (t.cs() == "textrm") {
500                         os << "\n\\family roman \n";
501                         parse_text(p, os, FLAG_ITEM, outer, textclass);
502                         os << "\n\\family default \n";
503                 }
504
505                 else if (t.cs() == "textsf") {
506                         os << "\n\\family sans \n";
507                         parse_text(p, os, FLAG_ITEM, outer, textclass);
508                         os << "\n\\family default \n";
509                 }
510
511                 else if (t.cs() == "texttt") {
512                         os << "\n\\family typewriter \n";
513                         parse_text(p, os, FLAG_ITEM, outer, textclass);
514                         os << "\n\\family default \n";
515                 }
516
517                 else if (t.cs() == "textit") {
518                         os << "\n\\shape italic \n";
519                         parse_text(p, os, FLAG_ITEM, outer, textclass);
520                         os << "\n\\shape default \n";
521                 }
522
523                 else if (t.cs() == "textsc") {
524                         os << "\n\\noun on \n";
525                         parse_text(p, os, FLAG_ITEM, outer, textclass);
526                         os << "\n\\noun default \n";
527                 }
528
529                 else if (t.cs() == "textbf") {
530                         os << "\n\\series bold \n";
531                         parse_text(p, os, FLAG_ITEM, outer, textclass);
532                         os << "\n\\series default \n";
533                 }
534
535                 else if (t.cs() == "underbar") {
536                         os << "\n\\bar under \n";
537                         parse_text(p, os, FLAG_ITEM, outer, textclass);
538                         os << "\n\\bar default \n";
539                 }
540
541                 else if (t.cs() == "emph" || t.cs() == "noun") {
542                         os << "\n\\" << t.cs() << " on \n";
543                         parse_text(p, os, FLAG_ITEM, outer, textclass);
544                         os << "\n\\" << t.cs() << " default \n";
545                 }
546
547                 else if (t.cs() == "bibitem") {
548                         os << "\n\\layout Bibliography\n\\bibitem ";
549                         os << p.getOpt();
550                         os << '{' << p.verbatim_item() << '}' << "\n";
551                 }
552
553                 else if (is_known(t.cs(), known_latex_commands)) {
554                         begin_inset(os, "LatexCommand ");
555                         os << '\\' << t.cs();
556                         os << p.getOpt();
557                         os << p.getOpt();
558                         os << '{' << p.verbatim_item() << "}\n";
559                         end_inset(os);
560                 }
561
562                 else if (is_known(t.cs(), known_quotes)) {
563                   char const ** where = is_known(t.cs(), known_quotes);
564                         begin_inset(os, "Quotes ");
565                         os << known_coded_quotes[where - known_quotes];
566                         end_inset(os);
567                         skip_braces(p);
568                 }
569
570                 else if (is_known(t.cs(), known_sizes)) {
571                   char const ** where = is_known(t.cs(), known_sizes);
572                         os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
573                 }
574
575                 else if (t.cs() == "LyX" || t.cs() == "TeX" 
576                          || t.cs() == "LaTeX") {
577                         os << t.cs();
578                         skip_braces(p); // eat {}
579                 }
580
581                 else if (t.cs() == "LaTeXe") {
582                         os << "LaTeX2e";
583                         skip_braces(p); // eat {}
584                 }
585
586                 else if (t.cs() == "ldots") {
587                         skip_braces(p);
588                         os << "\\SpecialChar \\ldots{}\n";
589                 }
590
591                 else if (t.cs() == "lyxarrow") {
592                         os << "\\SpecialChar \\menuseparator\n";
593                         skip_braces(p);
594                 }
595
596                 else if (t.cs() == "ldots") {
597                         os << "\\SpecialChar \\ldots{}\n";
598                         skip_braces(p);
599                 }
600
601                 else if (t.cs() == "@" && p.next_token().asInput() == ".") {
602                         os << "\\SpecialChar \\@.\n";
603                         p.get_token();
604                 }
605
606                 else if (t.cs() == "-")
607                         os << "\\SpecialChar \\-\n";
608
609                 else if (t.cs() == "textasciitilde") {
610                         os << '~';
611                         skip_braces(p);
612                 }
613
614                 else if (t.cs() == "textasciicircum") {
615                         os << '^';
616                         skip_braces(p);
617                 }
618
619                 else if (t.cs() == "textbackslash") {
620                         os << "\n\\backslash \n";
621                         skip_braces(p);
622                 }
623
624                 else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" 
625                             || t.cs() == "$" || t.cs() == "{" || t.cs() == "}" 
626                             || t.cs() == "%")
627                         os << t.cs();
628
629                 else if (t.cs() == "char") {
630                         if (p.next_token().character() == '`') {
631                                 p.get_token();
632                                 if (p.next_token().cs() == "\"") {
633                                         p.get_token();
634                                         os << '"';
635                                         skip_braces(p);
636                                 } else {
637                                         handle_ert(os, "\\char`");
638                                 }
639                         } else {
640                                 handle_ert(os, "\\char");
641                         }
642                 }
643
644                 else if (t.cs() == "\"") {
645                         string const name = p.verbatim_item();
646                              if (name == "a") os << 'ä';
647                         else if (name == "o") os << 'ö';
648                         else if (name == "u") os << 'ü';
649                         else if (name == "A") os << 'Ä';
650                         else if (name == "O") os << 'Ö';
651                         else if (name == "U") os << 'Ü';
652                         else handle_ert(os, "\"{" + name + "}");
653                 }
654
655                 else if (t.cs() == "=" || t.cs() == "H" || t.cs() == "c"
656                       || t.cs() == "^" || t.cs() == "'" || t.cs() == "~") {
657                         // we need the trim as the LyX parser chokes on such spaces
658                         os << "\n\\i \\" << t.cs() << "{"
659                            << trim(parse_text(p, FLAG_ITEM, outer, textclass), " ") << "}\n";
660                 }
661
662                 else if (t.cs() == "ss")
663                         os << "ß";
664
665                 else if (t.cs() == "i" || t.cs() == "j")
666                         os << "\\" << t.cs() << ' ';
667
668                 else if (t.cs() == "\\")
669                         os << "\n\\newline \n";
670         
671                 else if (t.cs() == "input")
672                         handle_ert(os, "\\input{" + p.verbatim_item() + "}\n");
673
674                 else if (t.cs() == "fancyhead") {
675                         ostringstream ss;
676                         ss << "\\fancyhead";
677                         ss << p.getOpt();
678                         ss << '{' << p.verbatim_item() << "}\n";
679                         handle_ert(os, ss.str());
680                 }
681
682                 else {
683                         //cerr << "#: " << t << " mode: " << mode << endl;
684                         // heuristic: read up to next non-nested space
685                         /*
686                         string s = t.asInput();
687                         string z = p.verbatim_item();
688                         while (p.good() && z != " " && z.size()) {
689                                 //cerr << "read: " << z << endl;
690                                 s += z;
691                                 z = p.verbatim_item();
692                         }
693                         cerr << "found ERT: " << s << endl;
694                         handle_ert(os, s + ' ');
695                         */
696                         handle_ert(os, t.asInput() + ' ');
697                 }
698
699                 if (flags & FLAG_LEAVE) {
700                         flags &= ~FLAG_LEAVE;
701                         break;
702                 }
703         }
704 }
705
706
707 string parse_text(Parser & p, unsigned flags, const bool outer,
708                   LyXTextClass const & textclass)
709 {
710         ostringstream os;
711         parse_text(p, os, flags, outer, textclass);
712         return os.str();
713 }
714
715
716 // }])