]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/text.C
"Inter-word Space"
[lyx.git] / src / tex2lyx / text.C
1 /** The .tex to .lyx converter
2     \author André Pönitz (2003)
3  */
4
5 // {[(
6
7 #include <config.h>
8
9 #include "Lsstream.h"
10 #include "tex2lyx.h"
11
12 #include <iostream>
13 #include <map>
14 #include <vector>
15
16 using std::cerr;
17 using std::endl;
18 using std::map;
19 using std::ostream;
20 using std::ostringstream;
21 using std::vector;
22
23
24 namespace {
25
// LaTeX commands that are translated into a LyX layout of the same
// (capitalised) name. The list is terminated by a null pointer.
char const * known_headings[] = {
	"caption",
	"title",
	"author",
	"date",
	"paragraph",
	"chapter",
	"section",
	"subsection",
	"subsubsection",
	0
};

// LaTeX commands that are passed through as a LatexCommand inset.
// The list is terminated by a null pointer.
char const * known_latex_commands[] = {
	"ref",
	"cite",
	"label",
	"index",
	"printindex",
	"pageref",
	"url",
	0
};
31
// LaTeX names for quotes. The list is terminated by a null pointer.
char const * known_quotes[] = { "glqq", "grqq", "quotedblbase",
"textquotedblleft", "quotesinglbase", "guilsinglleft", "guilsinglright", 0};

// The same as known_quotes with .lyx names; entry i is the code that is
// written for known_quotes[i], so both tables must stay in the same order.
// \guilsinglright is a *single* guillemet and therefore pairs with the
// single code "frs", mirroring "fls" for \guilsinglleft.
char const * known_coded_quotes[] = { "gld", "grd", "gld",
"grd", "gls", "fls", "frs", 0};
39
// LaTeX font size switches. The list is terminated by a null pointer.
char const * known_sizes[] = {
	"tiny",
	"scriptsize",
	"footnotesize",
	"small",
	"normalsize",
	"large",
	"Large",
	"LARGE",
	"huge",
	"Huge",
	0
};

// The names written after "\size" for the entries of known_sizes;
// entry i belongs to known_sizes[i].
char const * known_coded_sizes[] = {
	"tiny",
	"scriptsize",
	"footnotesize",
	"small",
	"normal",
	"large",
	"larger",
	"largest",
	"huge",
	"giant",
	0
};
45
46
/// Returns a copy of \p s with its first character converted to upper
/// case. An empty string is returned unchanged.
string cap(string s)
{
	if (!s.empty()) {
		// toupper() is only defined for values representable as
		// unsigned char (or EOF); a plain char holding an 8-bit
		// character may be negative, so convert it first.
		unsigned char const first = static_cast<unsigned char>(s[0]);
		s[0] = static_cast<char>(toupper(first));
	}
	return s;
}
53
54
55 // splits "x=z, y=b" into a map
56 map<string, string> split_map(string const & s)
57 {
58         map<string, string> res;
59         vector<string> v;
60         split(s, v);
61         for (size_t i = 0; i < v.size(); ++i) {
62                 size_t const pos   = v[i].find('=');
63                 string const index = v[i].substr(0, pos);
64                 string const value = v[i].substr(pos + 1, string::npos);
65                 res[trim(index)] = trim(value);
66         }
67         return res;
68 }
69
70
/// Writes the opening line of an inset called \p name to \p os.
/// Nothing is written after the name, so the caller decides what follows.
void begin_inset(ostream & os, string const & name)
{
	static char const * const opening = "\n\\begin_inset ";
	os << opening;
	os << name;
}
75
76
/// Writes the line that closes the current inset, followed by an empty
/// line, to \p os.
void end_inset(ostream & os)
{
	static char const * const closing = "\n\\end_inset\n\n";
	os << closing;
}
81
82
83 void skip_braces(Parser & p)
84 {
85         if (p.next_token().cat() != catBegin)
86                 return;
87         p.get_token();
88         if (p.next_token().cat() == catEnd) {
89                 p.get_token();
90                 return;
91         }
92         p.putback();
93 }
94
95
96 void handle_ert(ostream & os, string const & s)
97 {
98         begin_inset(os, "ERT");
99         os << "\nstatus Collapsed\n\n\\layout Standard\n\n";
100         for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
101                 if (*it == '\\')
102                         os << "\n\\backslash\n";
103                 else
104                         os << *it;
105         }
106         end_inset(os);
107 }
108
109
110 void handle_par(ostream & os)
111 {
112         if (active_environments.empty())
113                 return;
114         os << "\n\\layout ";
115         string s = active_environment();
116         if (s == "document" || s == "table")
117                 os << "Standard\n\n";
118         else if (s == "center")
119                 os << "Standard\n\n\\align center\n";
120         else if (s == "lyxcode")
121                 os << "LyX-Code\n\n";
122         else if (s == "lyxlist")
123                 os << "List\n\n";
124         else if (s == "thebibliography")
125                 os << "Bibliography\n\n";
126         else
127                 os << cap(s) << "\n\n";
128 }
129
130
131 } // anonymous namespace
132
133
134 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer)
135 {
136         while (p.good()) {
137                 Token const & t = p.get_token();
138
139 #ifdef FILEDEBUG
140                 cerr << "t: " << t << " flags: " << flags << "\n";
141 #endif
142
143                 if (flags & FLAG_ITEM) {
144                         if (t.cat() == catSpace)
145                                 continue;
146
147                         flags &= ~FLAG_ITEM;
148                         if (t.cat() == catBegin) {
149                                 // skip the brace and collect everything to the next matching
150                                 // closing brace
151                                 flags |= FLAG_BRACE_LAST;
152                                 continue;
153                         }
154
155                         // handle only this single token, leave the loop if done
156                         flags |= FLAG_LEAVE;
157                 }
158
159                 if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) 
160                         return;
161
162                 //
163                 // cat codes
164                 //
165                 if (t.cat() == catMath) {
166                         // we are inside some text mode thingy, so opening new math is allowed
167                         begin_inset(os, "Formula ");
168                         Token const & n = p.get_token();
169                         if (n.cat() == catMath && outer) {
170                                 // TeX's $$...$$ syntax for displayed math
171                                 os << "\\[";
172                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
173                                 os << "\\]";
174                                 p.get_token(); // skip the second '$' token
175                         } else {
176                                 // simple $...$  stuff
177                                 p.putback();
178                                 os << '$';
179                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
180                                 os << '$';
181                         }
182                         end_inset(os);
183                 }
184
185                 else if (t.cat() == catSuper || t.cat() == catSub)
186                         cerr << "catcode " << t << " illegal in text mode\n";
187
188                 else if (t.cat() == catLetter ||
189                                t.cat() == catSpace ||
190                                t.cat() == catOther ||
191                                t.cat() == catAlign ||
192                                t.cat() == catParameter)
193                         os << t.character();
194
195                 else if (t.cat() == catNewline) {
196                         if (p.next_token().cat() == catNewline) {
197                                 p.get_token();
198                                 handle_par(os);
199                         } else {
200                                 os << " "; // note the space
201                         }
202                 }
203
204                 else if (t.cat() == catActive) {
205                         if (t.character() == '~') {
206                                 if (active_environment() == "lyxcode")
207                                         os << ' ';
208                                 else
209                                         os << "\\SpecialChar ~\n";
210                         } else
211                                 os << t.character();
212                 }
213
214                 else if (t.cat() == catBegin) {
215                         // special handling of size changes
216                         bool const is_size = is_known(p.next_token().cs(), known_sizes);
217                         string const s = parse_text(p, FLAG_BRACE_LAST, outer);
218                         if (s.empty() && p.next_token().character() == '`')
219                                 ; // ignore it in  {}``
220                         else if (is_size || s == "[" || s == "]" || s == "*")
221                                 os << s;
222                         else {
223                                 handle_ert(os, "{");
224                                 os << s;
225                                 handle_ert(os, "}");
226                         }
227                 }
228
229                 else if (t.cat() == catEnd) {
230                         if (flags & FLAG_BRACE_LAST)
231                                 return;
232                         cerr << "stray '}' in text\n";
233                         handle_ert(os, "}");
234                 }
235
236                 else if (t.cat() == catOther)
237                         os << string(1, t.character());
238
239                 else if (t.cat() == catComment)
240                         handle_comment(p);
241
242                 //
243                 // control sequences
244                 //
245
246                 else if (t.cs() == "ldots") {
247                         skip_braces(p);
248                         os << "\n\\SpecialChar \\ldots{}\n";
249                 }
250
251                 else if (t.cs() == "(") {
252                         begin_inset(os, "Formula");
253                         os << " \\(";
254                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
255                         os << "\\)";
256                         end_inset(os);
257                 }
258
259                 else if (t.cs() == "[") {
260                         begin_inset(os, "Formula");
261                         os << " \\[";
262                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
263                         os << "\\]";
264                         end_inset(os);
265                 }
266
267                 else if (t.cs() == "begin") {
268                         string const name = p.getArg('{', '}');
269                         active_environments.push_back(name);
270                         if (is_math_env(name)) {
271                                 begin_inset(os, "Formula ");
272                                 os << "\\begin{" << name << "}";
273                                 parse_math(p, os, FLAG_END, MATH_MODE);
274                                 os << "\\end{" << name << "}";
275                                 end_inset(os);
276                         } else if (name == "tabular") {
277                                 begin_inset(os, "Tabular ");
278                                 handle_tabular(p, os);
279                                 end_inset(os);
280                         } else if (name == "table" || name == "figure") {
281                                 string opts = p.getOpt();
282                                 begin_inset(os, "Float " + name + "\n");
283                                 if (opts.size())
284                                         os << "placement " << opts << '\n';
285                                 os << "wide false\ncollapsed false\n\n"
286                                          << "\\layout Standard\n";
287                                 parse_text(p, os, FLAG_END, outer);
288                                 end_inset(os);
289                         } else if (name == "center") {
290                                 handle_par(os);
291                                 parse_text(p, os, FLAG_END, outer);
292                         } else if (name == "enumerate" || name == "itemize"
293                                         || name == "lyxlist") {
294                                 size_t const n = active_environments.size();
295                                 string const s = active_environments[n - 2];
296                                 bool const deeper = s == "enumerate" || s == "itemize"
297                                         || s == "lyxlist";
298                                 if (deeper)
299                                         os << "\n\\begin_deeper";
300                                 os << "\n\\layout " << cap(name) << "\n\n";
301                                 if (name == "lyxlist")
302                                         p.verbatim_item(); // swallow next arg
303                                 parse_text(p, os, FLAG_END, outer);
304                                 if (deeper)
305                                         os << "\n\\end_deeper\n";
306                                 handle_par(os);
307                         } else if (name == "thebibliography") {
308                                 p.verbatim_item(); // swallow next arg
309                                 parse_text(p, os, FLAG_END, outer);
310                                 os << "\n\\layout Bibliography\n\n";
311                         } else {
312                                 handle_par(os);
313                                 parse_text(p, os, FLAG_END, outer);
314                         }
315                 }
316
317                 else if (t.cs() == "end") {
318                         if (flags & FLAG_END) {
319                                 // eat environment name
320                                 string const name = p.getArg('{', '}');
321                                 if (name != active_environment())
322                                         cerr << "\\end{" + name + "} does not match \\begin{"
323                                                 + active_environment() + "}\n";
324                                 active_environments.pop_back();
325                                 handle_par(os);
326                                 return;
327                         }
328                         p.error("found 'end' unexpectedly");
329                 }
330
331                 else if (t.cs() == "item") {
332                         p.skip_spaces();
333                         string s; 
334                         if (p.next_token().character() == '[') {
335                                 p.get_token(); // eat '['
336                                 s = parse_text(p, FLAG_BRACK_LAST, outer);
337                         }
338                         handle_par(os);
339                         os << s << ' ';
340                 }
341
342                 else if (t.cs() == "def") {
343                         string name = p.get_token().cs();
344                         while (p.next_token().cat() != catBegin)
345                                 name += p.get_token().asString();
346                         handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}');
347                 }
348
349                 else if (t.cs() == "par") {
350                         p.skip_spaces();
351                         if (p.next_token().cs() != "\\begin")
352                                 handle_par(os);
353                         //cerr << "next token: '" << p.next_token().cs() << "'\n";
354                 }
355
356                 else if (is_known(t.cs(), known_headings)) {
357                         string name = t.cs();
358                         if (p.next_token().asInput() == "*") {
359                                 p.get_token();
360                                 name += "*";
361                         }
362                         os << "\n\n\\layout " << cap(name) << "\n\n";
363                         string opt = p.getOpt();
364                         if (opt.size()) {
365                                 begin_inset(os, "OptArg\n");
366                                 os << "collapsed true\n\n\\layout Standard\n\n" << opt;
367                                 end_inset(os);
368                         }
369                         parse_text(p, os, FLAG_ITEM, outer);
370                         os << "\n\n\\layout Standard\n\n";
371                 }
372
373                 else if (t.cs() == "includegraphics") {
374                         map<string, string> opts = split_map(p.getArg('[', ']'));
375                         string name = p.verbatim_item();
376                         begin_inset(os, "Graphics ");
377                         os << "\n\tfilename " << name << '\n';
378                         if (opts.find("width") != opts.end())
379                                 os << "\twidth " << opts["width"] << '\n';
380                         if (opts.find("height") != opts.end())
381                                 os << "\theight " << opts["height"] << '\n';
382                         end_inset(os);
383                 }
384                 
385                 else if (t.cs() == "footnote") {
386                         begin_inset(os, "Foot\n");
387                         os << "collapsed true\n\n\\layout Standard\n\n";
388                         parse_text(p, os, FLAG_ITEM, false);
389                         end_inset(os);
390                 }
391
392                 else if (t.cs() == "ensuremath") {
393                         string s = parse_text(p, FLAG_ITEM, false);
394                         if (s == "±" || s == "³" || s == "²" || s == "µ")
395                                 os << s;
396                         else
397                                 handle_ert(os, "\\ensuremath{" + s + "}");
398                 }
399
400                 else if (t.cs() == "marginpar") {
401                         begin_inset(os, "Marginal\n");
402                         os << "collapsed true\n\n\\layout Standard\n\n";
403                         parse_text(p, os, FLAG_ITEM, false);
404                         end_inset(os);
405                 }
406
407                 else if (t.cs() == "hfill") {
408                         os << "\n\\hfill\n";
409                         skip_braces(p);
410                 }
411
412                 else if (t.cs() == "makeindex" || t.cs() == "maketitle")
413                         skip_braces(p); // swallow this
414
415                 else if (t.cs() == "tableofcontents")
416                         skip_braces(p); // swallow this
417
418                 else if (t.cs() == "textrm") {
419                         os << "\n\\family roman \n";
420                         parse_text(p, os, FLAG_ITEM, outer);
421                         os << "\n\\family default \n";
422                 }
423
424                 else if (t.cs() == "textsf") {
425                         os << "\n\\family sans \n";
426                         parse_text(p, os, FLAG_ITEM, outer);
427                         os << "\n\\family default \n";
428                 }
429
430                 else if (t.cs() == "texttt") {
431                         os << "\n\\family typewriter \n";
432                         parse_text(p, os, FLAG_ITEM, outer);
433                         os << "\n\\family default \n";
434                 }
435
436                 else if (t.cs() == "textit") {
437                         os << "\n\\shape italic \n";
438                         parse_text(p, os, FLAG_ITEM, outer);
439                         os << "\n\\shape default \n";
440                 }
441
442                 else if (t.cs() == "textsc") {
443                         os << "\n\\noun on \n";
444                         parse_text(p, os, FLAG_ITEM, outer);
445                         os << "\n\\noun default \n";
446                 }
447
448                 else if (t.cs() == "textbf") {
449                         os << "\n\\series bold \n";
450                         parse_text(p, os, FLAG_ITEM, outer);
451                         os << "\n\\series default \n";
452                 }
453
454                 else if (t.cs() == "underbar") {
455                         os << "\n\\bar under \n";
456                         parse_text(p, os, FLAG_ITEM, outer);
457                         os << "\n\\bar default \n";
458                 }
459
460                 else if (t.cs() == "emph" || t.cs() == "noun") {
461                         os << "\n\\" << t.cs() << " on \n";
462                         parse_text(p, os, FLAG_ITEM, outer);
463                         os << "\n\\" << t.cs() << " default \n";
464                 }
465
466                 else if (t.cs() == "bibitem") {
467                         os << "\n\\layout Bibliography\n\\bibitem ";
468                         os << p.getOpt();
469                         os << '{' << p.verbatim_item() << '}' << "\n\n";
470                 }
471
472                 else if (is_known(t.cs(), known_latex_commands)) {
473                         begin_inset(os, "LatexCommand ");
474                         os << '\\' << t.cs();
475                         os << p.getOpt();
476                         os << p.getOpt();
477                         os << '{' << p.verbatim_item() << '}';
478                         end_inset(os);
479                 }
480
481                 else if (is_known(t.cs(), known_quotes)) {
482                   char const ** where = is_known(t.cs(), known_quotes);
483                         begin_inset(os, "Quotes ");
484                         os << known_coded_quotes[where - known_quotes];
485                         end_inset(os);
486                         skip_braces(p);
487                 }
488
489                 else if (is_known(t.cs(), known_sizes)) {
490                   char const ** where = is_known(t.cs(), known_sizes);
491                         os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
492                 }
493
494                 else if (t.cs() == "LyX" || t.cs() == "TeX"
495                       || t.cs() == "LaTeX" || t.cs() == "LaTeXe") {
496                         os << t.cs();
497                         skip_braces(p); // eat {}
498                 }
499
500                 else if (t.cs() == "lyxarrow") {
501                         os << "\\SpecialChar \\menuseparator\n";
502                         skip_braces(p);
503                 }
504
505                 else if (t.cs() == "ldots") {
506                         os << "\\SpecialChar \\ldots{}\n";
507                         skip_braces(p);
508                 }
509
510                 else if (t.cs() == "@") {
511                         os << "\\SpecialChar \\@";
512                         skip_braces(p);
513                 }
514
515                 else if (t.cs() == "textasciitilde") {
516                         os << '~';
517                         skip_braces(p);
518                 }
519
520                 else if (t.cs() == "textasciicircum") {
521                         os << '^';
522                         skip_braces(p);
523                 }
524
525                 else if (t.cs() == "textbackslash") {
526                         os << "\n\\backslash\n";
527                         skip_braces(p);
528                 }
529
530                 else if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" || t.cs() == "$"
531                             || t.cs() == "{" || t.cs() == "}" || t.cs() == "%")
532                         os << t.cs();
533
534                 else if (t.cs() == "char") {
535                         if (p.next_token().character() == '`') {
536                                 p.get_token();
537                                 if (p.next_token().cs() == "\"") {
538                                         p.get_token();
539                                         os << '"';
540                                         skip_braces(p);
541                                 } else {
542                                         handle_ert(os, "\\char`");
543                                 }
544                         } else {
545                                 handle_ert(os, "\\char");
546                         }
547                 }
548
549                 else if (t.cs() == "\"") {
550                         string const name = p.verbatim_item();
551                              if (name == "a") os << 'ä';
552                         else if (name == "o") os << 'ö';
553                         else if (name == "u") os << 'ü';
554                         else if (name == "A") os << 'Ä';
555                         else if (name == "O") os << 'Ö';
556                         else if (name == "U") os << 'Ü';
557                         else handle_ert(os, "\"{" + name + "}");
558                 }
559
560                 else if (t.cs() == "=" || t.cs() == "H" || t.cs() == "c"
561                       || t.cs() == "^" || t.cs() == "'" || t.cs() == "~") {
562                         // we need the trim as the LyX parser chokes on such spaces
563                         os << "\n\\i \\" << t.cs() << "{"
564                            << trim(parse_text(p, FLAG_ITEM, outer), " ") << "}\n";
565                 }
566
567                 else if (t.cs() == "ss")
568                         os << "ß";
569
570                 else if (t.cs() == "i" || t.cs() == "j")
571                         os << "\\" << t.cs() << ' ';
572
573                 else if (t.cs() == "-")
574                         os << "\\SpecialChar \\-\n";
575
576                 else if (t.cs() == "\\")
577                         os << "\n\\newline\n";
578         
579                 else if (t.cs() == "lyxrightaddress") {
580                         os << "\n\\layout Right Address\n";
581                         parse_text(p, os, FLAG_ITEM, outer);
582                         os << "\n\\layout Standard\n";
583                 }
584
585                 else if (t.cs() == "lyxaddress") {
586                         os << "\n\\layout Address\n";
587                         parse_text(p, os, FLAG_ITEM, outer);
588                         os << "\n\\layout Standard\n";
589                 }
590
591                 else if (t.cs() == "input")
592                         handle_ert(os, "\\input{" + p.verbatim_item() + "}\n");
593
594                 else if (t.cs() == "fancyhead") {
595                         ostringstream ss;
596                         ss << "\\fancyhead";
597                         ss << p.getOpt();
598                         ss << '{' << p.verbatim_item() << "}\n";
599                         handle_ert(os, STRCONV(ss.str()));
600                 }
601
602                 else {
603                         //cerr << "#: " << t << " mode: " << mode << endl;
604                         // heuristic: read up to next non-nested space
605                         /*
606                         string s = t.asInput();
607                         string z = p.verbatim_item();
608                         while (p.good() && z != " " && z.size()) {
609                                 //cerr << "read: " << z << endl;
610                                 s += z;
611                                 z = p.verbatim_item();
612                         }
613                         cerr << "found ERT: " << s << endl;
614                         handle_ert(os, s + ' ');
615                         */
616                         handle_ert(os, t.asInput() + ' ');
617                 }
618
619                 if (flags & FLAG_LEAVE) {
620                         flags &= ~FLAG_LEAVE;
621                         break;
622                 }
623         }
624 }
625
626
627 string parse_text(Parser & p, unsigned flags, const bool outer)
628 {
629         ostringstream os;
630         parse_text(p, os, flags, outer);
631         return STRCONV(os.str());
632 }
633
634
635 // }])