]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/preamble.cpp
preamble.cpp: full babel language support for tex2lyx
[lyx.git] / src / tex2lyx / preamble.cpp
1 /**
2  * \file preamble.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  * \author Uwe Stöhr
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 // {[(
13
14 #include <config.h>
15
16 #include "tex2lyx.h"
17
18 #include "Layout.h"
19 #include "Lexer.h"
20 #include "TextClass.h"
21 #include "support/convert.h"
22 #include "support/filetools.h"
23 #include "support/lstrings.h"
24
25 #include <algorithm>
26 #include <iostream>
27 #include <sstream>
28 #include <string>
29 #include <vector>
30 #include <map>
31
32
33 namespace lyx {
34
35 using std::istringstream;
36 using std::ostream;
37 using std::ostringstream;
38 using std::string;
39 using std::vector;
40 using std::cerr;
41 using std::endl;
42 using std::find;
43
44 using support::FileName;
45 using support::libFileSearch;
46 using support::isStrDbl;
47
// Special column types; the table itself is defined in another file.
// parse_preamble() stores an int per column specifier character in it.
extern std::map<char, int> special_columns;

// Options collected for every package seen so far, keyed by package name.
std::map<std::string, std::vector<std::string> > used_packages;

// Needed to handle encodings with babel: cleared as soon as babel is
// loaded with more than one option.
bool one_language = true;

// Keeps the babel options from overwriting a language that was already
// given in the documentclass options.
bool documentclass_language = false;
58
59 namespace {
60
// Language names that are recognised as documentclass or babel options and
// as package names. Every table in this section is terminated by a 0 entry.
char const * const known_languages[] = {
	"afrikaans", "american", "arabic", "austrian", "bahasa", "basque",
	"belarusian", "brazil", "breton", "british", "bulgarian", "canadian",
	"canadien", "catalan", "croatian", "czech", "danish", "dutch", "english",
	"esperanto", "estonian", "finnish", "francais", "french", "frenchb",
	"frenchle", "frenchpro", "galician", "german", "germanb", "greek",
	"hebrew", "icelandic", "irish", "italian", "lsorbian", "magyar",
	"naustrian", "ngerman", "ngermanb", "norsk", "nynorsk", "polish",
	"portuges", "romanian", "russian", "russianb", "scottish", "serbian",
	"slovak", "slovene", "spanish", "swedish", "thai", "turkish", "ukraineb",
	"ukrainian", "usorbian", "welsh", 0
};

// Note this when updating to lyxformat 305:
//   bahasai, indonesian and indon are equal to bahasa
//   malay and meyalu are equal to bahasam

// Alias groups: every member of a group is mapped onto one LyX language.
char const * const known_french_languages[] = {
	"french", "frenchb", "francais", "frenchle", "frenchpro", 0
};
char const * const known_german_languages[] = { "german", "germanb", 0 };
char const * const known_ngerman_languages[] = { "ngerman", "ngermanb", 0 };
char const * const known_russian_languages[] = { "russian", "russianb", 0 };
char const * const known_ukrainian_languages[] = {
	"ukrainian", "ukraineb", 0
};

// Font size class options that are turned into \paperfontsize.
char const * const known_fontsizes[] = { "10pt", "11pt", "12pt", 0 };

// Names recognised as roman fonts.
char const * const known_roman_fonts[] = {
	"ae", "bookman", "charter", "cmr", "fourier", "lmodern", "mathpazo",
	"mathptmx", "newcent", 0
};

// Names recognised as sans serif fonts.
char const * const known_sans_fonts[] = {
	"avant", "berasans", "cmbr", "cmss", "helvet", "lmss", 0
};

// Names recognised as typewriter fonts.
// NOTE(review): the last five entries also appear in known_roman_fonts;
// confirm that listing them here as well is intended.
char const * const known_typewriter_fonts[] = {
	"beramono", "cmtl", "cmtt", "courier", "lmtt", "luximono", "fourier",
	"lmodern", "mathpazo", "mathptmx", "newcent", 0
};
93
// Some ugly stuff: the state of the LyX header that is being built up.
// Every h_* string holds the value written by end_preamble() after the
// header keyword of the same name; h_preamble collects the LaTeX code
// that is passed through into the user preamble.
std::ostringstream h_preamble;
std::string h_textclass("article");
std::string h_options;
std::string h_language("english");
std::string h_inputencoding("auto");
std::string h_font_roman("default");
std::string h_font_sans("default");
std::string h_font_typewriter("default");
std::string h_font_default_family("default");
std::string h_font_sc("false");
std::string h_font_osf("false");
std::string h_font_sf_scale("100");
std::string h_font_tt_scale("100");
std::string h_graphics("default");
std::string h_paperfontsize("default");
std::string h_spacing("single");
std::string h_papersize("default");
std::string h_use_geometry("false");
std::string h_use_amsmath("0");
std::string h_cite_engine("basic");
std::string h_use_bibtopic("false");
std::string h_paperorientation("portrait");
std::string h_secnumdepth("3");
std::string h_tocdepth("3");
std::string h_paragraph_separation("indent");
std::string h_defskip("medskip");
std::string h_quotes_language("english");
std::string h_papercolumns("1");
// left empty here; parse_preamble() fills it from the text class if needed
std::string h_papersides;
std::string h_paperpagestyle("default");
std::string h_tracking_changes("false");
std::string h_output_changes("false");
127
128
129 void handle_opt(vector<string> & opts, char const * const * what, string & target)
130 {
131         if (opts.empty())
132                 return;
133
134         // the last language option is the document language (for babel and LyX)
135         // the last size option is the document font size
136         vector<string>::iterator it;
137         vector<string>::iterator position = opts.begin();
138         for (; *what; ++what) {
139                 it = find(opts.begin(), opts.end(), *what);
140                 if (it != opts.end()) {
141                         documentclass_language = true;
142                         if (it >= position) {
143                                 target = *what;
144                                 position = it;
145                         }
146                 }
147         }
148 }
149
150
/*!
 * Remove every option listed in \p what from \p opts.
 *
 * Call this after handle_opt(), so that handle_opt() still sees the
 * complete option list when it looks for the last matching option.
 *
 * \param opts the option list to clean up
 * \param what 0-terminated list of the option names to remove
 */
void delete_opt(std::vector<std::string> & opts, char const * const * what)
{
	if (opts.empty())
		return;

	// erase/remove drops all occurrences, so an option that was given
	// more than once does not leave a copy behind
	for (; *what; ++what)
		opts.erase(std::remove(opts.begin(), opts.end(), *what),
			   opts.end());
}
166
167
168 /*!
169  * Split a package options string (keyval format) into a vector.
170  * Example input:
171  *   authorformat=smallcaps,
172  *   commabeforerest,
173  *   titleformat=colonsep,
174  *   bibformat={tabular,ibidem,numbered}
175  */
176 vector<string> split_options(string const & input)
177 {
178         vector<string> options;
179         string option;
180         Parser p(input);
181         while (p.good()) {
182                 Token const & t = p.get_token();
183                 if (t.asInput() == ",") {
184                         options.push_back(trim(option));
185                         option.erase();
186                 } else if (t.asInput() == "=") {
187                         option += '=';
188                         p.skip_spaces(true);
189                         if (p.next_token().asInput() == "{")
190                                 option += '{' + p.getArg('{', '}') + '}';
191                 } else if (t.cat() != catSpace)
192                         option += t.asInput();
193         }
194
195         if (!option.empty())
196                 options.push_back(trim(option));
197
198         return options;
199 }
200
201
202 /*!
203  * Add package \p name with options \p options to used_packages.
204  * Remove options from \p options that we don't want to output.
205  */
206 void add_package(string const & name, vector<string> & options)
207 {
208         // every package inherits the global options
209         if (used_packages.find(name) == used_packages.end())
210                 used_packages[name] = split_options(h_options);
211
212         vector<string> & v = used_packages[name];
213         v.insert(v.end(), options.begin(), options.end());
214         if (name == "jurabib") {
215                 // Don't output the order argument (see the cite command
216                 // handling code in text.cpp).
217                 vector<string>::iterator end =
218                         remove(options.begin(), options.end(), "natbiborder");
219                 end = remove(options.begin(), end, "jurabiborder");
220                 options.erase(end, options.end());
221         }
222 }
223
224
225 // Given is a string like "scaled=0.9", return 0.9 * 100
226 string const scale_as_percentage(string const & scale)
227 {
228         string::size_type pos = scale.find('=');
229         if (pos != string::npos) {
230                 string value = scale.substr(pos + 1);
231                 if (isStrDbl(value))
232                         return convert<string>(100 * convert<double>(value));
233         }
234         // If the input string didn't match our expectations.
235         // return the default value "100" 
236         return "100";
237 }
238
239
240 void handle_package(string const & name, string const & opts)
241 {
242         vector<string> options = split_options(opts);
243         add_package(name, options);
244         string scale;
245
246         // roman fonts
247         if (is_known(name, known_roman_fonts))
248                 h_font_roman = name;
249         if (name == "fourier") {
250                 h_font_roman = "utopia";
251                 // when font uses real small capitals
252                 if (opts == "expert")
253                         h_font_sc = "true";
254         }
255         if (name == "mathpazo")
256                 h_font_roman = "palatino";
257         if (name == "mathptmx")
258                 h_font_roman = "times";
259         // sansserif fonts
260         if (is_known(name, known_sans_fonts)) {
261                 h_font_sans = name;
262                 if (!opts.empty()) {
263                         scale = opts;
264                         h_font_sf_scale = scale_as_percentage(scale);
265                 }
266         }
267         // typewriter fonts
268         if (is_known(name, known_typewriter_fonts)) {
269                 h_font_typewriter = name;
270                 if (!opts.empty()) {
271                         scale = opts;
272                         h_font_tt_scale = scale_as_percentage(scale);
273                 }
274         }
275         // font uses old-style figure
276         if (name == "eco")
277                 h_font_osf = "true";
278
279         else if (name == "amsmath" || name == "amssymb")
280                 h_use_amsmath = "1";
281         else if (name == "babel" && !opts.empty()) {
282                 // check if more than one option was used - used later for inputenc
283                 // in case inputenc is parsed before babel, set the encoding to auto
284                 if (options.begin() != options.end() - 1) {
285                         one_language = false;
286                         h_inputencoding = "auto";
287                 }
288                 // only set the document language when there was not already one set
289                 // via the documentclass options
290                 // babel takes the the last language given in the documentclass options
291                 // as document language. If there is no such language option, the last
292                 // option of its \usepackage call is used.
293                 if (documentclass_language == false) {
294                         handle_opt(options, known_languages, h_language);
295                         delete_opt(options, known_languages);
296                         if (is_known(h_language, known_french_languages))
297                                 h_language = "french";
298                         else if (is_known(h_language, known_german_languages))
299                                 h_language = "german";
300                         else if (is_known(h_language, known_ngerman_languages))
301                                 h_language = "ngerman";
302                         else if (is_known(h_language, known_russian_languages))
303                                 h_language = "russian";
304                         else if (is_known(h_language, known_ukrainian_languages))
305                                 h_language = "ukrainian";
306                         h_quotes_language = h_language;
307                 }
308         }
309         else if (name == "fontenc")
310                 ; // ignore this
311         else if (name == "inputenc") {
312                 // only set when there is not more than one inputenc option
313                 // therefore check for the "," character
314                 // also only set when there is not more then one babel language option
315                 if (opts.find(",") == string::npos && one_language == true)
316                         h_inputencoding = opts;
317                 options.clear();
318         } else if (name == "makeidx")
319                 ; // ignore this
320         else if (name == "verbatim")
321                 ; // ignore this
322         else if (name == "graphicx")
323                 ; // ignore this
324         else if (is_known(name, known_languages)) {
325                 if (is_known(name, known_french_languages))
326                         h_language = "french";
327                 else if (is_known(name, known_german_languages))
328                         h_language = "german";
329                 else if (is_known(name, known_ngerman_languages))
330                         h_language = "ngerman";
331                 else if (is_known(name, known_russian_languages))
332                         h_language = "russian";
333                 else if (is_known(name, known_ukrainian_languages))
334                         h_language = "ukrainian";
335                 else
336                         h_language = name;
337                 h_quotes_language = h_language;
338
339         } else if (name == "natbib") {
340                 h_cite_engine = "natbib_authoryear";
341                 vector<string>::iterator it =
342                         find(options.begin(), options.end(), "authoryear");
343                 if (it != options.end())
344                         options.erase(it);
345                 else {
346                         it = find(options.begin(), options.end(), "numbers");
347                         if (it != options.end()) {
348                                 h_cite_engine = "natbib_numerical";
349                                 options.erase(it);
350                         }
351                 }
352         } else if (name == "jurabib") {
353                 h_cite_engine = "jurabib";
354         } else if (options.empty())
355                 h_preamble << "\\usepackage{" << name << "}\n";
356         else {
357                 h_preamble << "\\usepackage[" << opts << "]{" << name << "}\n";
358                 options.clear();
359         }
360
361         // We need to do something with the options...
362         if (!options.empty())
363                 cerr << "Ignoring options '" << join(options, ",")
364                      << "' of package " << name << '.' << endl;
365 }
366
367
368
369 void end_preamble(ostream & os, TextClass const & /*textclass*/)
370 {
371         os << "#LyX file created by tex2lyx " << PACKAGE_VERSION << "\n"
372            << "\\lyxformat 247\n"
373            << "\\begin_document\n"
374            << "\\begin_header\n"
375            << "\\textclass " << h_textclass << "\n";
376         if (!h_preamble.str().empty())
377                 os << "\\begin_preamble\n" << h_preamble.str() << "\n\\end_preamble\n";
378         if (!h_options.empty())
379                 os << "\\options " << h_options << "\n";
380         os << "\\language " << h_language << "\n"
381            << "\\inputencoding " << h_inputencoding << "\n"
382            << "\\font_roman " << h_font_roman << "\n"
383            << "\\font_sans " << h_font_sans << "\n"
384            << "\\font_typewriter " << h_font_typewriter << "\n"
385            << "\\font_default_family " << h_font_default_family << "\n"
386            << "\\font_sc " << h_font_sc << "\n"
387            << "\\font_osf " << h_font_osf << "\n"
388            << "\\font_sf_scale " << h_font_sf_scale << "\n"
389            << "\\font_tt_scale " << h_font_tt_scale << "\n"
390            << "\\graphics " << h_graphics << "\n"
391            << "\\paperfontsize " << h_paperfontsize << "\n"
392            << "\\spacing " << h_spacing << "\n"
393            << "\\papersize " << h_papersize << "\n"
394            << "\\use_geometry " << h_use_geometry << "\n"
395            << "\\use_amsmath " << h_use_amsmath << "\n"
396            << "\\cite_engine " << h_cite_engine << "\n"
397            << "\\use_bibtopic " << h_use_bibtopic << "\n"
398            << "\\paperorientation " << h_paperorientation << "\n"
399            << "\\secnumdepth " << h_secnumdepth << "\n"
400            << "\\tocdepth " << h_tocdepth << "\n"
401            << "\\paragraph_separation " << h_paragraph_separation << "\n"
402            << "\\defskip " << h_defskip << "\n"
403            << "\\quotes_language " << h_quotes_language << "\n"
404            << "\\papercolumns " << h_papercolumns << "\n"
405            << "\\papersides " << h_papersides << "\n"
406            << "\\paperpagestyle " << h_paperpagestyle << "\n"
407            << "\\tracking_changes " << h_tracking_changes << "\n"
408            << "\\output_changes " << h_output_changes << "\n"
409            << "\\end_header\n\n"
410            << "\\begin_body\n";
411         // clear preamble for subdocuments
412         h_preamble.str("");
413 }
414
415 } // anonymous namespace
416
417 TextClass const parse_preamble(Parser & p, ostream & os, string const & forceclass)
418 {
419         // initialize fixed types
420         special_columns['D'] = 3;
421         bool is_full_document = false;
422
423         // determine whether this is a full document or a fragment for inclusion
424         while (p.good()) {
425                 Token const & t = p.get_token();
426
427                 if (t.cat() == catEscape && t.cs() == "documentclass") {
428                         is_full_document = true;
429                         break;
430                 }
431         }
432         p.reset();
433
434         while (is_full_document && p.good()) {
435                 Token const & t = p.get_token();
436
437 #ifdef FILEDEBUG
438                 cerr << "t: " << t << "\n";
439 #endif
440
441                 //
442                 // cat codes
443                 //
444                 if (t.cat() == catLetter ||
445                           t.cat() == catSuper ||
446                           t.cat() == catSub ||
447                           t.cat() == catOther ||
448                           t.cat() == catMath ||
449                           t.cat() == catActive ||
450                           t.cat() == catBegin ||
451                           t.cat() == catEnd ||
452                           t.cat() == catAlign ||
453                           t.cat() == catParameter)
454                 h_preamble << t.character();
455
456                 else if (t.cat() == catSpace || t.cat() == catNewline)
457                         h_preamble << t.asInput();
458
459                 else if (t.cat() == catComment)
460                         h_preamble << t.asInput();
461
462                 else if (t.cs() == "pagestyle")
463                         h_paperpagestyle = p.verbatim_item();
464
465                 else if (t.cs() == "makeatletter") {
466                         p.setCatCode('@', catLetter);
467                 }
468
469                 else if (t.cs() == "makeatother") {
470                         p.setCatCode('@', catOther);
471                 }
472
473                 else if (t.cs() == "newcommand" || t.cs() == "renewcommand"
474                             || t.cs() == "providecommand") {
475                         bool star = false;
476                         if (p.next_token().character() == '*') {
477                                 p.get_token();
478                                 star = true;
479                         }
480                         string const name = p.verbatim_item();
481                         string const opt1 = p.getOpt();
482                         string const opt2 = p.getFullOpt();
483                         string const body = p.verbatim_item();
484                         // font settings
485                         if (name == "\\rmdefault")
486                                 if (is_known(body, known_roman_fonts))
487                                         h_font_roman = body;
488
489                         if (name == "\\sfdefault")
490                                 if (is_known(body, known_sans_fonts))
491                                         h_font_sans = body;
492
493                         if (name == "\\ttdefault")
494                                 if (is_known(body, known_typewriter_fonts))
495                                         h_font_typewriter = body;
496
497                         if (name == "\\familydefault") {
498                                 string family = body;
499                                 // remove leading "\"
500                                 h_font_default_family = family.erase(0,1);
501                         }
502                         // only non-lyxspecific stuff
503                         if (   name != "\\noun"
504                             && name != "\\tabularnewline"
505                             && name != "\\LyX"
506                             && name != "\\lyxline"
507                             && name != "\\lyxaddress"
508                             && name != "\\lyxrightaddress"
509                             && name != "\\lyxdot"
510                             && name != "\\boldsymbol"
511                             && name != "\\lyxarrow"
512                             && name != "\\rmdefault"
513                             && name != "\\sfdefault"
514                             && name != "\\ttdefault"
515                             && name != "\\familydefault") {
516                                 ostringstream ss;
517                                 ss << '\\' << t.cs();
518                                 if (star)
519                                         ss << '*';
520                                 ss << '{' << name << '}' << opt1 << opt2
521                                    << '{' << body << "}";
522                                 h_preamble << ss.str();
523
524                                 // Add the command to the known commands
525                                 add_known_command(name, opt1, !opt2.empty());
526 /*
527                                 ostream & out = in_preamble ? h_preamble : os;
528                                 out << "\\" << t.cs() << "{" << name << "}"
529                                     << opts << "{" << body << "}";
530 */
531                         }
532                 }
533
534                 else if (t.cs() == "documentclass") {
535                         vector<string> opts = split_options(p.getArg('[', ']'));
536                         handle_opt(opts, known_fontsizes, h_paperfontsize);
537                         delete_opt(opts, known_fontsizes);
538                         // delete "pt" at the end
539                         string::size_type i = h_paperfontsize.find("pt");
540                         if (i != string::npos)
541                                 h_paperfontsize.erase(i);
542                         // to avoid that the babel options overwrite the documentclass options
543                         documentclass_language = false;
544                         handle_opt(opts, known_languages, h_language);
545                         delete_opt(opts, known_languages);
546                         if (is_known(h_language, known_french_languages))
547                                 h_language = "french";
548                         else if (is_known(h_language, known_german_languages))
549                                 h_language = "german";
550                         else if (is_known(h_language, known_ngerman_languages))
551                                 h_language = "ngerman";
552                         else if (is_known(h_language, known_russian_languages))
553                                 h_language = "russian";
554                         else if (is_known(h_language, known_ukrainian_languages))
555                                 h_language = "ukrainian";
556                         h_quotes_language = h_language;
557                         h_options = join(opts, ",");
558                         h_textclass = p.getArg('{', '}');
559                 }
560
561                 else if (t.cs() == "usepackage") {
562                         string const options = p.getArg('[', ']');
563                         string const name = p.getArg('{', '}');
564                         if (options.empty() && name.find(',')) {
565                                 vector<string> vecnames;
566                                 split(name, vecnames, ',');
567                                 vector<string>::const_iterator it  = vecnames.begin();
568                                 vector<string>::const_iterator end = vecnames.end();
569                                 for (; it != end; ++it)
570                                         handle_package(trim(*it), string());
571                         } else {
572                                 handle_package(name, options);
573                         }
574                 }
575
576                 else if (t.cs() == "newenvironment") {
577                         string const name = p.getArg('{', '}');
578                         ostringstream ss;
579                         ss << "\\newenvironment{" << name << "}";
580                         ss << p.getOpt();
581                         ss << p.getOpt();
582                         ss << '{' << p.verbatim_item() << '}';
583                         ss << '{' << p.verbatim_item() << '}';
584                         if (name != "lyxcode" && name != "lyxlist" &&
585                             name != "lyxrightadress" &&
586                             name != "lyxaddress" && name != "lyxgreyedout")
587                                 h_preamble << ss.str();
588                 }
589
590                 else if (t.cs() == "def") {
591                         string name = p.get_token().cs();
592                         while (p.next_token().cat() != catBegin)
593                                 name += p.get_token().asString();
594                         h_preamble << "\\def\\" << name << '{'
595                                    << p.verbatim_item() << "}";
596                 }
597
598                 else if (t.cs() == "newcolumntype") {
599                         string const name = p.getArg('{', '}');
600                         trim(name);
601                         int nargs = 0;
602                         string opts = p.getOpt();
603                         if (!opts.empty()) {
604                                 istringstream is(string(opts, 1));
605                                 is >> nargs;
606                         }
607                         special_columns[name[0]] = nargs;
608                         h_preamble << "\\newcolumntype{" << name << "}";
609                         if (nargs)
610                                 h_preamble << "[" << nargs << "]";
611                         h_preamble << "{" << p.verbatim_item() << "}";
612                 }
613
614                 else if (t.cs() == "setcounter") {
615                         string const name = p.getArg('{', '}');
616                         string const content = p.getArg('{', '}');
617                         if (name == "secnumdepth")
618                                 h_secnumdepth = content;
619                         else if (name == "tocdepth")
620                                 h_tocdepth = content;
621                         else
622                                 h_preamble << "\\setcounter{" << name << "}{" << content << "}";
623                 }
624
625                 else if (t.cs() == "setlength") {
626                         string const name = p.verbatim_item();
627                         string const content = p.verbatim_item();
628                         // Is this correct?
629                         if (name == "parskip")
630                                 h_paragraph_separation = "skip";
631                         else if (name == "parindent")
632                                 h_paragraph_separation = "skip";
633                         else
634                                 h_preamble << "\\setlength{" << name << "}{" << content << "}";
635                 }
636
637                 else if (t.cs() == "begin") {
638                         string const name = p.getArg('{', '}');
639                         if (name == "document")
640                                 break;
641                         h_preamble << "\\begin{" << name << "}";
642                 }
643
644                 else if (t.cs() == "jurabibsetup") {
645                         vector<string> jurabibsetup =
646                                 split_options(p.getArg('{', '}'));
647                         // add jurabibsetup to the jurabib package options
648                         add_package("jurabib", jurabibsetup);
649                         if (!jurabibsetup.empty()) {
650                                 h_preamble << "\\jurabibsetup{"
651                                            << join(jurabibsetup, ",") << '}';
652                         }
653                 }
654
655                 else if (!t.cs().empty())
656                         h_preamble << '\\' << t.cs();
657         }
658         p.skip_spaces();
659
660         // Force textclass if the user wanted it
661         if (!forceclass.empty())
662                 h_textclass = forceclass;
663         if (noweb_mode && !lyx::support::prefixIs(h_textclass, "literate-"))
664                 h_textclass.insert(0, "literate-");
665         FileName layoutfilename = libFileSearch("layouts", h_textclass, "layout");
666         if (layoutfilename.empty()) {
667                 cerr << "Error: Could not find layout file for textclass \"" << h_textclass << "\"." << endl;
668                 exit(1);
669         }
670         TextClass textclass;
671         textclass.read(layoutfilename);
672         if (h_papersides.empty()) {
673                 ostringstream ss;
674                 ss << textclass.sides();
675                 h_papersides = ss.str();
676         }
677         end_preamble(os, textclass);
678         return textclass;
679 }
680
681 // }])
682
683
684 } // namespace lyx