]> git.lyx.org Git - lyx.git/blob - src/Language.cpp
Deactivate active - in tables with \cline or \cmidrule
[lyx.git] / src / Language.cpp
1 /**
2  * \file Language.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jean-Marc Lasgouttes
8  * \author Jürgen Spitzmüller
9  * \author Dekel Tsur
10  *
11  * Full author contact details are available in file CREDITS.
12  */
13
14 #include <config.h>
15
16 #include "Language.h"
17
18 #include "Encoding.h"
19 #include "LaTeXFonts.h"
20 #include "Lexer.h"
21 #include "LyXRC.h"
22
23 #include "support/debug.h"
24 #include "support/FileName.h"
25 #include "support/filetools.h"
26 #include "support/lassert.h"
27 #include "support/lstrings.h"
28 #include "support/qstring_helpers.h"
29 #include "support/Messages.h"
30
31 #include <QLocale>
32 #include <QString>
33
34 using namespace std;
35 using namespace lyx::support;
36
37 namespace lyx {
38
39 Languages languages;
40 Language const * ignore_language = 0;
41 Language const * default_language = 0;
42 Language const * latex_language = 0;
43 Language const * reset_language = 0;
44
45
46 bool Language::isPolyglossiaExclusive() const
47 {
48         return babel().empty() && !polyglossia().empty() && requires().empty();
49 }
50
51
52 bool Language::isBabelExclusive() const
53 {
54         return !babel().empty() && polyglossia().empty() && requires().empty();
55 }
56
57
58 docstring const Language::translateLayout(string const & m) const
59 {
60         if (m.empty())
61                 return docstring();
62
63         if (!isAscii(m)) {
64                 lyxerr << "Warning: not translating `" << m
65                        << "' because it is not pure ASCII.\n";
66                 return from_utf8(m);
67         }
68
69         TranslationMap::const_iterator it = layoutTranslations_.find(m);
70         if (it != layoutTranslations_.end())
71                 return it->second;
72
73         docstring t = from_ascii(m);
74         cleanTranslation(t);
75         return t;
76 }
77
78
79 string Language::fontenc(BufferParams const & params) const
80 {
81         // Don't use LaTeX fonts, so just return the language's preferred
82         // (although this is not used with nonTeXFonts anyway).
83         if (params.useNonTeXFonts)
84                 return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
85
86         // Determine optimal font encoding
87         // We check whether the used rm font supports an encoding our language supports
88         LaTeXFont const & lf =
89                 theLaTeXFonts().getLaTeXFont(from_ascii(params.fontsRoman()));
90         vector<string> const lfe = lf.fontencs();
91         for (auto & fe : fontenc_) {
92                 // ASCII means: support all T* encodings plus OT1
93                 if (fe == "ASCII") {
94                         for (auto & afe : lfe) {
95                                 if (afe == "OT1" || prefixIs(afe, "T"))
96                                         // we found a suitable one; return that.
97                                         return afe;
98                         }
99                 }
100                 // For other encodings, just check whether the font supports it
101                 if (lf.hasFontenc(fe))
102                         return fe;
103         }
104         // We did not find a suitable one; just take the first in the list,
105         // the priorized one (which is "T1" for ASCII).
106         return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
107 }
108
109
110 string Language::dateFormat(size_t i) const
111 {
112         if (i > dateformats_.size())
113                 return string();
114         return dateformats_.at(i);
115 }
116
117
118 docstring Language::decimalSeparator() const
119 {
120         if (lyxrc.default_decimal_sep == "locale") {
121                 QLocale loc = QLocale(toqstr(code()));
122                 return qstring_to_ucs4(QString(loc.decimalPoint()));
123         }
124         return from_utf8(lyxrc.default_decimal_sep);
125 }
126
127
/// Parse the body of one language definition from \p lex into the
/// members of this object.
/// Parsing stops at the "end" tag, when the lexer is no longer OK, or
/// at the first unknown tag.
/// \return true only if the "end" tag was reached without an error.
bool Language::readLanguage(Lexer & lex)
{
	// Identifiers for the tags accepted inside a language definition.
	enum LanguageTags {
		LA_BABELNAME = 1,
		LA_DATEFORMATS,
		LA_ENCODING,
		LA_END,
		LA_FONTENC,
		LA_GUINAME,
		LA_HAS_GUI_SUPPORT,
		LA_INTERNAL_ENC,
		LA_LANG_CODE,
		LA_LANG_VARIETY,
		LA_POLYGLOSSIANAME,
		LA_POLYGLOSSIAOPTS,
		LA_POSTBABELPREAMBLE,
		LA_PREBABELPREAMBLE,
		LA_PROVIDES,
		LA_REQUIRES,
		LA_QUOTESTYLE,
		LA_RTL,
		LA_WORDWRAP,
		LA_ACTIVECHARS
	};

	// Keep these sorted alphabetically!
	// Maps the tag names as written in the file to the identifiers above.
	LexerKeyword languageTags[] = {
		{ "activechars",          LA_ACTIVECHARS },
		{ "babelname",            LA_BABELNAME },
		{ "dateformats",          LA_DATEFORMATS },
		{ "encoding",             LA_ENCODING },
		{ "end",                  LA_END },
		{ "fontencoding",         LA_FONTENC },
		{ "guiname",              LA_GUINAME },
		{ "hasguisupport",        LA_HAS_GUI_SUPPORT },
		{ "internalencoding",     LA_INTERNAL_ENC },
		{ "langcode",             LA_LANG_CODE },
		{ "langvariety",          LA_LANG_VARIETY },
		{ "polyglossianame",      LA_POLYGLOSSIANAME },
		{ "polyglossiaopts",      LA_POLYGLOSSIAOPTS },
		{ "postbabelpreamble",    LA_POSTBABELPREAMBLE },
		{ "prebabelpreamble",     LA_PREBABELPREAMBLE },
		{ "provides",             LA_PROVIDES },
		{ "quotestyle",           LA_QUOTESTYLE },
		{ "requires",             LA_REQUIRES },
		{ "rtl",                  LA_RTL },
		{"wordwrap",              LA_WORDWRAP }
	};

	bool error = false;
	bool finished = false;
	// The keyword table is only active while this definition is parsed;
	// it is popped again before returning.
	lex.pushTable(languageTags);
	// parse style section
	while (!finished && lex.isOK() && !error) {
		int le = lex.lex();
		// See comment in LyXRC.cpp.
		// First filter out the lexer's special return codes.
		switch (le) {
		case Lexer::LEX_FEOF:
			// Re-evaluate the loop condition.
			// NOTE(review): presumably lex.isOK() is false by now,
			// which ends the loop -- confirm against Lexer.
			continue;

		case Lexer::LEX_UNDEF: // parse error
			lex.printError("Unknown language tag `$$Token'");
			// Setting error makes the loop condition fail.
			error = true;
			continue;

		default:
			break;
		}
		// Anything else is one of the tags from the table above.
		switch (static_cast<LanguageTags>(le)) {
		case LA_END: // end of structure
			finished = true;
			break;
		case LA_BABELNAME:
			lex >> babel_;
			break;
		case LA_POLYGLOSSIANAME:
			lex >> polyglossia_name_;
			break;
		case LA_POLYGLOSSIAOPTS:
			lex >> polyglossia_opts_;
			break;
		case LA_QUOTESTYLE:
			lex >> quote_style_;
			break;
		case LA_ACTIVECHARS:
			lex >> active_chars_;
			break;
		case LA_ENCODING:
			lex >> encodingStr_;
			break;
		case LA_FONTENC: {
			// The value is a '|'-separated list; the entries are
			// appended to fontenc_.
			lex.eatLine();
			vector<string> const fe =
				getVectorFromString(lex.getString(true), "|");
			fontenc_.insert(fontenc_.end(), fe.begin(), fe.end());
			break;
		}
		case LA_DATEFORMATS: {
			// Same as above, but double quotes are trimmed from
			// both ends of the value before it is split.
			lex.eatLine();
			vector<string> const df =
				getVectorFromString(trim(lex.getString(true), "\""), "|");
			dateformats_.insert(dateformats_.end(), df.begin(), df.end());
			break;
		}
		case LA_GUINAME:
			lex >> display_;
			break;
		case LA_HAS_GUI_SUPPORT:
			lex >> has_gui_support_;
			break;
		case LA_INTERNAL_ENC:
			lex >> internal_enc_;
			break;
		case LA_LANG_CODE:
			lex >> code_;
			break;
		case LA_LANG_VARIETY:
			lex >> variety_;
			break;
		case LA_POSTBABELPREAMBLE:
			// Multi-line value, terminated by the given end tag.
			babel_postsettings_ =
				lex.getLongString(from_ascii("EndPostBabelPreamble"));
			break;
		case LA_PREBABELPREAMBLE:
			// Multi-line value, terminated by the given end tag.
			babel_presettings_ =
				lex.getLongString(from_ascii("EndPreBabelPreamble"));
			break;
		case LA_REQUIRES:
			lex >> requires_;
			break;
		case LA_PROVIDES:
			lex >> provides_;
			break;
		case LA_RTL:
			lex >> rightToLeft_;
			break;
		case LA_WORDWRAP:
			lex >> word_wrap_;
			break;
		}
	}
	lex.popTable();
	// Success requires both that "end" was seen and that no unknown
	// tag was encountered.
	return finished && !error;
}
272
273
274 bool Language::read(Lexer & lex)
275 {
276         encoding_ = 0;
277         internal_enc_ = 0;
278         rightToLeft_ = 0;
279
280         if (!lex.next()) {
281                 lex.printError("No name given for language: `$$Token'.");
282                 return false;
283         }
284
285         lang_ = lex.getString();
286         LYXERR(Debug::INFO, "Reading language " << lang_);
287         if (!readLanguage(lex)) {
288                 LYXERR0("Error parsing language `" << lang_ << '\'');
289                 return false;
290         }
291
292         encoding_ = encodings.fromLyXName(encodingStr_);
293         if (!encoding_ && !encodingStr_.empty()) {
294                 encoding_ = encodings.fromLyXName("iso8859-1");
295                 LYXERR0("Unknown encoding " << encodingStr_);
296         }
297         if (fontenc_.empty())
298                 fontenc_.push_back("ASCII");
299         if (dateformats_.empty()) {
300                 dateformats_.push_back("MMMM dd, yyyy");
301                 dateformats_.push_back("MMM dd, yyyy");
302                 dateformats_.push_back("M/d/yyyy");
303         }
304         return true;
305 }
306
307
308 void Language::readLayoutTranslations(Language::TranslationMap const & trans, bool replace)
309 {
310         for (auto const & t : trans) {
311                 if (replace
312                     || layoutTranslations_.find(t.first) == layoutTranslations_.end())
313                         layoutTranslations_[t.first] = t.second;
314         }
315 }
316
317
318 void Languages::read(FileName const & filename)
319 {
320         Lexer lex;
321         lex.setFile(filename);
322         lex.setContext("Languages::read");
323         while (lex.isOK()) {
324                 int le = lex.lex();
325                 switch (le) {
326                 case Lexer::LEX_FEOF:
327                         continue;
328
329                 default:
330                         break;
331                 }
332                 if (lex.getString() != "Language") {
333                         lex.printError("Unknown Language tag `$$Token'");
334                         continue;
335                 }
336                 Language l;
337                 l.read(lex);
338                 if (!lex)
339                         break;
340                 if (l.lang() == "latex") {
341                         // Check if latex language was not already defined.
342                         LASSERT(latex_language == 0, continue);
343                         static const Language latex_lang = l;
344                         latex_language = &latex_lang;
345                 } else if (l.lang() == "ignore") {
346                         // Check if ignore language was not already defined.
347                         LASSERT(ignore_language == 0, continue);
348                         static const Language ignore_lang = l;
349                         ignore_language = &ignore_lang;
350                 } else
351                         languagelist_[l.lang()] = l;
352         }
353
354         default_language = getLanguage("english");
355         if (!default_language) {
356                 LYXERR0("Default language \"english\" not found!");
357                 default_language = &(*languagelist_.begin()).second;
358                 LYXERR0("Using \"" << default_language->lang() << "\" instead!");
359         }
360
361         // Read layout translations
362         FileName const path = libFileSearch(string(), "layouttranslations");
363         readLayoutTranslations(path);
364 }
365
366
367 namespace {
368
369 bool readTranslations(Lexer & lex, Language::TranslationMap & trans)
370 {
371         while (lex.isOK()) {
372                 if (lex.checkFor("End"))
373                         break;
374                 if (!lex.next(true))
375                         return false;
376                 string const key = lex.getString();
377                 if (!lex.next(true))
378                         return false;
379                 docstring const val = lex.getDocString();
380                 trans[key] = val;
381         }
382         return true;
383 }
384
385
/// Result of comparing a language code with a Language, see match().
enum Match {
	/// The first two characters of code and language code differ.
	NoMatch,
	/// A two-letter code that equals the part of the language code
	/// before the underscore (e.g. "de" for "de_DE").
	ApproximateMatch,
	/// Only the first two characters of code and language code agree.
	VeryApproximateMatch,
	/// The code equals the full language code (with special handling
	/// of traditional chinese).
	ExactMatch
};
392
393
394 Match match(string const & code, Language const & lang)
395 {
396         // we need to mimic gettext: code can be a two-letter code, which
397         // should match all variants, e.g. "de" should match "de_DE",
398         // "de_AT" etc.
399         // special case for chinese:
400         // simplified  => code == "zh_CN", langcode == "zh_CN"
401         // traditional => code == "zh_TW", langcode == "zh_CN"
402         string const variety = lang.variety();
403         string const langcode = variety.empty() ?
404                                 lang.code() : lang.code() + '_' + variety;
405         string const name = lang.lang();
406         if ((code == langcode && name != "chinese-traditional")
407                 || (code == "zh_TW"  && name == "chinese-traditional"))
408                 return ExactMatch;
409         if ((code.size() == 2) && (langcode.size() > 2)
410                 && (code + '_' == langcode.substr(0, 3)))
411                 return ApproximateMatch;
412         if (code.substr(0,2) == langcode.substr(0,2))
413                 return VeryApproximateMatch;
414         return NoMatch;
415 }
416
417 } // namespace
418
419
420
421 Language const * Languages::getFromCode(string const & code) const
422 {
423         // 1/ exact match with any known language
424         for (auto const & l : languagelist_) {
425                 if (match(code, l.second) == ExactMatch)
426                         return &l.second;
427         }
428
429         // 2/ approximate with any known language
430         for (auto const & l : languagelist_) {
431                 if (match(code, l.second) == ApproximateMatch)
432                         return &l.second;
433         }
434         return 0;
435 }
436
437
438 Language const * Languages::getFromCode(string const & code,
439                         set<Language const *> const & tryfirst) const
440 {
441         // 1/ exact match with tryfirst list
442         for (auto const * lptr : tryfirst) {
443                 if (match(code, *lptr) == ExactMatch)
444                         return lptr;
445         }
446
447         // 2/ approximate match with tryfirst list
448         for (auto const * lptr : tryfirst) {
449                 Match const m = match(code, *lptr);
450                 if (m == ApproximateMatch || m == VeryApproximateMatch)
451                         return lptr;
452         }
453
454         // 3/ stricter match in all languages
455         return getFromCode(code);
456
457         LYXERR0("Unknown language `" << code << "'");
458         return 0;
459 }
460
461
462 void Languages::readLayoutTranslations(support::FileName const & filename)
463 {
464         Lexer lex;
465         lex.setFile(filename);
466         lex.setContext("Languages::read");
467
468         // 1) read all translations (exact and approximate matches) into trans
469         std::map<string, Language::TranslationMap> trans;
470         while (lex.isOK()) {
471                 if (!lex.checkFor("Translation")) {
472                         if (lex.isOK())
473                                 lex.printError("Unknown layout translation tag `$$Token'");
474                         break;
475                 }
476                 if (!lex.next(true))
477                         break;
478                 string const code = lex.getString();
479                 bool found = getFromCode(code);
480                 if (!found) {
481                         lex.printError("Unknown language `" + code + "'");
482                         break;
483                 }
484                 if (!readTranslations(lex, trans[code])) {
485                         lex.printError("Could not read layout translations for language `"
486                                 + code + "'");
487                         break;
488                 }
489         }
490
491         // 2) merge all translations into the languages
492         // exact translations overwrite approximate ones
493         for (auto & tr : trans) {
494                 for (auto & lang : languagelist_) {
495                         Match const m = match(tr.first, lang.second);
496                         if (m == NoMatch)
497                                 continue;
498                         lang.second.readLayoutTranslations(tr.second, m == ExactMatch);
499                 }
500         }
501
502 }
503
504
505 Language const * Languages::getLanguage(string const & language) const
506 {
507         if (language == "reset")
508                 return reset_language;
509         if (language == "ignore")
510                 return ignore_language;
511         const_iterator it = languagelist_.find(language);
512         return it == languagelist_.end() ? reset_language : &it->second;
513 }
514
515
516 } // namespace lyx