git.lyx.org Git - lyx.git/blob - src/Language.cpp
03146c358a9abd23dca444ace28a8ee912314c96
[lyx.git] / src / Language.cpp
1 /**
2  * \file Language.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jean-Marc Lasgouttes
8  * \author Jürgen Spitzmüller
9  * \author Dekel Tsur
10  *
11  * Full author contact details are available in file CREDITS.
12  */
13
14 #include <config.h>
15
16 #include "Language.h"
17
18 #include "BufferParams.h"
19 #include "Encoding.h"
20 #include "LaTeXFonts.h"
21 #include "Lexer.h"
22 #include "LyXRC.h"
23
24 #include "support/debug.h"
25 #include "support/FileName.h"
26 #include "support/filetools.h"
27 #include "support/lassert.h"
28 #include "support/lstrings.h"
29 #include "support/qstring_helpers.h"
30 #include "support/Messages.h"
31
32 #include <QLocale>
33 #include <QString>
34
35 using namespace std;
36 using namespace lyx::support;
37
38 namespace lyx {
39
40 Languages languages;
41 Language const * ignore_language = nullptr;
42 Language const * default_language = nullptr;
43 Language const * latex_language = nullptr;
44 Language const * reset_language = nullptr;
45
46
47 bool Language::isPolyglossiaExclusive() const
48 {
49         return babel().empty() && !polyglossia().empty() && required().empty();
50 }
51
52
53 bool Language::isBabelExclusive() const
54 {
55         return !babel().empty() && polyglossia().empty() && required().empty();
56 }
57
58
59 docstring const Language::translateLayout(string const & m) const
60 {
61         if (m.empty())
62                 return docstring();
63
64         if (!isAscii(m)) {
65                 lyxerr << "Warning: not translating `" << m
66                        << "' because it is not pure ASCII.\n";
67                 return from_utf8(m);
68         }
69
70         TranslationMap::const_iterator it = layoutTranslations_.find(m);
71         if (it != layoutTranslations_.end())
72                 return it->second;
73
74         docstring t = from_ascii(m);
75         cleanTranslation(t);
76         return t;
77 }
78
79
80 string Language::fontenc(BufferParams const & params) const
81 {
82         // Don't use LaTeX fonts, so just return the language's preferred
83         // (although this is not used with nonTeXFonts anyway).
84         if (params.useNonTeXFonts)
85                 return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
86
87         // Determine optimal font encoding
88         // We check whether the used rm font supports an encoding our language supports
89         LaTeXFont const & lf =
90                 theLaTeXFonts().getLaTeXFont(from_ascii(params.fontsRoman()));
91         vector<string> const lfe = lf.fontencs();
92         for (auto & fe : fontenc_) {
93                 // ASCII means: support all T* encodings plus OT1
94                 if (fe == "ASCII") {
95                         for (auto & afe : lfe) {
96                                 if (afe == "OT1" || prefixIs(afe, "T"))
97                                         // we found a suitable one; return that.
98                                         return afe;
99                         }
100                 }
101                 // For other encodings, just check whether the font supports it
102                 if (lf.hasFontenc(fe))
103                         return fe;
104         }
105         // We did not find a suitable one; just take the first in the list,
106         // the priorized one (which is "T1" for ASCII).
107         return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
108 }
109
110
111 string Language::dateFormat(size_t i) const
112 {
113         if (i > dateformats_.size())
114                 return string();
115         return dateformats_.at(i);
116 }
117
118
119 docstring Language::decimalSeparator() const
120 {
121         if (lyxrc.default_decimal_sep == "locale") {
122                 QLocale loc = QLocale(toqstr(code()));
123                 return qstring_to_ucs4(QString(loc.decimalPoint()));
124         }
125         return from_utf8(lyxrc.default_decimal_sep);
126 }
127
128
129 bool Language::readLanguage(Lexer & lex)
130 {
131         enum LanguageTags {
132                 LA_BABELNAME = 1,
133                 LA_DATEFORMATS,
134                 LA_ENCODING,
135                 LA_END,
136                 LA_FONTENC,
137                 LA_GUINAME,
138                 LA_HAS_GUI_SUPPORT,
139                 LA_INTERNAL_ENC,
140                 LA_LANG_CODE,
141                 LA_LANG_VARIETY,
142                 LA_POLYGLOSSIANAME,
143                 LA_POLYGLOSSIAOPTS,
144                 LA_XINDYNAME,
145                 LA_POSTBABELPREAMBLE,
146                 LA_PREBABELPREAMBLE,
147                 LA_PROVIDES,
148                 LA_REQUIRES,
149                 LA_QUOTESTYLE,
150                 LA_RTL,
151                 LA_WORDWRAP,
152                 LA_ACTIVECHARS
153         };
154
155         // Keep these sorted alphabetically!
156         LexerKeyword languageTags[] = {
157                 { "activechars",          LA_ACTIVECHARS },
158                 { "babelname",            LA_BABELNAME },
159                 { "dateformats",          LA_DATEFORMATS },
160                 { "encoding",             LA_ENCODING },
161                 { "end",                  LA_END },
162                 { "fontencoding",         LA_FONTENC },
163                 { "guiname",              LA_GUINAME },
164                 { "hasguisupport",        LA_HAS_GUI_SUPPORT },
165                 { "internalencoding",     LA_INTERNAL_ENC },
166                 { "langcode",             LA_LANG_CODE },
167                 { "langvariety",          LA_LANG_VARIETY },
168                 { "polyglossianame",      LA_POLYGLOSSIANAME },
169                 { "polyglossiaopts",      LA_POLYGLOSSIAOPTS },
170                 { "postbabelpreamble",    LA_POSTBABELPREAMBLE },
171                 { "prebabelpreamble",     LA_PREBABELPREAMBLE },
172                 { "provides",             LA_PROVIDES },
173                 { "quotestyle",           LA_QUOTESTYLE },
174                 { "requires",             LA_REQUIRES },
175                 { "rtl",                  LA_RTL },
176                 { "wordwrap",             LA_WORDWRAP },
177                 { "xindyname",            LA_XINDYNAME }
178         };
179
180         bool error = false;
181         bool finished = false;
182         lex.pushTable(languageTags);
183         // parse style section
184         while (!finished && lex.isOK() && !error) {
185                 int le = lex.lex();
186                 // See comment in LyXRC.cpp.
187                 switch (le) {
188                 case Lexer::LEX_FEOF:
189                         continue;
190
191                 case Lexer::LEX_UNDEF: // parse error
192                         lex.printError("Unknown language tag `$$Token'");
193                         error = true;
194                         continue;
195
196                 default:
197                         break;
198                 }
199                 switch (static_cast<LanguageTags>(le)) {
200                 case LA_END: // end of structure
201                         finished = true;
202                         break;
203                 case LA_BABELNAME:
204                         lex >> babel_;
205                         break;
206                 case LA_POLYGLOSSIANAME:
207                         lex >> polyglossia_name_;
208                         break;
209                 case LA_POLYGLOSSIAOPTS:
210                         lex >> polyglossia_opts_;
211                         break;
212                 case LA_XINDYNAME:
213                         lex >> xindy_;
214                         break;
215                 case LA_QUOTESTYLE:
216                         lex >> quote_style_;
217                         break;
218                 case LA_ACTIVECHARS:
219                         lex >> active_chars_;
220                         break;
221                 case LA_ENCODING:
222                         lex >> encodingStr_;
223                         break;
224                 case LA_FONTENC: {
225                         lex.eatLine();
226                         vector<string> const fe =
227                                 getVectorFromString(lex.getString(true), "|");
228                         fontenc_.insert(fontenc_.end(), fe.begin(), fe.end());
229                         break;
230                 }
231                 case LA_DATEFORMATS: {
232                         lex.eatLine();
233                         vector<string> const df =
234                                 getVectorFromString(trim(lex.getString(true), "\""), "|");
235                         dateformats_.insert(dateformats_.end(), df.begin(), df.end());
236                         break;
237                 }
238                 case LA_GUINAME:
239                         lex >> display_;
240                         break;
241                 case LA_HAS_GUI_SUPPORT:
242                         lex >> has_gui_support_;
243                         break;
244                 case LA_INTERNAL_ENC:
245                         lex >> internal_enc_;
246                         break;
247                 case LA_LANG_CODE:
248                         lex >> code_;
249                         break;
250                 case LA_LANG_VARIETY:
251                         lex >> variety_;
252                         break;
253                 case LA_POSTBABELPREAMBLE:
254                         babel_postsettings_ =
255                                 lex.getLongString(from_ascii("EndPostBabelPreamble"));
256                         break;
257                 case LA_PREBABELPREAMBLE:
258                         babel_presettings_ =
259                                 lex.getLongString(from_ascii("EndPreBabelPreamble"));
260                         break;
261                 case LA_REQUIRES:
262                         lex >> required_;
263                         break;
264                 case LA_PROVIDES:
265                         lex >> provides_;
266                         break;
267                 case LA_RTL:
268                         lex >> rightToLeft_;
269                         break;
270                 case LA_WORDWRAP:
271                         lex >> word_wrap_;
272                         break;
273                 }
274         }
275         lex.popTable();
276         return finished && !error;
277 }
278
279
280 bool Language::read(Lexer & lex)
281 {
282         encoding_ = nullptr;
283         internal_enc_ = false;
284         rightToLeft_ = false;
285
286         if (!lex.next()) {
287                 lex.printError("No name given for language: `$$Token'.");
288                 return false;
289         }
290
291         lang_ = lex.getString();
292         LYXERR(Debug::INFO, "Reading language " << lang_);
293         if (!readLanguage(lex)) {
294                 LYXERR0("Error parsing language `" << lang_ << '\'');
295                 return false;
296         }
297
298         encoding_ = encodings.fromLyXName(encodingStr_);
299         if (!encoding_ && !encodingStr_.empty()) {
300                 encoding_ = encodings.fromLyXName("iso8859-1");
301                 LYXERR0("Unknown encoding " << encodingStr_);
302         }
303         if (fontenc_.empty())
304                 fontenc_.push_back("ASCII");
305         if (dateformats_.empty()) {
306                 dateformats_.push_back("MMMM dd, yyyy");
307                 dateformats_.push_back("MMM dd, yyyy");
308                 dateformats_.push_back("M/d/yyyy");
309         }
310         return true;
311 }
312
313
314 void Language::readLayoutTranslations(Language::TranslationMap const & trans, bool replace)
315 {
316         for (auto const & t : trans) {
317                 if (replace
318                     || layoutTranslations_.find(t.first) == layoutTranslations_.end())
319                         layoutTranslations_[t.first] = t.second;
320         }
321 }
322
323
324 void Languages::read(FileName const & filename)
325 {
326         Lexer lex;
327         lex.setFile(filename);
328         lex.setContext("Languages::read");
329         while (lex.isOK()) {
330                 int le = lex.lex();
331                 switch (le) {
332                 case Lexer::LEX_FEOF:
333                         continue;
334
335                 default:
336                         break;
337                 }
338                 if (lex.getString() != "Language") {
339                         lex.printError("Unknown Language tag `$$Token'");
340                         continue;
341                 }
342                 Language l;
343                 l.read(lex);
344                 if (!lex)
345                         break;
346                 if (l.lang() == "latex") {
347                         // Check if latex language was not already defined.
348                         LASSERT(latex_language == nullptr, continue);
349                         static const Language latex_lang = l;
350                         latex_language = &latex_lang;
351                 } else if (l.lang() == "ignore") {
352                         // Check if ignore language was not already defined.
353                         LASSERT(ignore_language == nullptr, continue);
354                         static const Language ignore_lang = l;
355                         ignore_language = &ignore_lang;
356                 } else
357                         languagelist_[l.lang()] = l;
358         }
359
360         default_language = getLanguage("english");
361         if (!default_language) {
362                 LYXERR0("Default language \"english\" not found!");
363                 default_language = &(*languagelist_.begin()).second;
364                 LYXERR0("Using \"" << default_language->lang() << "\" instead!");
365         }
366
367         // Read layout translations
368         FileName const path = libFileSearch(string(), "layouttranslations");
369         readLayoutTranslations(path);
370 }
371
372
373 namespace {
374
375 bool readTranslations(Lexer & lex, Language::TranslationMap & trans)
376 {
377         while (lex.isOK()) {
378                 if (lex.checkFor("End"))
379                         break;
380                 if (!lex.next(true))
381                         return false;
382                 string const key = lex.getString();
383                 if (!lex.next(true))
384                         return false;
385                 docstring const val = lex.getDocString();
386                 trans[key] = val;
387         }
388         return true;
389 }
390
391
392 enum Match {
393         NoMatch,
394         ApproximateMatch,
395         VeryApproximateMatch,
396         ExactMatch
397 };
398
399
400 Match match(string const & code, Language const & lang)
401 {
402         // we need to mimic gettext: code can be a two-letter code, which
403         // should match all variants, e.g. "de" should match "de_DE",
404         // "de_AT" etc.
405         // special case for chinese:
406         // simplified  => code == "zh_CN", langcode == "zh_CN"
407         // traditional => code == "zh_TW", langcode == "zh_CN"
408         string const variety = lang.variety();
409         string const langcode = variety.empty() ?
410                                 lang.code() : lang.code() + '_' + variety;
411         string const name = lang.lang();
412         if ((code == langcode && name != "chinese-traditional")
413                 || (code == "zh_TW"  && name == "chinese-traditional"))
414                 return ExactMatch;
415         if ((code.size() == 2) && (langcode.size() > 2)
416                 && (code + '_' == langcode.substr(0, 3)))
417                 return ApproximateMatch;
418         if (code.substr(0,2) == langcode.substr(0,2))
419                 return VeryApproximateMatch;
420         return NoMatch;
421 }
422
423 } // namespace
424
425
426
427 Language const * Languages::getFromCode(string const & code) const
428 {
429         // 1/ exact match with any known language
430         for (auto const & l : languagelist_) {
431                 if (match(code, l.second) == ExactMatch)
432                         return &l.second;
433         }
434
435         // 2/ approximate with any known language
436         for (auto const & l : languagelist_) {
437                 if (match(code, l.second) == ApproximateMatch)
438                         return &l.second;
439         }
440         return nullptr;
441 }
442
443
444 Language const * Languages::getFromCode(string const & code,
445                         set<Language const *> const & tryfirst) const
446 {
447         // 1/ exact match with tryfirst list
448         for (auto const * lptr : tryfirst) {
449                 if (match(code, *lptr) == ExactMatch)
450                         return lptr;
451         }
452
453         // 2/ approximate match with tryfirst list
454         for (auto const * lptr : tryfirst) {
455                 Match const m = match(code, *lptr);
456                 if (m == ApproximateMatch || m == VeryApproximateMatch)
457                         return lptr;
458         }
459
460         // 3/ stricter match in all languages
461         return getFromCode(code);
462
463         LYXERR0("Unknown language `" << code << "'");
464         return nullptr;
465 }
466
467
468 void Languages::readLayoutTranslations(support::FileName const & filename)
469 {
470         Lexer lex;
471         lex.setFile(filename);
472         lex.setContext("Languages::read");
473
474         // 1) read all translations (exact and approximate matches) into trans
475         std::map<string, Language::TranslationMap> trans;
476         while (lex.isOK()) {
477                 if (!lex.checkFor("Translation")) {
478                         if (lex.isOK())
479                                 lex.printError("Unknown layout translation tag `$$Token'");
480                         break;
481                 }
482                 if (!lex.next(true))
483                         break;
484                 string const code = lex.getString();
485                 bool found = getFromCode(code);
486                 if (!found) {
487                         lex.printError("Unknown language `" + code + "'");
488                         break;
489                 }
490                 if (!readTranslations(lex, trans[code])) {
491                         lex.printError("Could not read layout translations for language `"
492                                 + code + "'");
493                         break;
494                 }
495         }
496
497         // 2) merge all translations into the languages
498         // exact translations overwrite approximate ones
499         for (auto & tr : trans) {
500                 for (auto & lang : languagelist_) {
501                         Match const m = match(tr.first, lang.second);
502                         if (m == NoMatch)
503                                 continue;
504                         lang.second.readLayoutTranslations(tr.second, m == ExactMatch);
505                 }
506         }
507
508 }
509
510
511 Language const * Languages::getLanguage(string const & language) const
512 {
513         if (language == "reset")
514                 return reset_language;
515         if (language == "ignore")
516                 return ignore_language;
517         const_iterator it = languagelist_.find(language);
518         return it == languagelist_.end() ? reset_language : &it->second;
519 }
520
521
522 } // namespace lyx