]> git.lyx.org Git - lyx.git/blob - src/Language.cpp
Refactoring based on cppcheck suggestions
[lyx.git] / src / Language.cpp
1 /**
2  * \file Language.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jean-Marc Lasgouttes
8  * \author Jürgen Spitzmüller
9  * \author Dekel Tsur
10  *
11  * Full author contact details are available in file CREDITS.
12  */
13
14 #include <config.h>
15
16 #include "Language.h"
17
18 #include "Encoding.h"
19 #include "LaTeXFonts.h"
20 #include "Lexer.h"
21 #include "LyXRC.h"
22
23 #include "support/debug.h"
24 #include "support/FileName.h"
25 #include "support/filetools.h"
26 #include "support/lassert.h"
27 #include "support/lstrings.h"
28 #include "support/qstring_helpers.h"
29 #include "support/Messages.h"
30
31 #include <QLocale>
32 #include <QString>
33
34 using namespace std;
35 using namespace lyx::support;
36
37 namespace lyx {
38
39 Languages languages;
40 Language const * ignore_language = 0;
41 Language const * default_language = 0;
42 Language const * latex_language = 0;
43 Language const * reset_language = 0;
44
45
46 bool Language::isPolyglossiaExclusive() const
47 {
48         return babel().empty() && !polyglossia().empty() && required().empty();
49 }
50
51
52 bool Language::isBabelExclusive() const
53 {
54         return !babel().empty() && polyglossia().empty() && required().empty();
55 }
56
57
58 docstring const Language::translateLayout(string const & m) const
59 {
60         if (m.empty())
61                 return docstring();
62
63         if (!isAscii(m)) {
64                 lyxerr << "Warning: not translating `" << m
65                        << "' because it is not pure ASCII.\n";
66                 return from_utf8(m);
67         }
68
69         TranslationMap::const_iterator it = layoutTranslations_.find(m);
70         if (it != layoutTranslations_.end())
71                 return it->second;
72
73         docstring t = from_ascii(m);
74         cleanTranslation(t);
75         return t;
76 }
77
78
79 string Language::fontenc(BufferParams const & params) const
80 {
81         // Don't use LaTeX fonts, so just return the language's preferred
82         // (although this is not used with nonTeXFonts anyway).
83         if (params.useNonTeXFonts)
84                 return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
85
86         // Determine optimal font encoding
87         // We check whether the used rm font supports an encoding our language supports
88         LaTeXFont const & lf =
89                 theLaTeXFonts().getLaTeXFont(from_ascii(params.fontsRoman()));
90         vector<string> const lfe = lf.fontencs();
91         for (auto & fe : fontenc_) {
92                 // ASCII means: support all T* encodings plus OT1
93                 if (fe == "ASCII") {
94                         for (auto & afe : lfe) {
95                                 if (afe == "OT1" || prefixIs(afe, "T"))
96                                         // we found a suitable one; return that.
97                                         return afe;
98                         }
99                 }
100                 // For other encodings, just check whether the font supports it
101                 if (lf.hasFontenc(fe))
102                         return fe;
103         }
104         // We did not find a suitable one; just take the first in the list,
105         // the priorized one (which is "T1" for ASCII).
106         return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
107 }
108
109
110 string Language::dateFormat(size_t i) const
111 {
112         if (i > dateformats_.size())
113                 return string();
114         return dateformats_.at(i);
115 }
116
117
118 docstring Language::decimalSeparator() const
119 {
120         if (lyxrc.default_decimal_sep == "locale") {
121                 QLocale loc = QLocale(toqstr(code()));
122                 return qstring_to_ucs4(QString(loc.decimalPoint()));
123         }
124         return from_utf8(lyxrc.default_decimal_sep);
125 }
126
127
128 bool Language::readLanguage(Lexer & lex)
129 {
130         enum LanguageTags {
131                 LA_BABELNAME = 1,
132                 LA_DATEFORMATS,
133                 LA_ENCODING,
134                 LA_END,
135                 LA_FONTENC,
136                 LA_GUINAME,
137                 LA_HAS_GUI_SUPPORT,
138                 LA_INTERNAL_ENC,
139                 LA_LANG_CODE,
140                 LA_LANG_VARIETY,
141                 LA_POLYGLOSSIANAME,
142                 LA_POLYGLOSSIAOPTS,
143                 LA_XINDYNAME,
144                 LA_POSTBABELPREAMBLE,
145                 LA_PREBABELPREAMBLE,
146                 LA_PROVIDES,
147                 LA_REQUIRES,
148                 LA_QUOTESTYLE,
149                 LA_RTL,
150                 LA_WORDWRAP,
151                 LA_ACTIVECHARS
152         };
153
154         // Keep these sorted alphabetically!
155         LexerKeyword languageTags[] = {
156                 { "activechars",          LA_ACTIVECHARS },
157                 { "babelname",            LA_BABELNAME },
158                 { "dateformats",          LA_DATEFORMATS },
159                 { "encoding",             LA_ENCODING },
160                 { "end",                  LA_END },
161                 { "fontencoding",         LA_FONTENC },
162                 { "guiname",              LA_GUINAME },
163                 { "hasguisupport",        LA_HAS_GUI_SUPPORT },
164                 { "internalencoding",     LA_INTERNAL_ENC },
165                 { "langcode",             LA_LANG_CODE },
166                 { "langvariety",          LA_LANG_VARIETY },
167                 { "polyglossianame",      LA_POLYGLOSSIANAME },
168                 { "polyglossiaopts",      LA_POLYGLOSSIAOPTS },
169                 { "postbabelpreamble",    LA_POSTBABELPREAMBLE },
170                 { "prebabelpreamble",     LA_PREBABELPREAMBLE },
171                 { "provides",             LA_PROVIDES },
172                 { "quotestyle",           LA_QUOTESTYLE },
173                 { "requires",             LA_REQUIRES },
174                 { "rtl",                  LA_RTL },
175                 { "wordwrap",             LA_WORDWRAP },
176                 { "xindyname",            LA_XINDYNAME }
177         };
178
179         bool error = false;
180         bool finished = false;
181         lex.pushTable(languageTags);
182         // parse style section
183         while (!finished && lex.isOK() && !error) {
184                 int le = lex.lex();
185                 // See comment in LyXRC.cpp.
186                 switch (le) {
187                 case Lexer::LEX_FEOF:
188                         continue;
189
190                 case Lexer::LEX_UNDEF: // parse error
191                         lex.printError("Unknown language tag `$$Token'");
192                         error = true;
193                         continue;
194
195                 default:
196                         break;
197                 }
198                 switch (static_cast<LanguageTags>(le)) {
199                 case LA_END: // end of structure
200                         finished = true;
201                         break;
202                 case LA_BABELNAME:
203                         lex >> babel_;
204                         break;
205                 case LA_POLYGLOSSIANAME:
206                         lex >> polyglossia_name_;
207                         break;
208                 case LA_POLYGLOSSIAOPTS:
209                         lex >> polyglossia_opts_;
210                         break;
211                 case LA_XINDYNAME:
212                         lex >> xindy_;
213                         break;
214                 case LA_QUOTESTYLE:
215                         lex >> quote_style_;
216                         break;
217                 case LA_ACTIVECHARS:
218                         lex >> active_chars_;
219                         break;
220                 case LA_ENCODING:
221                         lex >> encodingStr_;
222                         break;
223                 case LA_FONTENC: {
224                         lex.eatLine();
225                         vector<string> const fe =
226                                 getVectorFromString(lex.getString(true), "|");
227                         fontenc_.insert(fontenc_.end(), fe.begin(), fe.end());
228                         break;
229                 }
230                 case LA_DATEFORMATS: {
231                         lex.eatLine();
232                         vector<string> const df =
233                                 getVectorFromString(trim(lex.getString(true), "\""), "|");
234                         dateformats_.insert(dateformats_.end(), df.begin(), df.end());
235                         break;
236                 }
237                 case LA_GUINAME:
238                         lex >> display_;
239                         break;
240                 case LA_HAS_GUI_SUPPORT:
241                         lex >> has_gui_support_;
242                         break;
243                 case LA_INTERNAL_ENC:
244                         lex >> internal_enc_;
245                         break;
246                 case LA_LANG_CODE:
247                         lex >> code_;
248                         break;
249                 case LA_LANG_VARIETY:
250                         lex >> variety_;
251                         break;
252                 case LA_POSTBABELPREAMBLE:
253                         babel_postsettings_ =
254                                 lex.getLongString(from_ascii("EndPostBabelPreamble"));
255                         break;
256                 case LA_PREBABELPREAMBLE:
257                         babel_presettings_ =
258                                 lex.getLongString(from_ascii("EndPreBabelPreamble"));
259                         break;
260                 case LA_REQUIRES:
261                         lex >> required_;
262                         break;
263                 case LA_PROVIDES:
264                         lex >> provides_;
265                         break;
266                 case LA_RTL:
267                         lex >> rightToLeft_;
268                         break;
269                 case LA_WORDWRAP:
270                         lex >> word_wrap_;
271                         break;
272                 }
273         }
274         lex.popTable();
275         return finished && !error;
276 }
277
278
279 bool Language::read(Lexer & lex)
280 {
281         encoding_ = nullptr;
282         internal_enc_ = false;
283         rightToLeft_ = false;
284
285         if (!lex.next()) {
286                 lex.printError("No name given for language: `$$Token'.");
287                 return false;
288         }
289
290         lang_ = lex.getString();
291         LYXERR(Debug::INFO, "Reading language " << lang_);
292         if (!readLanguage(lex)) {
293                 LYXERR0("Error parsing language `" << lang_ << '\'');
294                 return false;
295         }
296
297         encoding_ = encodings.fromLyXName(encodingStr_);
298         if (!encoding_ && !encodingStr_.empty()) {
299                 encoding_ = encodings.fromLyXName("iso8859-1");
300                 LYXERR0("Unknown encoding " << encodingStr_);
301         }
302         if (fontenc_.empty())
303                 fontenc_.push_back("ASCII");
304         if (dateformats_.empty()) {
305                 dateformats_.push_back("MMMM dd, yyyy");
306                 dateformats_.push_back("MMM dd, yyyy");
307                 dateformats_.push_back("M/d/yyyy");
308         }
309         return true;
310 }
311
312
313 void Language::readLayoutTranslations(Language::TranslationMap const & trans, bool replace)
314 {
315         for (auto const & t : trans) {
316                 if (replace
317                     || layoutTranslations_.find(t.first) == layoutTranslations_.end())
318                         layoutTranslations_[t.first] = t.second;
319         }
320 }
321
322
323 void Languages::read(FileName const & filename)
324 {
325         Lexer lex;
326         lex.setFile(filename);
327         lex.setContext("Languages::read");
328         while (lex.isOK()) {
329                 int le = lex.lex();
330                 switch (le) {
331                 case Lexer::LEX_FEOF:
332                         continue;
333
334                 default:
335                         break;
336                 }
337                 if (lex.getString() != "Language") {
338                         lex.printError("Unknown Language tag `$$Token'");
339                         continue;
340                 }
341                 Language l;
342                 l.read(lex);
343                 if (!lex)
344                         break;
345                 if (l.lang() == "latex") {
346                         // Check if latex language was not already defined.
347                         LASSERT(latex_language == 0, continue);
348                         static const Language latex_lang = l;
349                         latex_language = &latex_lang;
350                 } else if (l.lang() == "ignore") {
351                         // Check if ignore language was not already defined.
352                         LASSERT(ignore_language == 0, continue);
353                         static const Language ignore_lang = l;
354                         ignore_language = &ignore_lang;
355                 } else
356                         languagelist_[l.lang()] = l;
357         }
358
359         default_language = getLanguage("english");
360         if (!default_language) {
361                 LYXERR0("Default language \"english\" not found!");
362                 default_language = &(*languagelist_.begin()).second;
363                 LYXERR0("Using \"" << default_language->lang() << "\" instead!");
364         }
365
366         // Read layout translations
367         FileName const path = libFileSearch(string(), "layouttranslations");
368         readLayoutTranslations(path);
369 }
370
371
372 namespace {
373
374 bool readTranslations(Lexer & lex, Language::TranslationMap & trans)
375 {
376         while (lex.isOK()) {
377                 if (lex.checkFor("End"))
378                         break;
379                 if (!lex.next(true))
380                         return false;
381                 string const key = lex.getString();
382                 if (!lex.next(true))
383                         return false;
384                 docstring const val = lex.getDocString();
385                 trans[key] = val;
386         }
387         return true;
388 }
389
390
// How well a language code fits a language, as determined by match().
enum Match {
        // the code does not fit the language at all
        NoMatch,
        // a two-letter code that equals the part of the language's code
        // in front of the underscore (e.g. "de" and "de_DE")
        ApproximateMatch,
        // only the first two characters of both codes agree
        VeryApproximateMatch,
        // the code is the code of the language
        ExactMatch
};
397
398
399 Match match(string const & code, Language const & lang)
400 {
401         // we need to mimic gettext: code can be a two-letter code, which
402         // should match all variants, e.g. "de" should match "de_DE",
403         // "de_AT" etc.
404         // special case for chinese:
405         // simplified  => code == "zh_CN", langcode == "zh_CN"
406         // traditional => code == "zh_TW", langcode == "zh_CN"
407         string const variety = lang.variety();
408         string const langcode = variety.empty() ?
409                                 lang.code() : lang.code() + '_' + variety;
410         string const name = lang.lang();
411         if ((code == langcode && name != "chinese-traditional")
412                 || (code == "zh_TW"  && name == "chinese-traditional"))
413                 return ExactMatch;
414         if ((code.size() == 2) && (langcode.size() > 2)
415                 && (code + '_' == langcode.substr(0, 3)))
416                 return ApproximateMatch;
417         if (code.substr(0,2) == langcode.substr(0,2))
418                 return VeryApproximateMatch;
419         return NoMatch;
420 }
421
422 } // namespace
423
424
425
426 Language const * Languages::getFromCode(string const & code) const
427 {
428         // 1/ exact match with any known language
429         for (auto const & l : languagelist_) {
430                 if (match(code, l.second) == ExactMatch)
431                         return &l.second;
432         }
433
434         // 2/ approximate with any known language
435         for (auto const & l : languagelist_) {
436                 if (match(code, l.second) == ApproximateMatch)
437                         return &l.second;
438         }
439         return 0;
440 }
441
442
443 Language const * Languages::getFromCode(string const & code,
444                         set<Language const *> const & tryfirst) const
445 {
446         // 1/ exact match with tryfirst list
447         for (auto const * lptr : tryfirst) {
448                 if (match(code, *lptr) == ExactMatch)
449                         return lptr;
450         }
451
452         // 2/ approximate match with tryfirst list
453         for (auto const * lptr : tryfirst) {
454                 Match const m = match(code, *lptr);
455                 if (m == ApproximateMatch || m == VeryApproximateMatch)
456                         return lptr;
457         }
458
459         // 3/ stricter match in all languages
460         return getFromCode(code);
461
462         LYXERR0("Unknown language `" << code << "'");
463         return 0;
464 }
465
466
467 void Languages::readLayoutTranslations(support::FileName const & filename)
468 {
469         Lexer lex;
470         lex.setFile(filename);
471         lex.setContext("Languages::read");
472
473         // 1) read all translations (exact and approximate matches) into trans
474         std::map<string, Language::TranslationMap> trans;
475         while (lex.isOK()) {
476                 if (!lex.checkFor("Translation")) {
477                         if (lex.isOK())
478                                 lex.printError("Unknown layout translation tag `$$Token'");
479                         break;
480                 }
481                 if (!lex.next(true))
482                         break;
483                 string const code = lex.getString();
484                 bool found = getFromCode(code);
485                 if (!found) {
486                         lex.printError("Unknown language `" + code + "'");
487                         break;
488                 }
489                 if (!readTranslations(lex, trans[code])) {
490                         lex.printError("Could not read layout translations for language `"
491                                 + code + "'");
492                         break;
493                 }
494         }
495
496         // 2) merge all translations into the languages
497         // exact translations overwrite approximate ones
498         for (auto & tr : trans) {
499                 for (auto & lang : languagelist_) {
500                         Match const m = match(tr.first, lang.second);
501                         if (m == NoMatch)
502                                 continue;
503                         lang.second.readLayoutTranslations(tr.second, m == ExactMatch);
504                 }
505         }
506
507 }
508
509
510 Language const * Languages::getLanguage(string const & language) const
511 {
512         if (language == "reset")
513                 return reset_language;
514         if (language == "ignore")
515                 return ignore_language;
516         const_iterator it = languagelist_.find(language);
517         return it == languagelist_.end() ? reset_language : &it->second;
518 }
519
520
521 } // namespace lyx