]> git.lyx.org Git - lyx.git/blob - src/Language.cpp
Avoid full metrics computation with Update:FitCursor
[lyx.git] / src / Language.cpp
1 /**
2  * \file Language.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jean-Marc Lasgouttes
8  * \author Jürgen Spitzmüller
9  * \author Dekel Tsur
10  *
11  * Full author contact details are available in file CREDITS.
12  */
13
14 #include <config.h>
15
16 #include "Language.h"
17
18 #include "BufferParams.h"
19 #include "Encoding.h"
20 #include "LaTeXFonts.h"
21 #include "LyXRC.h"
22
23 #include "support/debug.h"
24 #include "support/docstring.h"
25 #include "support/FileName.h"
26 #include "support/filetools.h"
27 #include "support/lassert.h"
28 #include "support/Lexer.h"
29 #include "support/lstrings.h"
30 #include "support/qstring_helpers.h"
31 #include "support/Messages.h"
32
33 #include <QLocale>
34 #include <QString>
35
36 using namespace std;
37 using namespace lyx::support;
38
39 namespace lyx {
40
41 Languages languages;
42 Language const * ignore_language = nullptr;
43 Language const * default_language = nullptr;
44 Language const * latex_language = nullptr;
45 Language const * reset_language = nullptr;
46
47
48 bool Language::isPolyglossiaExclusive() const
49 {
50         return babel().empty() && !polyglossia().empty() && required().empty();
51 }
52
53
54 bool Language::isBabelExclusive() const
55 {
56         return !babel().empty() && polyglossia().empty() && required().empty();
57 }
58
59
60 docstring const Language::translateLayout(string const & msg) const
61 {
62         if (msg.empty())
63                 return docstring();
64
65         if (!isAscii(msg)) {
66                 lyxerr << "Warning: not translating `" << msg
67                        << "' because it is not pure ASCII.\n";
68                 return from_utf8(msg);
69         }
70
71         TranslationMap::const_iterator it = layoutTranslations_.find(msg);
72         if (it != layoutTranslations_.end())
73                 return it->second;
74
75         docstring t = from_ascii(msg);
76         cleanTranslation(t);
77         return t;
78 }
79
80
81 string Language::fontenc(BufferParams const & params) const
82 {
83         // Don't use LaTeX fonts, so just return the language's preferred
84         // (although this is not used with nonTeXFonts anyway).
85         if (params.useNonTeXFonts)
86                 return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
87
88         // Determine optimal font encoding
89         // We check whether the used rm font supports an encoding our language supports
90         LaTeXFont const & lf =
91                 theLaTeXFonts().getLaTeXFont(from_ascii(params.fontsRoman()));
92         vector<string> const & lfe = lf.fontencs();
93         for (auto & fe : fontenc_) {
94                 // ASCII means: support all T* encodings plus OT1
95                 if (fe == "ASCII") {
96                         for (auto & afe : lfe) {
97                                 if (afe == "OT1" || prefixIs(afe, "T"))
98                                         // we found a suitable one; return that.
99                                         return afe;
100                         }
101                 }
102                 // For other encodings, just check whether the font supports it
103                 if (lf.hasFontenc(fe))
104                         return fe;
105         }
106         // We did not find a suitable one; just take the first in the list,
107         // the priorized one (which is "T1" for ASCII).
108         return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
109 }
110
111
112 string Language::dateFormat(size_t i) const
113 {
114         if (i > dateformats_.size())
115                 return string();
116         return dateformats_.at(i);
117 }
118
119
120 docstring Language::decimalSeparator() const
121 {
122         if (lyxrc.default_decimal_sep == "locale") {
123                 QLocale loc = QLocale(toqstr(code()));
124                 return qstring_to_ucs4(QString(loc.decimalPoint()));
125         }
126         return from_utf8(lyxrc.default_decimal_sep);
127 }
128
129
130 bool Language::readLanguage(Lexer & lex)
131 {
132         enum LanguageTags {
133                 LA_BABELNAME = 1,
134                 LA_DATEFORMATS,
135                 LA_ENCODING,
136                 LA_END,
137                 LA_FONTENC,
138                 LA_GUINAME,
139                 LA_HAS_GUI_SUPPORT,
140                 LA_INTERNAL_ENC,
141                 LA_LANG_CODE,
142                 LA_LANG_VARIETY,
143                 LA_POLYGLOSSIANAME,
144                 LA_POLYGLOSSIAOPTS,
145                 LA_XINDYNAME,
146                 LA_POSTBABELPREAMBLE,
147                 LA_PREBABELPREAMBLE,
148                 LA_PROVIDES,
149                 LA_REQUIRES,
150                 LA_QUOTESTYLE,
151                 LA_RTL,
152                 LA_WORDWRAP,
153                 LA_ACTIVECHARS
154         };
155
156         // Keep these sorted alphabetically!
157         LexerKeyword languageTags[] = {
158                 { "activechars",          LA_ACTIVECHARS },
159                 { "babelname",            LA_BABELNAME },
160                 { "dateformats",          LA_DATEFORMATS },
161                 { "encoding",             LA_ENCODING },
162                 { "end",                  LA_END },
163                 { "fontencoding",         LA_FONTENC },
164                 { "guiname",              LA_GUINAME },
165                 { "hasguisupport",        LA_HAS_GUI_SUPPORT },
166                 { "internalencoding",     LA_INTERNAL_ENC },
167                 { "langcode",             LA_LANG_CODE },
168                 { "langvariety",          LA_LANG_VARIETY },
169                 { "polyglossianame",      LA_POLYGLOSSIANAME },
170                 { "polyglossiaopts",      LA_POLYGLOSSIAOPTS },
171                 { "postbabelpreamble",    LA_POSTBABELPREAMBLE },
172                 { "prebabelpreamble",     LA_PREBABELPREAMBLE },
173                 { "provides",             LA_PROVIDES },
174                 { "quotestyle",           LA_QUOTESTYLE },
175                 { "requires",             LA_REQUIRES },
176                 { "rtl",                  LA_RTL },
177                 { "wordwrap",             LA_WORDWRAP },
178                 { "xindyname",            LA_XINDYNAME }
179         };
180
181         bool error = false;
182         bool finished = false;
183         lex.pushTable(languageTags);
184         // parse style section
185         while (!finished && lex.isOK() && !error) {
186                 int le = lex.lex();
187                 // See comment in LyXRC.cpp.
188                 switch (le) {
189                 case Lexer::LEX_FEOF:
190                         continue;
191
192                 case Lexer::LEX_UNDEF: // parse error
193                         lex.printError("Unknown language tag `$$Token'");
194                         error = true;
195                         continue;
196
197                 default:
198                         break;
199                 }
200                 switch (static_cast<LanguageTags>(le)) {
201                 case LA_END: // end of structure
202                         finished = true;
203                         break;
204                 case LA_BABELNAME:
205                         lex >> babel_;
206                         break;
207                 case LA_POLYGLOSSIANAME:
208                         lex >> polyglossia_name_;
209                         break;
210                 case LA_POLYGLOSSIAOPTS:
211                         lex >> polyglossia_opts_;
212                         break;
213                 case LA_XINDYNAME:
214                         lex >> xindy_;
215                         break;
216                 case LA_QUOTESTYLE:
217                         lex >> quote_style_;
218                         break;
219                 case LA_ACTIVECHARS:
220                         lex >> active_chars_;
221                         break;
222                 case LA_ENCODING:
223                         lex >> encodingStr_;
224                         break;
225                 case LA_FONTENC: {
226                         lex.eatLine();
227                         vector<string> const fe =
228                                 getVectorFromString(lex.getString(true), "|");
229                         fontenc_.insert(fontenc_.end(), fe.begin(), fe.end());
230                         break;
231                 }
232                 case LA_DATEFORMATS: {
233                         lex.eatLine();
234                         vector<string> const df =
235                                 getVectorFromString(trim(lex.getString(true), "\""), "|");
236                         dateformats_.insert(dateformats_.end(), df.begin(), df.end());
237                         break;
238                 }
239                 case LA_GUINAME:
240                         lex >> display_;
241                         break;
242                 case LA_HAS_GUI_SUPPORT:
243                         lex >> has_gui_support_;
244                         break;
245                 case LA_INTERNAL_ENC:
246                         lex >> internal_enc_;
247                         break;
248                 case LA_LANG_CODE:
249                         lex >> code_;
250                         break;
251                 case LA_LANG_VARIETY:
252                         lex >> variety_;
253                         break;
254                 case LA_POSTBABELPREAMBLE:
255                         babel_postsettings_ =
256                                 lex.getLongString(from_ascii("EndPostBabelPreamble"));
257                         break;
258                 case LA_PREBABELPREAMBLE:
259                         babel_presettings_ =
260                                 lex.getLongString(from_ascii("EndPreBabelPreamble"));
261                         break;
262                 case LA_REQUIRES:
263                         lex >> required_;
264                         break;
265                 case LA_PROVIDES:
266                         lex >> provides_;
267                         break;
268                 case LA_RTL:
269                         lex >> rightToLeft_;
270                         break;
271                 case LA_WORDWRAP:
272                         lex >> word_wrap_;
273                         break;
274                 }
275         }
276         lex.popTable();
277         return finished && !error;
278 }
279
280
281 bool Language::read(Lexer & lex)
282 {
283         encoding_ = nullptr;
284         internal_enc_ = false;
285         rightToLeft_ = false;
286
287         if (!lex.next()) {
288                 lex.printError("No name given for language: `$$Token'.");
289                 return false;
290         }
291
292         lang_ = lex.getString();
293         LYXERR(Debug::INFO, "Reading language " << lang_);
294         if (!readLanguage(lex)) {
295                 LYXERR0("Error parsing language `" << lang_ << '\'');
296                 return false;
297         }
298
299         encoding_ = encodings.fromLyXName(encodingStr_);
300         if (!encoding_ && !encodingStr_.empty()) {
301                 encoding_ = encodings.fromLyXName("iso8859-1");
302                 LYXERR0("Unknown encoding " << encodingStr_);
303         }
304         if (fontenc_.empty())
305                 fontenc_.push_back("ASCII");
306         if (dateformats_.empty()) {
307                 dateformats_.push_back("MMMM dd, yyyy");
308                 dateformats_.push_back("MMM dd, yyyy");
309                 dateformats_.push_back("M/d/yyyy");
310         }
311         return true;
312 }
313
314
315 void Language::readLayoutTranslations(Language::TranslationMap const & trans, bool replace)
316 {
317         for (auto const & t : trans) {
318                 if (replace
319                     || layoutTranslations_.find(t.first) == layoutTranslations_.end())
320                         layoutTranslations_[t.first] = t.second;
321         }
322 }
323
324
325 void Languages::read(FileName const & filename)
326 {
327         Lexer lex;
328         lex.setFile(filename);
329         lex.setContext("Languages::read");
330         while (lex.isOK()) {
331                 int le = lex.lex();
332                 switch (le) {
333                 case Lexer::LEX_FEOF:
334                         continue;
335
336                 default:
337                         break;
338                 }
339                 if (lex.getString() != "Language") {
340                         lex.printError("Unknown Language tag `$$Token'");
341                         continue;
342                 }
343                 Language l;
344                 l.read(lex);
345                 if (!lex)
346                         break;
347                 if (l.lang() == "latex") {
348                         // Check if latex language was not already defined.
349                         LASSERT(latex_language == nullptr, continue);
350                         static const Language latex_lang = l;
351                         latex_language = &latex_lang;
352                 } else if (l.lang() == "ignore") {
353                         // Check if ignore language was not already defined.
354                         LASSERT(ignore_language == nullptr, continue);
355                         static const Language ignore_lang = l;
356                         ignore_language = &ignore_lang;
357                 } else
358                         languagelist_[l.lang()] = l;
359         }
360
361         default_language = getLanguage("english");
362         if (!default_language) {
363                 LYXERR0("Default language \"english\" not found!");
364                 default_language = &(*languagelist_.begin()).second;
365                 LYXERR0("Using \"" << default_language->lang() << "\" instead!");
366         }
367
368         // Read layout translations
369         FileName const path = libFileSearch(string(), "layouttranslations");
370         readLayoutTranslations(path);
371 }
372
373
374 namespace {
375
376 bool readTranslations(Lexer & lex, Language::TranslationMap & trans)
377 {
378         while (lex.isOK()) {
379                 if (lex.checkFor("End"))
380                         break;
381                 if (!lex.next(true))
382                         return false;
383                 string const key = lex.getString();
384                 if (!lex.next(true))
385                         return false;
386                 docstring const val = lex.getDocString();
387                 trans[key] = val;
388         }
389         return true;
390 }
391
392
393 enum Match {
394         NoMatch,
395         ApproximateMatch,
396         VeryApproximateMatch,
397         ExactMatch
398 };
399
400
401 Match match(string const & code, Language const & lang)
402 {
403         // we need to mimic gettext: code can be a two-letter code, which
404         // should match all variants, e.g. "de" should match "de_DE",
405         // "de_AT" etc.
406         // special case for chinese:
407         // simplified  => code == "zh_CN", langcode == "zh_CN"
408         // traditional => code == "zh_TW", langcode == "zh_CN"
409         string const variety = lang.variety();
410         string const langcode = variety.empty() ?
411                                 lang.code() : lang.code() + '_' + variety;
412         string const name = lang.lang();
413         if ((code == langcode && name != "chinese-traditional")
414                 || (code == "zh_TW"  && name == "chinese-traditional"))
415                 return ExactMatch;
416         if ((code.size() == 2) && (langcode.size() > 2)
417                 && (code + '_' == langcode.substr(0, 3)))
418                 return ApproximateMatch;
419         if (code.substr(0,2) == langcode.substr(0,2))
420                 return VeryApproximateMatch;
421         return NoMatch;
422 }
423
424 } // namespace
425
426
427
428 Language const * Languages::getFromCode(string const & code) const
429 {
430         // 1/ exact match with any known language
431         for (auto const & l : languagelist_) {
432                 if (match(code, l.second) == ExactMatch)
433                         return &l.second;
434         }
435
436         // 2/ approximate with any known language
437         for (auto const & l : languagelist_) {
438                 if (match(code, l.second) == ApproximateMatch)
439                         return &l.second;
440         }
441         return nullptr;
442 }
443
444
445 Language const * Languages::getFromCode(string const & code,
446                         set<Language const *> const & tryfirst) const
447 {
448         // 1/ exact match with tryfirst list
449         for (auto const * lptr : tryfirst) {
450                 if (match(code, *lptr) == ExactMatch)
451                         return lptr;
452         }
453
454         // 2/ approximate match with tryfirst list
455         for (auto const * lptr : tryfirst) {
456                 Match const m = match(code, *lptr);
457                 if (m == ApproximateMatch || m == VeryApproximateMatch)
458                         return lptr;
459         }
460
461         // 3/ stricter match in all languages
462         return getFromCode(code);
463
464         LYXERR0("Unknown language `" << code << "'");
465         return nullptr;
466 }
467
468
469 void Languages::readLayoutTranslations(support::FileName const & filename)
470 {
471         Lexer lex;
472         lex.setFile(filename);
473         lex.setContext("Languages::read");
474
475         // 1) read all translations (exact and approximate matches) into trans
476         std::map<string, Language::TranslationMap> trans;
477         while (lex.isOK()) {
478                 if (!lex.checkFor("Translation")) {
479                         if (lex.isOK())
480                                 lex.printError("Unknown layout translation tag `$$Token'");
481                         break;
482                 }
483                 if (!lex.next(true))
484                         break;
485                 string const code = lex.getString();
486                 bool found = getFromCode(code);
487                 if (!found) {
488                         lex.printError("Unknown language `" + code + "'");
489                         break;
490                 }
491                 if (!readTranslations(lex, trans[code])) {
492                         lex.printError("Could not read layout translations for language `"
493                                 + code + "'");
494                         break;
495                 }
496         }
497
498         // 2) merge all translations into the languages
499         // exact translations overwrite approximate ones
500         for (auto & tr : trans) {
501                 for (auto & lang : languagelist_) {
502                         Match const m = match(tr.first, lang.second);
503                         if (m == NoMatch)
504                                 continue;
505                         lang.second.readLayoutTranslations(tr.second, m == ExactMatch);
506                 }
507         }
508
509 }
510
511
512 Language const * Languages::getLanguage(string const & language) const
513 {
514         if (language == "reset")
515                 return reset_language;
516         if (language == "ignore")
517                 return ignore_language;
518         const_iterator it = languagelist_.find(language);
519         return it == languagelist_.end() ? reset_language : &it->second;
520 }
521
522
523 } // namespace lyx