]> git.lyx.org Git - lyx.git/blob - src/Language.cpp
Set language to OS input language when moving cursor
[lyx.git] / src / Language.cpp
1 /**
2  * \file Language.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jean-Marc Lasgouttes
8  * \author Jürgen Spitzmüller
9  * \author Dekel Tsur
10  *
11  * Full author contact details are available in file CREDITS.
12  */
13
14 #include <config.h>
15
16 #include "Language.h"
17
18 #include "Encoding.h"
19 #include "LaTeXFonts.h"
20 #include "Lexer.h"
21 #include "LyXRC.h"
22
23 #include "support/debug.h"
24 #include "support/FileName.h"
25 #include "support/filetools.h"
26 #include "support/lassert.h"
27 #include "support/lstrings.h"
28 #include "support/Messages.h"
29
30 using namespace std;
31 using namespace lyx::support;
32
33 namespace lyx {
34
35 Languages languages;
36 Language const * ignore_language = 0;
37 Language const * default_language = 0;
38 Language const * latex_language = 0;
39 Language const * reset_language = 0;
40
41
42 bool Language::isPolyglossiaExclusive() const
43 {
44         return babel().empty() && !polyglossia().empty() && requires().empty();
45 }
46
47
48 bool Language::isBabelExclusive() const
49 {
50         return !babel().empty() && polyglossia().empty() && requires().empty();
51 }
52
53
54 docstring const Language::translateLayout(string const & m) const
55 {
56         if (m.empty())
57                 return docstring();
58
59         if (!isAscii(m)) {
60                 lyxerr << "Warning: not translating `" << m
61                        << "' because it is not pure ASCII.\n";
62                 return from_utf8(m);
63         }
64
65         TranslationMap::const_iterator it = layoutTranslations_.find(m);
66         if (it != layoutTranslations_.end())
67                 return it->second;
68
69         docstring t = from_ascii(m);
70         cleanTranslation(t);
71         return t;
72 }
73
74
75 string Language::fontenc(BufferParams const & params) const
76 {
77         // Don't use LaTeX fonts, so just return the language's preferred
78         // (although this is not used with nonTeXFonts anyway).
79         if (params.useNonTeXFonts)
80                 return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
81
82         // Determine optimal font encoding
83         // We check whether the used rm font supports an encoding our language supports
84         LaTeXFont const & lf =
85                 theLaTeXFonts().getLaTeXFont(from_ascii(params.fontsRoman()));
86         vector<string> const lfe = lf.fontencs();
87         for (auto & fe : fontenc_) {
88                 // ASCII means: support all T* encodings plus OT1
89                 if (fe == "ASCII") {
90                         for (auto & afe : lfe) {
91                                 if (afe == "OT1" || prefixIs(afe, "T"))
92                                         // we found a suitable one; return that.
93                                         return afe;
94                         }
95                 }
96                 // For other encodings, just check whether the font supports it
97                 if (lf.hasFontenc(fe))
98                         return fe;
99         }
100         // We did not find a suitable one; just take the first in the list,
101         // the priorized one (which is "T1" for ASCII).
102         return fontenc_.front() == "ASCII" ? "T1" : fontenc_.front();
103 }
104
105
106 string Language::dateFormat(size_t i) const
107 {
108         if (i > dateformats_.size())
109                 return string();
110         return dateformats_.at(i);
111 }
112
113
114 bool Language::readLanguage(Lexer & lex)
115 {
116         enum LanguageTags {
117                 LA_BABELNAME = 1,
118                 LA_DATEFORMATS,
119                 LA_ENCODING,
120                 LA_END,
121                 LA_FONTENC,
122                 LA_GUINAME,
123                 LA_HAS_GUI_SUPPORT,
124                 LA_INTERNAL_ENC,
125                 LA_LANG_CODE,
126                 LA_LANG_VARIETY,
127                 LA_POLYGLOSSIANAME,
128                 LA_POLYGLOSSIAOPTS,
129                 LA_POSTBABELPREAMBLE,
130                 LA_PREBABELPREAMBLE,
131                 LA_PROVIDES,
132                 LA_REQUIRES,
133                 LA_QUOTESTYLE,
134                 LA_RTL,
135                 LA_WORDWRAP
136         };
137
138         // Keep these sorted alphabetically!
139         LexerKeyword languageTags[] = {
140                 { "babelname",            LA_BABELNAME },
141                 { "dateformats",          LA_DATEFORMATS },
142                 { "encoding",             LA_ENCODING },
143                 { "end",                  LA_END },
144                 { "fontencoding",         LA_FONTENC },
145                 { "guiname",              LA_GUINAME },
146                 { "hasguisupport",        LA_HAS_GUI_SUPPORT },
147                 { "internalencoding",     LA_INTERNAL_ENC },
148                 { "langcode",             LA_LANG_CODE },
149                 { "langvariety",          LA_LANG_VARIETY },
150                 { "polyglossianame",      LA_POLYGLOSSIANAME },
151                 { "polyglossiaopts",      LA_POLYGLOSSIAOPTS },
152                 { "postbabelpreamble",    LA_POSTBABELPREAMBLE },
153                 { "prebabelpreamble",     LA_PREBABELPREAMBLE },
154                 { "provides",             LA_PROVIDES },
155                 { "quotestyle",           LA_QUOTESTYLE },
156                 { "requires",             LA_REQUIRES },
157                 { "rtl",                  LA_RTL },
158                 {"wordwrap",              LA_WORDWRAP }
159         };
160
161         bool error = false;
162         bool finished = false;
163         lex.pushTable(languageTags);
164         // parse style section
165         while (!finished && lex.isOK() && !error) {
166                 int le = lex.lex();
167                 // See comment in LyXRC.cpp.
168                 switch (le) {
169                 case Lexer::LEX_FEOF:
170                         continue;
171
172                 case Lexer::LEX_UNDEF: // parse error
173                         lex.printError("Unknown language tag `$$Token'");
174                         error = true;
175                         continue;
176
177                 default:
178                         break;
179                 }
180                 switch (static_cast<LanguageTags>(le)) {
181                 case LA_END: // end of structure
182                         finished = true;
183                         break;
184                 case LA_BABELNAME:
185                         lex >> babel_;
186                         break;
187                 case LA_POLYGLOSSIANAME:
188                         lex >> polyglossia_name_;
189                         break;
190                 case LA_POLYGLOSSIAOPTS:
191                         lex >> polyglossia_opts_;
192                         break;
193                 case LA_QUOTESTYLE:
194                         lex >> quote_style_;
195                         break;
196                 case LA_ENCODING:
197                         lex >> encodingStr_;
198                         break;
199                 case LA_FONTENC: {
200                         lex.eatLine();
201                         vector<string> const fe =
202                                 getVectorFromString(lex.getString(true), "|");
203                         fontenc_.insert(fontenc_.end(), fe.begin(), fe.end());
204                         break;
205                 }
206                 case LA_DATEFORMATS: {
207                         lex.eatLine();
208                         vector<string> const df =
209                                 getVectorFromString(trim(lex.getString(true), "\""), "|");
210                         dateformats_.insert(dateformats_.end(), df.begin(), df.end());
211                         break;
212                 }
213                 case LA_GUINAME:
214                         lex >> display_;
215                         break;
216                 case LA_HAS_GUI_SUPPORT:
217                         lex >> has_gui_support_;
218                         break;
219                 case LA_INTERNAL_ENC:
220                         lex >> internal_enc_;
221                         break;
222                 case LA_LANG_CODE:
223                         lex >> code_;
224                         break;
225                 case LA_LANG_VARIETY:
226                         lex >> variety_;
227                         break;
228                 case LA_POSTBABELPREAMBLE:
229                         babel_postsettings_ =
230                                 lex.getLongString(from_ascii("EndPostBabelPreamble"));
231                         break;
232                 case LA_PREBABELPREAMBLE:
233                         babel_presettings_ =
234                                 lex.getLongString(from_ascii("EndPreBabelPreamble"));
235                         break;
236                 case LA_REQUIRES:
237                         lex >> requires_;
238                         break;
239                 case LA_PROVIDES:
240                         lex >> provides_;
241                         break;
242                 case LA_RTL:
243                         lex >> rightToLeft_;
244                         break;
245                 case LA_WORDWRAP:
246                         lex >> word_wrap_;
247                         break;
248                 }
249         }
250         lex.popTable();
251         return finished && !error;
252 }
253
254
255 bool Language::read(Lexer & lex)
256 {
257         encoding_ = 0;
258         internal_enc_ = 0;
259         rightToLeft_ = 0;
260
261         if (!lex.next()) {
262                 lex.printError("No name given for language: `$$Token'.");
263                 return false;
264         }
265
266         lang_ = lex.getString();
267         LYXERR(Debug::INFO, "Reading language " << lang_);
268         if (!readLanguage(lex)) {
269                 LYXERR0("Error parsing language `" << lang_ << '\'');
270                 return false;
271         }
272
273         encoding_ = encodings.fromLyXName(encodingStr_);
274         if (!encoding_ && !encodingStr_.empty()) {
275                 encoding_ = encodings.fromLyXName("iso8859-1");
276                 LYXERR0("Unknown encoding " << encodingStr_);
277         }
278         if (fontenc_.empty())
279                 fontenc_.push_back("ASCII");
280         if (dateformats_.empty()) {
281                 dateformats_.push_back("MMMM dd, yyyy");
282                 dateformats_.push_back("MMM dd, yyyy");
283                 dateformats_.push_back("M/d/yyyy");
284         }
285         return true;
286 }
287
288
289 void Language::readLayoutTranslations(Language::TranslationMap const & trans, bool replace)
290 {
291         for (auto const & t : trans) {
292                 if (replace
293                     || layoutTranslations_.find(t.first) == layoutTranslations_.end())
294                         layoutTranslations_[t.first] = t.second;
295         }
296 }
297
298
299 void Languages::read(FileName const & filename)
300 {
301         Lexer lex;
302         lex.setFile(filename);
303         lex.setContext("Languages::read");
304         while (lex.isOK()) {
305                 int le = lex.lex();
306                 switch (le) {
307                 case Lexer::LEX_FEOF:
308                         continue;
309
310                 default:
311                         break;
312                 }
313                 if (lex.getString() != "Language") {
314                         lex.printError("Unknown Language tag `$$Token'");
315                         continue;
316                 }
317                 Language l;
318                 l.read(lex);
319                 if (!lex)
320                         break;
321                 if (l.lang() == "latex") {
322                         // Check if latex language was not already defined.
323                         LASSERT(latex_language == 0, continue);
324                         static const Language latex_lang = l;
325                         latex_language = &latex_lang;
326                 } else if (l.lang() == "ignore") {
327                         // Check if ignore language was not already defined.
328                         LASSERT(ignore_language == 0, continue);
329                         static const Language ignore_lang = l;
330                         ignore_language = &ignore_lang;
331                 } else
332                         languagelist_[l.lang()] = l;
333         }
334
335         default_language = getLanguage("english");
336         if (!default_language) {
337                 LYXERR0("Default language \"english\" not found!");
338                 default_language = &(*languagelist_.begin()).second;
339                 LYXERR0("Using \"" << default_language->lang() << "\" instead!");
340         }
341
342         // Read layout translations
343         FileName const path = libFileSearch(string(), "layouttranslations");
344         readLayoutTranslations(path);
345 }
346
347
348 namespace {
349
350 bool readTranslations(Lexer & lex, Language::TranslationMap & trans)
351 {
352         while (lex.isOK()) {
353                 if (lex.checkFor("End"))
354                         break;
355                 if (!lex.next(true))
356                         return false;
357                 string const key = lex.getString();
358                 if (!lex.next(true))
359                         return false;
360                 docstring const val = lex.getDocString();
361                 trans[key] = val;
362         }
363         return true;
364 }
365
366
/// Quality of the correspondence between a language code and a Language,
/// as computed by match().
enum Match {
        /// the code and the language have nothing in common
        NoMatch = 0,
        /// a two-letter code matching a language with a longer code
        ApproximateMatch = 1,
        /// only the first two characters of the codes agree
        VeryApproximateMatch = 2,
        /// the code designates exactly this language
        ExactMatch = 3
};
373
374
375 Match match(string const & code, Language const & lang)
376 {
377         // we need to mimic gettext: code can be a two-letter code, which
378         // should match all variants, e.g. "de" should match "de_DE",
379         // "de_AT" etc.
380         // special case for chinese:
381         // simplified  => code == "zh_CN", langcode == "zh_CN"
382         // traditional => code == "zh_TW", langcode == "zh_CN"
383         string const variety = lang.variety();
384         string const langcode = variety.empty() ?
385                                 lang.code() : lang.code() + '_' + variety;
386         string const name = lang.lang();
387         if ((code == langcode && name != "chinese-traditional")
388                 || (code == "zh_TW"  && name == "chinese-traditional"))
389                 return ExactMatch;
390         if ((code.size() == 2) && (langcode.size() > 2)
391                 && (code + '_' == langcode.substr(0, 3)))
392                 return ApproximateMatch;
393         if (code.substr(0,2) == langcode.substr(0,2))
394                 return VeryApproximateMatch;
395         return NoMatch;
396 }
397
398 } // namespace
399
400
401
402 Language const * Languages::getFromCode(string const & code) const
403 {
404         // 1/ exact match with any known language
405         for (auto const & l : languagelist_) {
406                 if (match(code, l.second) == ExactMatch)
407                         return &l.second;
408         }
409
410         // 2/ approximate with any known language
411         for (auto const & l : languagelist_) {
412                 if (match(code, l.second) == ApproximateMatch)
413                         return &l.second;
414         }
415         return 0;
416 }
417
418
419 Language const * Languages::getFromCode(string const & code,
420                         set<Language const *> const & tryfirst) const
421 {
422         // 1/ exact match with tryfirst list
423         for (auto const * lptr : tryfirst) {
424                 if (match(code, *lptr) == ExactMatch)
425                         return lptr;
426         }
427
428         // 2/ approximate match with tryfirst list
429         for (auto const * lptr : tryfirst) {
430                 Match const m = match(code, *lptr);
431                 if (m == ApproximateMatch || m == VeryApproximateMatch)
432                         return lptr;
433         }
434
435         // 3/ stricter match in all languages
436         return getFromCode(code);
437
438         LYXERR0("Unknown language `" << code << "'");
439         return 0;
440 }
441
442
443 void Languages::readLayoutTranslations(support::FileName const & filename)
444 {
445         Lexer lex;
446         lex.setFile(filename);
447         lex.setContext("Languages::read");
448
449         // 1) read all translations (exact and approximate matches) into trans
450         std::map<string, Language::TranslationMap> trans;
451         while (lex.isOK()) {
452                 if (!lex.checkFor("Translation")) {
453                         if (lex.isOK())
454                                 lex.printError("Unknown layout translation tag `$$Token'");
455                         break;
456                 }
457                 if (!lex.next(true))
458                         break;
459                 string const code = lex.getString();
460                 bool found = getFromCode(code);
461                 if (!found) {
462                         lex.printError("Unknown language `" + code + "'");
463                         break;
464                 }
465                 if (!readTranslations(lex, trans[code])) {
466                         lex.printError("Could not read layout translations for language `"
467                                 + code + "'");
468                         break;
469                 }
470         }
471
472         // 2) merge all translations into the languages
473         // exact translations overwrite approximate ones
474         for (auto & tr : trans) {
475                 for (auto & lang : languagelist_) {
476                         Match const m = match(tr.first, lang.second);
477                         if (m == NoMatch)
478                                 continue;
479                         lang.second.readLayoutTranslations(tr.second, m == ExactMatch);
480                 }
481         }
482
483 }
484
485
486 Language const * Languages::getLanguage(string const & language) const
487 {
488         if (language == "reset")
489                 return reset_language;
490         if (language == "ignore")
491                 return ignore_language;
492         const_iterator it = languagelist_.find(language);
493         return it == languagelist_.end() ? reset_language : &it->second;
494 }
495
496
497 } // namespace lyx