Fix bug #11030.

[lyx.git] / src / Language.cpp
diff --git a/src/Language.cpp b/src/Language.cpp

index d89f7cafe9640f0cab877382048531e407a1f82a..884d1a039bb7078ba141013e693a6ba15b65843f 100644 (file)
--- a/src/Language.cpp
+++ b/src/Language.cpp
@@ -3,8 +3,9 @@
   * This file is part of LyX, the document processor.
   * Licence details can be found in the file COPYING.
   *
- * \author Lars Gullik Bjønnes
+ * \author Lars Gullik Bjønnes
   * \author Jean-Marc Lasgouttes
+ * \author Jürgen Spitzmüller
   * \author Dekel Tsur
   *
   * Full author contact details are available in file CREDITS.
@@ -14,96 +15,508 @@
  
  #include "Language.h"
  
-#include "debug.h"
+#include "BufferParams.h"
  #include "Encoding.h"
-#include "LyXLex.h"
+#include "LaTeXFonts.h"
+#include "Lexer.h"
  #include "LyXRC.h"
  
+#include "support/debug.h"
+#include "support/docstring.h"
  #include "support/FileName.h"
+#include "support/filetools.h"
+#include "support/lassert.h"
+#include "support/lstrings.h"
+#include "support/qstring_helpers.h"
+#include "support/Messages.h"
  
+#include <QLocale>
+#include <QString>
+
+using namespace std;
+using namespace lyx::support;
  
  namespace lyx {
  
-using std::endl;
-using std::string;
+// The global language registry.
+Languages languages;
+// Pseudo-language "ignore"; set by Languages::read() when that entry is read.
+Language const * ignore_language = nullptr;
+// Set at the end of Languages::read(): "english", or else the first
+// language in the list.
+Language const * default_language = nullptr;
+// Pseudo-language "latex"; set by Languages::read() when that entry is read.
+Language const * latex_language = nullptr;
+// Returned by Languages::getLanguage() for "reset" and for unknown names.
+// It is not assigned anywhere in the code shown here.
+Language const * reset_language = nullptr;
  
  
-Languages languages;
-Language const * english_language;
-Language const * default_language;
-Language ignore_lang("ignore", "ignore", "Ignore", false, "", 0, "ignore", "");
-Language const * ignore_language = &ignore_lang;
-Language latex_lang("latex", "latex", "Latex", false, "", 0, "latex", "");
-Language const * latex_language = &latex_lang;
+// True if this language has a polyglossia name but no babel name and
+// no additional requirement (required() is empty).
+bool Language::isPolyglossiaExclusive() const
+{
+       if (!babel().empty() || !required().empty())
+               return false;
+       return !polyglossia().empty();
+}
+
+
+// True if this language has a babel name but no polyglossia name and
+// no additional requirement (required() is empty).
+bool Language::isBabelExclusive() const
+{
+       if (!polyglossia().empty() || !required().empty())
+               return false;
+       return !babel().empty();
+}
+
+
+// Return the translation of the layout string \p msg for this language.
+// An empty \p msg yields an empty docstring. A \p msg that is not pure
+// ASCII is returned untranslated, with a warning. If no translation is
+// stored, \p msg itself is returned after passing through
+// cleanTranslation().
+docstring const Language::translateLayout(string const & msg) const
+{
+       if (msg.empty())
+               return docstring();
+
+       if (isAscii(msg)) {
+               auto const found = layoutTranslations_.find(msg);
+               if (found != layoutTranslations_.end())
+                       return found->second;
+
+               // No stored translation: fall back to the message itself.
+               docstring fallback = from_ascii(msg);
+               cleanTranslation(fallback);
+               return fallback;
+       }
+
+       lyxerr << "Warning: not translating `" << msg
+              << "' because it is not pure ASCII.\n";
+       return from_utf8(msg);
+}
+
+
+// Return the LaTeX font encoding to use for this language in a document
+// with the given \p params.
+string Language::fontenc(BufferParams const & params) const
+{
+       // The first listed encoding is the prioritized one; the
+       // pseudo-encoding "ASCII" stands for "T1" there.
+       auto const preferred = [this]() -> string {
+               string const & first = fontenc_.front();
+               return first == "ASCII" ? "T1" : first;
+       };
+
+       // With non-TeX fonts no LaTeX font is consulted, so just return the
+       // language's preferred encoding (it is not used with nonTeXFonts
+       // anyway).
+       if (params.useNonTeXFonts)
+               return preferred();
+
+       // Otherwise look for an encoding that both this language and the
+       // roman font of the document support.
+       LaTeXFont const & romanfont =
+               theLaTeXFonts().getLaTeXFont(from_ascii(params.fontsRoman()));
+       vector<string> const & available = romanfont.fontencs();
+       for (auto & wanted : fontenc_) {
+               if (wanted == "ASCII") {
+                       // ASCII means: all T* encodings plus OT1 are fine,
+                       // so take the first such encoding the font offers.
+                       for (auto & offered : available) {
+                               if (prefixIs(offered, "T") || offered == "OT1")
+                                       return offered;
+                       }
+               }
+               // In any case, accept the encoding if the font supports it
+               // by name.
+               if (romanfont.hasFontenc(wanted))
+                       return wanted;
+       }
+
+       // Nothing suitable found: fall back to the prioritized encoding.
+       return preferred();
+}
+
+
+// Return the date format with index \p i from this language's list of
+// date formats, or an empty string if \p i is not a valid index.
+string Language::dateFormat(size_t i) const
+{
+       // Valid indices are 0 .. size()-1. The index size() itself must be
+       // rejected here as well, otherwise the element access below would
+       // be out of range.
+       if (i >= dateformats_.size())
+               return string();
+       return dateformats_[i];
+}
+
+
+// Return the decimal separator for this language: the one configured in
+// lyxrc.default_decimal_sep, unless that setting is "locale", in which
+// case the separator of the QLocale built from code() is used.
+docstring Language::decimalSeparator() const
+{
+       if (lyxrc.default_decimal_sep != "locale")
+               return from_utf8(lyxrc.default_decimal_sep);
+
+       QLocale const loc = QLocale(toqstr(code()));
+       return qstring_to_ucs4(QString(loc.decimalPoint()));
+}
+
+
+// Parse the body of one language definition from \p lex, i.e. the
+// sequence of "<tag> <value>" entries up to the closing "end" tag, and
+// store the values in the corresponding members.
+// Returns true only if the "end" tag was reached and no unknown tag was
+// encountered.
+bool Language::readLanguage(Lexer & lex)
+{
+       // Identifiers for the tags below.
+       enum LanguageTags {
+               LA_BABELNAME = 1,
+               LA_DATEFORMATS,
+               LA_ENCODING,
+               LA_END,
+               LA_FONTENC,
+               LA_GUINAME,
+               LA_HAS_GUI_SUPPORT,
+               LA_INTERNAL_ENC,
+               LA_LANG_CODE,
+               LA_LANG_VARIETY,
+               LA_POLYGLOSSIANAME,
+               LA_POLYGLOSSIAOPTS,
+               LA_XINDYNAME,
+               LA_POSTBABELPREAMBLE,
+               LA_PREBABELPREAMBLE,
+               LA_PROVIDES,
+               LA_REQUIRES,
+               LA_QUOTESTYLE,
+               LA_RTL,
+               LA_WORDWRAP,
+               LA_ACTIVECHARS
+       };
+
+       // Keep these sorted alphabetically!
+       LexerKeyword languageTags[] = {
+               { "activechars",          LA_ACTIVECHARS },
+               { "babelname",            LA_BABELNAME },
+               { "dateformats",          LA_DATEFORMATS },
+               { "encoding",             LA_ENCODING },
+               { "end",                  LA_END },
+               { "fontencoding",         LA_FONTENC },
+               { "guiname",              LA_GUINAME },
+               { "hasguisupport",        LA_HAS_GUI_SUPPORT },
+               { "internalencoding",     LA_INTERNAL_ENC },
+               { "langcode",             LA_LANG_CODE },
+               { "langvariety",          LA_LANG_VARIETY },
+               { "polyglossianame",      LA_POLYGLOSSIANAME },
+               { "polyglossiaopts",      LA_POLYGLOSSIAOPTS },
+               { "postbabelpreamble",    LA_POSTBABELPREAMBLE },
+               { "prebabelpreamble",     LA_PREBABELPREAMBLE },
+               { "provides",             LA_PROVIDES },
+               { "quotestyle",           LA_QUOTESTYLE },
+               { "requires",             LA_REQUIRES },
+               { "rtl",                  LA_RTL },
+               { "wordwrap",             LA_WORDWRAP },
+               { "xindyname",            LA_XINDYNAME }
+       };
+
+       bool error = false;
+       bool finished = false;
+       lex.pushTable(languageTags);
+       // parse style section
+       while (!finished && lex.isOK() && !error) {
+               int le = lex.lex();
+               // See comment in LyXRC.cpp.
+               switch (le) {
+               case Lexer::LEX_FEOF:
+                       // Nothing to dispatch; go back to the loop condition.
+                       continue;
+
+               case Lexer::LEX_UNDEF: // parse error
+                       lex.printError("Unknown language tag `$$Token'");
+                       error = true;
+                       continue;
+
+               default:
+                       break;
+               }
+               // Any other value is one of the tag ids from the table above.
+               switch (static_cast<LanguageTags>(le)) {
+               case LA_END: // end of structure
+                       finished = true;
+                       break;
+               case LA_BABELNAME:
+                       lex >> babel_;
+                       break;
+               case LA_POLYGLOSSIANAME:
+                       lex >> polyglossia_name_;
+                       break;
+               case LA_POLYGLOSSIAOPTS:
+                       lex >> polyglossia_opts_;
+                       break;
+               case LA_XINDYNAME:
+                       lex >> xindy_;
+                       break;
+               case LA_QUOTESTYLE:
+                       lex >> quote_style_;
+                       break;
+               case LA_ACTIVECHARS:
+                       lex >> active_chars_;
+                       break;
+               case LA_ENCODING:
+                       lex >> encodingStr_;
+                       break;
+               case LA_FONTENC: {
+                       // The rest of the line is a "|"-separated list whose
+                       // entries are appended to fontenc_.
+                       lex.eatLine();
+                       vector<string> const fe =
+                               getVectorFromString(lex.getString(true), "|");
+                       fontenc_.insert(fontenc_.end(), fe.begin(), fe.end());
+                       break;
+               }
+               case LA_DATEFORMATS: {
+                       // Same as for fontencoding, except that double quotes
+                       // at both ends of the line are trimmed before splitting.
+                       lex.eatLine();
+                       vector<string> const df =
+                               getVectorFromString(trim(lex.getString(true), "\""), "|");
+                       dateformats_.insert(dateformats_.end(), df.begin(), df.end());
+                       break;
+               }
+               case LA_GUINAME:
+                       lex >> display_;
+                       break;
+               case LA_HAS_GUI_SUPPORT:
+                       lex >> has_gui_support_;
+                       break;
+               case LA_INTERNAL_ENC:
+                       lex >> internal_enc_;
+                       break;
+               case LA_LANG_CODE:
+                       lex >> code_;
+                       break;
+               case LA_LANG_VARIETY:
+                       lex >> variety_;
+                       break;
+               case LA_POSTBABELPREAMBLE:
+                       // Long value; presumably everything up to the given end
+                       // marker (see Lexer::getLongString).
+                       babel_postsettings_ =
+                               lex.getLongString(from_ascii("EndPostBabelPreamble"));
+                       break;
+               case LA_PREBABELPREAMBLE:
+                       babel_presettings_ =
+                               lex.getLongString(from_ascii("EndPreBabelPreamble"));
+                       break;
+               case LA_REQUIRES:
+                       lex >> required_;
+                       break;
+               case LA_PROVIDES:
+                       lex >> provides_;
+                       break;
+               case LA_RTL:
+                       lex >> rightToLeft_;
+                       break;
+               case LA_WORDWRAP:
+                       lex >> word_wrap_;
+                       break;
+               }
+       }
+       lex.popTable();
+       // Success requires having seen "end" and no unknown tag.
+       return finished && !error;
+}
+
+
+// Read one complete language definition from \p lex: first the language
+// name, then the tagged entries (see readLanguage()). Afterwards the
+// encoding is resolved and defaults are filled in for the font encodings
+// and date formats if the definition did not provide any.
+// Returns false if the name is missing or the entries cannot be parsed.
+bool Language::read(Lexer & lex)
+{
+       // Reset the members that have a default value.
+       rightToLeft_ = false;
+       internal_enc_ = false;
+       encoding_ = nullptr;
+
+       // The next token is the language name.
+       bool const have_name = lex.next();
+       if (!have_name) {
+               lex.printError("No name given for language: `$$Token'.");
+               return false;
+       }
+       lang_ = lex.getString();
+       LYXERR(Debug::INFO, "Reading language " << lang_);
+
+       bool const parsed = readLanguage(lex);
+       if (!parsed) {
+               LYXERR0("Error parsing language `" << lang_ << '\'');
+               return false;
+       }
+
+       // An encoding name that was given but is not known falls back to
+       // iso8859-1; an empty name leaves encoding_ unset.
+       encoding_ = encodings.fromLyXName(encodingStr_);
+       if (encoding_ == nullptr && !encodingStr_.empty()) {
+               encoding_ = encodings.fromLyXName("iso8859-1");
+               LYXERR0("Unknown encoding " << encodingStr_);
+       }
+
+       // Defaults for lists the definition left empty.
+       if (fontenc_.empty())
+               fontenc_.push_back("ASCII");
+       if (dateformats_.empty()) {
+               dateformats_.push_back("MMMM dd, yyyy");
+               dateformats_.push_back("MMM dd, yyyy");
+               dateformats_.push_back("M/d/yyyy");
+       }
+       return true;
+}
  
  
-void Languages::read(support::FileName const & filename)
+// Merge the layout translations in \p trans into this language.
+// If \p replace is true, every entry of \p trans is stored, overwriting
+// an existing translation for the same key. Otherwise only entries whose
+// key has no translation yet are added.
+void Language::readLayoutTranslations(Language::TranslationMap const & trans, bool replace)
  {
-       // We need to set the encoding of latex_lang
-       latex_lang = Language("latex", "latex", "Latex", false, "iso8859-1",
-                             encodings.getFromLyXName("iso8859-1"),
-                             "latex", "");
+       for (auto const & t : trans) {
+               if (replace
+                   || layoutTranslations_.find(t.first) == layoutTranslations_.end())
+                       layoutTranslations_[t.first] = t.second;
+       }
+}
+
  
-       LyXLex lex(0, 0);
+// Read the language definitions from \p filename into languagelist_.
+// Each definition starts with the token "Language" and is parsed by
+// Language::read(). The "latex" and "ignore" entries are not stored in
+// languagelist_; they are kept in function-local statics and published
+// through latex_language and ignore_language.
+// Afterwards default_language is set and the layout translations are
+// read from the "layouttranslations" file found by libFileSearch().
+void Languages::read(FileName const & filename)
+{
+       Lexer lex;
          lex.setFile(filename);
+       lex.setContext("Languages::read");
          while (lex.isOK()) {
-               string lang;
-               string babel;
-               string display;
-               string encoding_str;
-               string code;
-               string latex_options;
-               bool rtl = false;
-
-               if (lex.next())
-                       lang = lex.getString();
-               else
-                       break;
-               LYXERR(Debug::INFO) << "Reading language " << lang << endl;
-
-               if (lex.next())
-                       babel = lex.getString();
-               if (lex.next())
-                       display = lex.getString();
-               if (lex.next())
-                       rtl = lex.getBool();
-               if (lex.next())
-                       encoding_str = lex.getString();
-               if (lex.next())
-                       code = lex.getString();
-               if (lex.next())
-                       latex_options = lex.getString();
-
-               Encoding const * encoding = encodings.getFromLyXName(encoding_str);
-               if (!encoding) {
-                       encoding = encodings.getFromLyXName("iso8859-1");
-                       lyxerr << "Unknown encoding " << encoding_str << endl;
-               }
+               int le = lex.lex();
+               switch (le) {
+               case Lexer::LEX_FEOF:
+                       continue;
  
-               languagelist[lang] = Language(lang, babel, display, rtl,
-                                             encoding_str, encoding, code, latex_options);
+               default:
+                       break;
+               }
+               // Only "Language" is accepted at the top level of the file.
+               if (lex.getString() != "Language") {
+                       lex.printError("Unknown Language tag `$$Token'");
+                       continue;
+               }
+               Language l;
+               // NOTE(review): the result of read() is not checked, so a
+               // definition that failed to parse is still registered below as
+               // long as the lexer is usable -- confirm this is intended.
+               l.read(lex);
+               if (!lex)
+                       break;
+               if (l.lang() == "latex") {
+                       // Check if latex language was not already defined.
+                       LASSERT(latex_language == nullptr, continue);
+                       static const Language latex_lang = l;
+                       latex_language = &latex_lang;
+               } else if (l.lang() == "ignore") {
+                       // Check if ignore language was not already defined.
+                       LASSERT(ignore_language == nullptr, continue);
+                       static const Language ignore_lang = l;
+                       ignore_language = &ignore_lang;
+               } else
+                       languagelist_[l.lang()] = l;
          }
  
-       default_language = getLanguage(lyxrc.default_language);
+       default_language = getLanguage("english");
          if (!default_language) {
-               lyxerr << "Default language \"" << lyxrc.default_language
-                      << "\" not found!" << endl;
-               default_language = getLanguage("english");
-               if (!default_language)
-                       default_language = &(*languagelist.begin()).second;
-               lyxerr << "Using \"" << default_language->lang()
-                      << "\" instead!" << endl;
+               LYXERR0("Default language \"english\" not found!");
+               // NOTE(review): this assumes at least one language was read;
+               // begin() of an empty languagelist_ must not be dereferenced.
+               default_language = &(*languagelist_.begin()).second;
+               LYXERR0("Using \"" << default_language->lang() << "\" instead!");
+       }
+
+       // Read layout translations
+       FileName const path = libFileSearch(string(), "layouttranslations");
+       readLayoutTranslations(path);
+}
+
+
+namespace {
+
+// Read key/value pairs from \p lex into \p trans until the token "End"
+// is found or the lexer is no longer OK. An existing entry with the same
+// key is overwritten.
+// Returns false if a key or a value cannot be read, true otherwise.
+bool readTranslations(Lexer & lex, Language::TranslationMap & trans)
+{
+       while (lex.isOK() && !lex.checkFor("End")) {
+               // source string ...
+               if (!lex.next(true))
+                       return false;
+               string const source = lex.getString();
+
+               // ... followed by its translation
+               if (!lex.next(true))
+                       return false;
+               docstring const translated = lex.getDocString();
+
+               trans[source] = translated;
+       }
+       return true;
+}
+
+
+// Result of comparing a language code with a Language, see match().
+enum Match {
+       // The first two characters of the codes already differ.
+       NoMatch,
+       // A two-letter code matches the language part of a longer language
+       // code, e.g. "de" and "de_DE".
+       ApproximateMatch,
+       // Only the first two characters of the codes agree.
+       VeryApproximateMatch,
+       // The codes are identical (with a special case for "zh_TW").
+       ExactMatch
+};
+
+
+// Classify how well \p code (e.g. "de" or "de_AT") fits the language
+// \p lang. This mimics gettext: a two-letter code should match all
+// variants, e.g. "de" should match "de_DE", "de_AT" etc.
+Match match(string const & code, Language const & lang)
+{
+       // The code of the language, extended by "_<variety>" if it has one.
+       string langcode = lang.code();
+       string const variety = lang.variety();
+       if (!variety.empty())
+               langcode += '_' + variety;
+
+       // Special case for chinese:
+       // simplified  => code == "zh_CN", langcode == "zh_CN"
+       // traditional => code == "zh_TW", langcode == "zh_CN"
+       bool const traditional = lang.lang() == "chinese-traditional";
+       bool const exact = traditional ? code == "zh_TW" : code == langcode;
+       if (exact)
+               return ExactMatch;
+
+       // A bare two-letter code fits any regional variant of the language.
+       bool const bare = code.size() == 2 && langcode.size() > 2;
+       if (bare && langcode.substr(0, 3) == code + '_')
+               return ApproximateMatch;
+
+       // Weakest form: only the two leading characters agree.
+       if (langcode.substr(0, 2) == code.substr(0, 2))
+               return VeryApproximateMatch;
+
+       return NoMatch;
+}
+
+} // namespace
+
+
+
+// Return the known language that fits \p code best, or nullptr if there
+// is none. Exact matches are preferred over approximate ones; very
+// approximate matches are not accepted here.
+Language const * Languages::getFromCode(string const & code) const
+{
+       // One pass over all known languages per accepted match quality,
+       // best quality first.
+       Match const passes[] = { ExactMatch, ApproximateMatch };
+       for (Match const wanted : passes) {
+               for (auto const & entry : languagelist_) {
+                       if (match(code, entry.second) == wanted)
+                               return &entry.second;
+               }
+       }
+       return nullptr;
+}
+
+
+// Return the language that fits \p code best, preferring the languages
+// in \p tryfirst: an exact match among them wins, then an approximate or
+// very approximate match among them. If \p tryfirst yields nothing, all
+// known languages are searched with the stricter rules of
+// getFromCode(code). Returns nullptr if no language matches; no
+// diagnostic is printed in that case.
+Language const * Languages::getFromCode(string const & code,
+                       set<Language const *> const & tryfirst) const
+{
+       // 1/ exact match with tryfirst list
+       for (auto const * lptr : tryfirst) {
+               if (match(code, *lptr) == ExactMatch)
+                       return lptr;
+       }
+
+       // 2/ approximate match with tryfirst list
+       for (auto const * lptr : tryfirst) {
+               Match const m = match(code, *lptr);
+               if (m == ApproximateMatch || m == VeryApproximateMatch)
+                       return lptr;
         }
-       english_language = getLanguage("english");
-       if (!english_language)
-               english_language = default_language;
+
+       // 3/ stricter match in all languages; this is nullptr if nothing
+       // matches. (The statements that used to follow this return could
+       // never be executed and have been dropped.)
+       return getFromCode(code);
+}
+
+
+// Read the layout translations file \p filename and distribute its
+// entries over the known languages.
+// The file consists of sections "Translation <code>", each followed by
+// key/value pairs up to "End" (see readTranslations()). Reading stops at
+// the first unknown tag, unknown language code or unreadable section;
+// whatever was read up to that point is still merged.
+void Languages::readLayoutTranslations(support::FileName const & filename)
+{
+       Lexer lex;
+       lex.setFile(filename);
+       // Label lexer diagnostics with this function rather than with
+       // Languages::read, which parses a different file.
+       lex.setContext("Languages::readLayoutTranslations");
+
+       // 1) read all translations (exact and approximate matches) into trans
+       std::map<string, Language::TranslationMap> trans;
+       while (lex.isOK()) {
+               if (!lex.checkFor("Translation")) {
+                       if (lex.isOK())
+                               lex.printError("Unknown layout translation tag `$$Token'");
+                       break;
+               }
+               if (!lex.next(true))
+                       break;
+               string const code = lex.getString();
+               // The code must belong to at least one known language.
+               if (getFromCode(code) == nullptr) {
+                       lex.printError("Unknown language `" + code + "'");
+                       break;
+               }
+               if (!readTranslations(lex, trans[code])) {
+                       lex.printError("Could not read layout translations for language `"
+                               + code + "'");
+                       break;
+               }
+       }
+
+       // 2) merge all translations into the languages
+       // exact translations overwrite approximate ones
+       for (auto & tr : trans) {
+               for (auto & lang : languagelist_) {
+                       Match const m = match(tr.first, lang.second);
+                       if (m == NoMatch)
+                               continue;
+                       // Only an exact match may replace existing entries.
+                       lang.second.readLayoutTranslations(tr.second, m == ExactMatch);
+               }
+       }
  }
  
  
  Language const * Languages::getLanguage(string const & language) const
  {
-       const_iterator it = languagelist.find(language);
-       return it == languagelist.end() ? 0 : &it->second;
+       // "reset" and "ignore" are answered from the global pointers
+       // instead of being looked up in languagelist_.
+       if (language == "reset")
+               return reset_language;
+       if (language == "ignore")
+               return ignore_language;
+       // A name that is not in the list yields reset_language (which may
+       // be nullptr).
+       const_iterator it = languagelist_.find(language);
+       return it == languagelist_.end() ? reset_language : &it->second;
  }