Implement native reading of mo files.

author Jean-Marc Lasgouttes <lasgouttes@lyx.org>

Wed, 8 May 2013 16:50:38 +0000 (18:50 +0200)

committer Vincent van Ravesteijn <vfr@lyx.org>

Thu, 30 May 2013 20:10:01 +0000 (22:10 +0200)
author Jean-Marc Lasgouttes <lasgouttes@lyx.org>
Wed, 8 May 2013 16:50:38 +0000 (18:50 +0200)
committer Vincent van Ravesteijn <vfr@lyx.org>
Thu, 30 May 2013 20:10:01 +0000 (22:10 +0200)
diff --git a/src/LyX.cpp b/src/LyX.cpp

index e678e98932154af0909028024a632847145cedd0..09fcfd5398e42ba1c6f9d0cc9f3de52dc27a4182 100644 (file)
--- a/src/LyX.cpp
+++ b/src/LyX.cpp
@@ -33,7 +33,6 @@
  #include "FuncStatus.h"
  #include "HunspellChecker.h"
  #include "KeyMap.h"
-#include "Language.h"
  #include "LaTeXFonts.h"
  #include "LayoutFile.h"
  #include "Lexer.h"
@@ -188,7 +187,7 @@ struct LyX::Impl {
         bool first_start;
         /// the parsed command line batch command if any
         vector<string> batch_commands;
-       
+
         ///
         LaTeXFonts * latexfonts_;
  
@@ -272,7 +271,7 @@ int LyX::exec(int & argc, char * argv[])
         try {
                 init_package(os::utf8_argv(0), string(), string());
                 // we do not get to this point when init_package throws an exception
-               locale_init();
+               setLocale();
         } catch (ExceptionMessage const & message) {
                 LYXERR(Debug::LOCALE, message.title_ + ", " + message.details_);
         }
@@ -294,7 +293,7 @@ int LyX::exec(int & argc, char * argv[])
  
         // Reinit the messages machinery in case package() knows
         // something interesting about the locale directory.
-       Messages::init();
+       setLocale();
  
         if (!use_gui) {
                 // FIXME: create a ConsoleApplication
@@ -337,7 +336,7 @@ int LyX::exec(int & argc, char * argv[])
  
         // Reestablish our defaults, as Qt overwrites them
         // after createApplication()
-       locale_init();
+       setLocale();//???
  
         // Parse and remove all known arguments in the LyX singleton
         // Give an error for all remaining ones.
@@ -794,6 +793,9 @@ bool LyX::init()
         if (!readRcFile("preferences", true))
                 return false;
  
+       // The language may have been set to something useful through prefs
+       setLocale();
+
         if (!readEncodingsFile("encodings", "unicodesymbols"))
                 return false;
         if (!readLanguagesFile("languages"))
@@ -1379,19 +1381,7 @@ Messages const & getMessages(string const & language)
  Messages const & getGuiMessages()
  {
         LAPPERR(singleton_);
-       // A cache to translate full language name to language code
-       static string last_language = "auto";
-       static string code;
-       if (lyxrc.gui_language != last_language) {
-               if (lyxrc.gui_language == "auto")
-                       code.clear();
-               else {
-                       Language const * l = languages.getLanguage(lyxrc.gui_language);
-                       code = l ? l->code() : string();
-               }
-               last_language = lyxrc.gui_language;
-       }
-       return singleton_->messages(code);
+       return singleton_->messages(Messages::guiLanguage());
  }
  
  
diff --git a/src/frontends/Application.h b/src/frontends/Application.h

index 0609f90396fab18d232b31af8ab6ce150cf71714..0fe4d9101d8c5d24ca621ad17633f85474b00ed9 100644 (file)
--- a/src/frontends/Application.h
+++ b/src/frontends/Application.h
@@ -259,6 +259,8 @@ std::vector<std::string> loadableImageFormats();
  frontend::Application * theApp();
  frontend::Application * createApplication(int & argc, char * argv[]);
  void hideDialogs(std::string const & name, Inset * inset);
+/// Set locale correctly using LyXRC::gui_language
+void setLocale();
  
  } // namespace lyx
  
diff --git a/src/frontends/qt4/GuiApplication.cpp b/src/frontends/qt4/GuiApplication.cpp

index c319054803854e40f1089eb80df683a50756c1b0..80bd6991ba75b73aff3c44d6ed8bf1f53b05dfd1 100644 (file)
--- a/src/frontends/qt4/GuiApplication.cpp
+++ b/src/frontends/qt4/GuiApplication.cpp
@@ -175,6 +175,22 @@ frontend::Application * createApplication(int & argc, char * argv[])
         return guiApp;
  }
  
+
+void setLocale()
+{
+       QLocale theLocale;
+       if (lyxrc.gui_language == "auto") {
+               theLocale = QLocale::system();
+       } else {
+               Language const * l = languages.getLanguage(lyxrc.gui_language);
+               string const code = l ? l->code() : string();
+               theLocale = QLocale(toqstr(code));
+       }
+       Messages::guiLanguage(fromqstr(theLocale.name()));
+       QLocale::setDefault(theLocale);
+}
+
+
  namespace frontend {
  
  
@@ -2212,14 +2228,10 @@ void GuiApplication::exit(int status)
  
  void GuiApplication::setGuiLanguage()
  {
-       QString const default_language = toqstr(getGuiMessages().language());
-       LYXERR(Debug::LOCALE, "Trying to set default locale to: " << default_language);
-       QLocale const default_locale(default_language);
-       QLocale::setDefault(default_locale);
-
+       setLocale();
+       QLocale theLocale;
         // install translation file for Qt built-in dialogs
-       QString const language_name = QString("qt_") + default_locale.name();
-
+       QString const language_name = QString("qt_") + theLocale.name();
         // language_name can be short (e.g. qt_zh) or long (e.g. qt_zh_CN).
         // Short-named translator can be loaded from a long name, but not the
         // opposite. Therefore, long name should be used without truncation.
@@ -2233,7 +2245,7 @@ void GuiApplication::setGuiLanguage()
                         << language_name);
         }
  
-       switch (default_locale.language()) {
+       switch (theLocale.language()) {
         case QLocale::Arabic :
         case QLocale::Hebrew :
         case QLocale::Persian :
diff --git a/src/support/Messages.cpp b/src/support/Messages.cpp

index 5c8d45fd8f72db085c9cadae2f5f1ce2205398e6..4e8dca17b01e19e976346a97a014b1c245bc5083 100644 (file)
--- a/src/support/Messages.cpp
+++ b/src/support/Messages.cpp
@@ -7,28 +7,99 @@
   * Full author contact details are available in file CREDITS.
   */
  
+/*
+  This is a limited parser for gettext's mo files. Several features are
+  not handled for now:
+   * encoding is supposed to be UTF-8 (the charset parameter is not honored)
+   * context is not handled (implemented differently in LyX)
+   * plural forms not implemented (not used for now in LyX).
+   * The byte endianness of the machine on which the .mo file has been
+     built is expected to be the same as the one of the machine where this
+     code is run.
+
+  The data is loaded in a std::map object for simplicity.
+ */
+
+/*
+  Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+             byte
+                  +------------------------------------------+
+               0  | magic number = 0x950412de                |
+                  |                                          |
+               4  | file format revision = 0                 |
+                  |                                          |
+               8  | number of strings                        |  == N
+                  |                                          |
+              12  | offset of table with original strings    |  == O
+                  |                                          |
+              16  | offset of table with translation strings |  == T
+                  |                                          |
+              20  | size of hashing table                    |  == S
+                  |                                          |
+              24  | offset of hashing table                  |  == H
+                  |                                          |
+                  .                                          .
+                  .    (possibly more entries later)         .
+                  .                                          .
+                  |                                          |
+               O  | length & offset 0th string  ----------------.
+           O + 8  | length & offset 1st string  ------------------.
+                   ...                                    ...   | |
+     O + ((N-1)*8)| length & offset (N-1)th string           |  | |
+                  |                                          |  | |
+               T  | length & offset 0th translation  ---------------.
+           T + 8  | length & offset 1st translation  -----------------.
+                   ...                                    ...   | | | |
+     T + ((N-1)*8)| length & offset (N-1)th translation      |  | | | |
+                  |                                          |  | | | |
+               H  | start hash table                         |  | | | |
+                   ...                                    ...   | | | |
+       H + S * 4  | end hash table                           |  | | | |
+                  |                                          |  | | | |
+                  | NUL terminated 0th string  <----------------' | | |
+                  |                                          |    | | |
+                  | NUL terminated 1st string  <------------------' | |
+                  |                                          |      | |
+                   ...                                    ...       | |
+                  |                                          |      | |
+                  | NUL terminated 0th translation  <---------------' |
+                  |                                          |        |
+                  | NUL terminated 1st translation  <-----------------'
+                  |                                          |
+                   ...                                    ...
+                  |                                          |
+                  +------------------------------------------+
+
+ */
+
  #include <config.h>
  
  #include "support/Messages.h"
  
  #include "support/debug.h"
  #include "support/docstring.h"
-#include "support/environment.h"
  #include "support/lstrings.h"
  #include "support/Package.h"
  #include "support/unicode.h"
  
  #include "support/lassert.h"
  
+#include <boost/cstdint.hpp>
+
  #include <cerrno>
+#include <fstream>
  
-#  define N_(str) (str)              // for marking strings to be translated
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
  
  using namespace std;
+using boost::uint32_t;
  
  namespace lyx {
  
-void cleanTranslation(docstring & trans) 
+void cleanTranslation(docstring & trans)
  {
         /*
           Some english words have different translations, depending on
@@ -62,20 +133,13 @@ void cleanTranslation(docstring & trans)
  
  #ifdef ENABLE_NLS
  
-#  ifdef HAVE_LOCALE_H
-#    include <locale.h>
-#  endif
-
-#  if HAVE_GETTEXT
-#    include <libintl.h>      // use the header already in the system *EK*
-#  else
-#    include "intl/libintl.h"
-#  endif
-
  using namespace lyx::support;
  
  namespace lyx {
  
+std::string Messages::gui_lang_;
+
+
  // This version use the traditional gettext.
  Messages::Messages(string const & l)
         : lang_(l)
@@ -84,138 +148,166 @@ Messages::Messages(string const & l)
         size_t i = lang_.find(".");
         lang_ = lang_.substr(0, i);
         LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
+
+       readMoFile();
  }
  
  
-void Messages::init()
+namespace {
+
+string moFile(string const & c)
  {
-       errno = 0;
-       string const locale_dir = package().locale_dir().toFilesystemEncoding();
-       char const * c = bindtextdomain(PACKAGE, locale_dir.c_str());
-       int e = errno;
-       if (e) {
-               LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
-                       << "Directory : " << package().locale_dir().absFileName() << '\n'
-                       << "Rtn value : " << c);
-       }
+       static string const locale_dir
+               = package().locale_dir().toFilesystemEncoding();
+       return locale_dir + "/" + c
+               + "/LC_MESSAGES/" PACKAGE ".mo";
+}
+
  
-       if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) {
-               LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
-                       << "Codeset   : " << ucs4_codeset);
+// Find the code we have for a given language code. Return empty if not found.
+string realCode(string const & c)
+{
+       // Qt tries to outsmart us and transforms en_US to C.
+       string code = (c == "C") ? "en" : c;
+       // this loops at most twice
+       while (true) {
+               if (FileName(moFile(code)).isReadableFile())
+                       return code;
+               if (contains(code, '_'))
+                       code = token(code, '_', 0);
+               else
+                       break;
         }
+       return string();
+}
+}
+
  
-       textdomain(PACKAGE);
+bool Messages::available(string const & c)
+{
+       return !realCode(c).empty();
  }
  
  
  string Messages::language() const
  {
-       // get the language from the gmo file
-       string const test = N_("[[Replace with the code of your language]]");
-       string const trans = to_utf8(get(test));
-       if (trans == test) {
-               LYXERR0("Something is weird.");
-               return string();
-       } else
-               return trans;
+       return realCode(lang_);
  }
  
  
-bool Messages::available(string const & c)
+struct MoHeader
  {
-       static string locale_dir = package().locale_dir().toFilesystemEncoding();
-       string code = c;
-       // this loops at most twice
-       while (true) {
-               string const filen = locale_dir + "/" + code 
-                       + "/LC_MESSAGES/" PACKAGE ".mo";
-               if (FileName(filen).isReadableFile())
-                       return true;
-               if (contains(code, '_'))
-                       code = token(code, '_', 0);
-               else return false;
+       // magic number = 0x950412de
+       uint32_t magic;
+       // file format revision = 0
+       uint32_t rev;
+       // number of strings
+       uint32_t N;
+       // offset of table with original strings
+       uint32_t O;
+       // offset of table with translation strings
+       uint32_t T;
+       // there is a hashing table afterwards, but we ignore it
+};
+
+
+struct StringTable
+{
+       // string length
+       uint32_t length;
+       // string offset
+       uint32_t offset;
+};
+
+
+bool Messages::readMoFile()
+{
+       // FIXME:remove
+       if (lang_.empty()) {
+               LYXERR0("No language given, nothing to load.");
+               return false;
         }
-       return false;
  
-}
+       string const code = realCode(lang_);
+       if (code.empty()) {
+               LYXERR0("Cannot find translation for language " << lang_);
+               return false;
+       }
  
-namespace {
+       string const filen = moFile(code);
  
-// Trivial wrapper around gettext()
-docstring const getText(string const & m)
-{
-       // FIXME: gettext sometimes "forgets" the ucs4_codeset we set
-       // in init(), which leads to severe message corruption (#7371)
-       // We set it again here unconditionally. A real fix must be found!
-       LATTEST(bind_textdomain_codeset(PACKAGE, ucs4_codeset));
-
-       char const * m_c = m.c_str();
-       char const * trans_c = gettext(m_c);
-       docstring trans;
-       if (!trans_c) {
-               LYXERR(Debug::LOCALE, "Undefined result from gettext for `" << m << "'.");
-               trans = from_ascii(m);
-       } else if (trans_c == m_c) {
-               //LYXERR(Debug::LOCALE, "Same as entered returned");
-               trans = from_ascii(m);
-       } else {
-               //LYXERR(Debug::LOCALE, "We got a translation");
-               // m is actually not a char const * but ucs4 data
-               trans = reinterpret_cast<char_type const *>(trans_c);
+       // get file size
+       struct stat buf;
+       if (stat(filen.c_str(), &buf)) {
+               LYXERR0("Cannot get information for file " << filen);
+               return false;
         }
  
-       cleanTranslation(trans);
+       vector<char> moData(buf.st_size);
  
-       return trans;
-}
+       ifstream is(filen.c_str(), ios::in | ios::binary);
+       if (!is.read(&moData[0], buf.st_size)) {
+               LYXERR0("Cannot read file " << filen);
+               return false;
+       }
  
-}
+       MoHeader const * header = reinterpret_cast<MoHeader const *>(&moData[0]);
+       if (header->magic != 0x950412de) {
+               LYXERR0("Wrong magic number for file " << filen
+                       << ".\nExpected 0x950412de, got " << std::hex << header->magic);
+               return false;
+       }
+
+       StringTable const * orig = reinterpret_cast<StringTable const *>(&moData[0] + header->O);
+       StringTable const * trans = reinterpret_cast<StringTable const *>(&moData[0] + header->T);
+       // First the header
+       string const info = string(&moData[0] + trans[0].offset, trans[0].length);
+       size_t pos = info.find("charset=");
+       if (pos != string::npos) {
+               pos += 8;
+               string charset;
+               size_t pos2 = info.find("\n", pos);
+               if (pos2 == string::npos)
+                       charset = info.substr(pos);
+               else
+                       charset = info.substr(pos, pos2 - pos);
+               charset = ascii_lowercase(trim(charset));
+               if (charset != "utf-8") {
+                       LYXERR0("Wrong encoding " << charset << " for file " << filen);
+                       return false;
+               }
+       } else {
+               LYXERR0("Cannot find encoding encoding for file " << filen);
+               return false;
+       }
+
+       for (size_t i = 1; i < header->N; ++i) {
+               // Note that in theory the strings may contain NUL characters.
+               // This may be the case with plural forms
+               string const ostr(&moData[0] + orig[i].offset, orig[i].length);
+               docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
+                                                 trans[i].length));
+               cleanTranslation(tstr);
+               trans_map_[ostr] = tstr;
+               //lyxerr << ostr << " ==> " << tstr << endl;
+       }
  
+       return true;
+}
  
  docstring const Messages::get(string const & m) const
  {
         if (m.empty())
                 return docstring();
  
-       // Look for the translated string in the cache.
-       TranslationCache::iterator it = cache_.find(m);
-       if (it != cache_.end())
+       TranslationMap::const_iterator it = trans_map_.find(m);
+       if (it != trans_map_.end())
                 return it->second;
-
-       // The string was not found, use gettext to generate it
-       docstring trans;
-       if (!lang_.empty()) {
-               // This GNU extension overrides any language locale
-               // wrt gettext.
-               LYXERR(Debug::LOCALE, "Setting LANGUAGE to " << lang_);
-               EnvChanger language_chg("LANGUAGE", lang_);
-               // However, setting LANGUAGE does nothing when the
-               // locale is "C". Therefore we set the locale to
-               // something that is believed to exist on most
-               // systems. The idea is that one should be able to
-               // load German documents even without having de_DE
-               // installed.
-               LYXERR(Debug::LOCALE, "Setting LC_ALL to en_US");
-               EnvChanger lc_all_chg("LC_ALL", "en_US");
-#ifdef HAVE_LC_MESSAGES
-               setlocale(LC_MESSAGES, "");
-#endif
-               trans = getText(m);
-       } else
-               trans = getText(m);
-               
-
-#ifdef HAVE_LC_MESSAGES
-       setlocale(LC_MESSAGES, "");
-#endif
-
-       // store translation in cache
-       pair<TranslationCache::iterator, bool> result =
-               cache_.insert(make_pair(m, trans));
-
-       LASSERT(result.second, return from_utf8(m));
-
-       return result.first->second;
+       else {
+               docstring res = from_utf8(m);
+               cleanTranslation(res);
+               return res;
+       }
  }
  
  } // namespace lyx
diff --git a/src/support/Messages.h b/src/support/Messages.h

index 655577c0c784772103ff18fc835a455fd448fbc4..a6e937256d1ab6ff913ecca1e01fc7211bc5be97 100644 (file)
--- a/src/support/Messages.h
+++ b/src/support/Messages.h
@@ -31,17 +31,20 @@ public:
         /// Is an (at least partial) translation of language with code \p c available?
         static bool available(std::string const & c);
         ///
-       static void init();
+       static void guiLanguage(std::string const & l) { gui_lang_ = l; }
+       ///
+       static std::string const & guiLanguage() { return gui_lang_; }
  
  private:
+       /// Read the strings from the .mo file. Returns true on success.
+       bool readMoFile();
         ///
         std::string lang_;
         ///
-       typedef std::map<std::string, docstring> TranslationCache;
-       /// Internal cache for gettext translated strings.
-       /// This is needed for performance reason within \c updateBuffer()
-       /// under Windows.
-       mutable TranslationCache cache_;
+       typedef std::map<std::string, docstring> TranslationMap;
+       TranslationMap trans_map_;
+       /// The language used by the Gui
+       static std::string gui_lang_;
  };
  
  /// Access to the unique Messages object for the passed \p language.
diff --git a/src/support/gettext.cpp b/src/support/gettext.cpp

index cb21d573f4db39ea76103e617bc4cb3586df4b2d..44dd5da57a71502c29544d45bcd5cd5725776cc2 100644 (file)
--- a/src/support/gettext.cpp
+++ b/src/support/gettext.cpp
@@ -17,10 +17,6 @@
  #include "support/Messages.h"
  #include "support/Package.h"
  
-#ifdef HAVE_LOCALE_H
-#  include <locale.h>
-#endif
-
  using namespace std;
  
  namespace lyx {
@@ -31,19 +27,6 @@ docstring const _(string const & str)
  }
  
  
-void locale_init()
-{
-#ifdef ENABLE_NLS
-#  ifdef HAVE_LC_MESSAGES
-       setlocale(LC_MESSAGES, "");
-#  endif
-       setlocale(LC_CTYPE, "");
-       Messages::init();
-#endif
-       setlocale(LC_NUMERIC, "C");
-}
-
-
  docstring const translateIfPossible(docstring const & name)
  {
         if (support::isAscii(name) && !name.empty())
diff --git a/src/support/gettext.h b/src/support/gettext.h

index 2ede4de552e6058367e3725110c8f41a2b459618..317feec82ec6ba5a78bb84a2ac172341baf3de37 100644 (file)
--- a/src/support/gettext.h
+++ b/src/support/gettext.h
@@ -78,10 +78,6 @@ docstring const translateIfPossible(docstring const & name);
   * language if they come from a file in the personal directory. */
  docstring const translateIfPossible(docstring const & name, std::string const & language);
  
-///
-void locale_init();
-
-
  } // namespace lyx
  
  #endif
author	Jean-Marc Lasgouttes <lasgouttes@lyx.org>
	Wed, 8 May 2013 16:50:38 +0000 (18:50 +0200)
committer	Vincent van Ravesteijn <vfr@lyx.org>
	Thu, 30 May 2013 20:10:01 +0000 (22:10 +0200)
src/LyX.cpp		patch \| blob \| history
src/frontends/Application.h		patch \| blob \| history
src/frontends/qt4/GuiApplication.cpp		patch \| blob \| history
src/support/Messages.cpp		patch \| blob \| history
src/support/Messages.h		patch \| blob \| history
src/support/gettext.cpp		patch \| blob \| history
src/support/gettext.h		patch \| blob \| history