Fix Qt6 deprecation warning (QString::fromUcs4(uint))

[features.git] / src / support / Messages.cpp
diff --git a/src/support/Messages.cpp b/src/support/Messages.cpp

index 4e58a873fd23ac15e60932b29a5e375a376e0d19..842697ed538ed4328a60340031b2fcc381dc1d26 100644 (file)
--- a/src/support/Messages.cpp
+++ b/src/support/Messages.cpp
@@ -3,17 +3,80 @@
   * Licence details can be found in the file COPYING.
   *
   * \author Lars Gullik Bjønnes
+ * \author Jean-Marc Lasgouttes
   *
   * Full author contact details are available in file CREDITS.
   */
  
+/*
+  This contains a limited parser for gettext's mo files. Several features are
+  not implemented currently:
+   * encoding is supposed to be UTF-8 (the charset parameter is enforced)
+   * context is not handled (implemented differently in LyX)
+   * plural forms are not implemented (not used for now in LyX).
+
+  The data is loaded in a std::map object for simplicity.
+ */
+
+/*
+  Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+             byte
+                  +------------------------------------------+
+               0  | magic number = 0x950412de                |
+                  |                                          |
+               4  | file format revision = 0                 |
+                  |                                          |
+               8  | number of strings                        |  == N
+                  |                                          |
+              12  | offset of table with original strings    |  == O
+                  |                                          |
+              16  | offset of table with translation strings |  == T
+                  |                                          |
+              20  | size of hashing table                    |  == S
+                  |                                          |
+              24  | offset of hashing table                  |  == H
+                  |                                          |
+                  .                                          .
+                  .    (possibly more entries later)         .
+                  .                                          .
+                  |                                          |
+               O  | length & offset 0th string  ----------------.
+           O + 8  | length & offset 1st string  ------------------.
+                   ...                                    ...   | |
+     O + ((N-1)*8)| length & offset (N-1)th string           |  | |
+                  |                                          |  | |
+               T  | length & offset 0th translation  ---------------.
+           T + 8  | length & offset 1st translation  -----------------.
+                   ...                                    ...   | | | |
+     T + ((N-1)*8)| length & offset (N-1)th translation      |  | | | |
+                  |                                          |  | | | |
+               H  | start hash table                         |  | | | |
+                   ...                                    ...   | | | |
+       H + S * 4  | end hash table                           |  | | | |
+                  |                                          |  | | | |
+                  | NUL terminated 0th string  <----------------' | | |
+                  |                                          |    | | |
+                  | NUL terminated 1st string  <------------------' | |
+                  |                                          |      | |
+                   ...                                    ...       | |
+                  |                                          |      | |
+                  | NUL terminated 0th translation  <---------------' |
+                  |                                          |        |
+                  | NUL terminated 1st translation  <-----------------'
+                  |                                          |
+                   ...                                    ...
+                  |                                          |
+                  +------------------------------------------+
+
+ */
+
  #include <config.h>
  
  #include "support/Messages.h"
  
  #include "support/debug.h"
  #include "support/docstring.h"
-#include "support/environment.h"
  #include "support/lstrings.h"
  #include "support/Package.h"
  #include "support/unicode.h"
@@ -21,53 +84,59 @@
  #include "support/lassert.h"
  
  #include <cerrno>
+#include <cstdint>
+#include <fstream>
+#include <utility>
  
-#  define N_(str) (str)              // for marking strings to be translated
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
  
  using namespace std;
  
  namespace lyx {
  
-void cleanTranslation(docstring & trans) 
+void cleanTranslation(docstring & trans)
  {
         /*
           Some english words have different translations, depending on
           context. In these cases the original string is augmented by
           context information (e.g. "To:[[as in 'From page x to page
-         y']]" and "To:[[as in 'From format x to format y']]". This
-         means that we need to filter out everything in double square
-         brackets at the end of the string, otherwise the user sees
-         bogus messages. If we are unable to honour the request we
-         just return what we got in.
+         y']]" and "To:[[as in 'From format x to format y']]"). Also,
+         when placeholders are used, the context can indicate what will
+         be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
+         [[time]]"). This means that we need to filter out everything
+         in double square brackets anywhere in the string, otherwise
+         the user sees bogus messages. If we are unable to honour the
+         request we just return what we got in.
         */
-       size_t const pos1 = trans.find(from_ascii("[["));
-       if (pos1 != docstring::npos) {
-               size_t const pos2 = trans.find(from_ascii("]]"), pos1);
-               if (pos2 != docstring::npos) 
-                       trans.erase(pos1, pos2 - pos1 + 2);
+       static docstring const ctx_start = from_ascii("[[");
+       static docstring const ctx_end = from_ascii("]]");
+       while (true) {
+               size_t const pos1 = trans.find(ctx_start);
+               if (pos1 != docstring::npos) {
+                       size_t const pos2 = trans.find(ctx_end, pos1);
+                       if (pos2 != docstring::npos) {
+                               trans.erase(pos1, pos2 - pos1 + 2);
+                               continue;
+                       }
+               }
+               break;
         }
  }
  
-} // lyx
+} // namespace lyx
  
  
  #ifdef ENABLE_NLS
  
-#  ifdef HAVE_LOCALE_H
-#    include <locale.h>
-#  endif
-
-#  if HAVE_GETTEXT
-#    include <libintl.h>      // use the header already in the system *EK*
-#  else
-#    include "intl/libintl.h"
-#  endif
-
  using namespace lyx::support;
  
  namespace lyx {
  
-// This version use the traditional gettext.
+std::string Messages::gui_lang_;
+
+
  Messages::Messages(string const & l)
         : lang_(l)
  {
@@ -75,138 +144,218 @@ Messages::Messages(string const & l)
         size_t i = lang_.find(".");
         lang_ = lang_.substr(0, i);
         LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
+
+       readMoFile();
  }
  
  
-void Messages::init()
+namespace {
+
+// Find the code we have for a given language code. Return empty if not found.
+string realCode(string code)
  {
-       errno = 0;
-       string const locale_dir = package().locale_dir().toFilesystemEncoding();
-       char const * c = bindtextdomain(PACKAGE, locale_dir.c_str());
-       int e = errno;
-       if (e) {
-               LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
-                       << "Directory : " << package().locale_dir().absFileName() << '\n'
-                       << "Rtn value : " << c);
+       // this loops at most twice
+       while (true) {
+               if (package().messages_file(code).isReadableFile())
+                       return code;
+               if (contains(code, '_'))
+                       code = token(code, '_', 0);
+               else
+                       break;
         }
+       return string();
+}
+} // namespace
  
-       if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) {
-               LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
-                       << "Codeset   : " << ucs4_codeset);
-       }
  
-       textdomain(PACKAGE);
+bool Messages::available(string const & c)
+{
+       return !realCode(c).empty();
  }
  
  
  string Messages::language() const
  {
-       // get the language from the gmo file
-       string const test = N_("[[Replace with the code of your language]]");
-       string const trans = to_utf8(get(test));
-       if (trans == test) {
-               LYXERR0("Something is weird.");
-               return string();
-       } else
-               return trans;
+       return realCode(lang_);
  }
  
+namespace {
  
-bool Messages::available(string const & c)
+void swapInt(uint32_t & number)
  {
-       static string locale_dir = package().locale_dir().toFilesystemEncoding();
-       string code = c;
-       // this loops at most twice
-       while (true) {
-               string const filen = locale_dir + "/" + code 
-                       + "/LC_MESSAGES/" PACKAGE ".mo";
-               if (FileName(filen).isReadableFile())
-                       return true;
-               if (contains(code, '_'))
-                       code = token(code, '_', 0);
-               else return false;
-       }
-       return false;
-
+       unsigned char * num_ar = reinterpret_cast<unsigned char *>(&number);
+       swap(num_ar[0], num_ar[3]);
+       swap(num_ar[1], num_ar[2]);
  }
  
-namespace {
  
-// Trivial wrapper around gettext()
-docstring const getText(string const & m)
+struct MoHeader
  {
-       // FIXME: gettext sometimes "forgets" the ucs4_codeset we set
-       // in init(), which leads to severe message corruption (#7371)
-       // We set it again here unconditionally. A real fix must be found!
-       LATTEST(bind_textdomain_codeset(PACKAGE, ucs4_codeset));
-
-       char const * m_c = m.c_str();
-       char const * trans_c = gettext(m_c);
-       docstring trans;
-       if (!trans_c) {
-               LYXERR(Debug::LOCALE, "Undefined result from gettext for `" << m << "'.");
-               trans = from_ascii(m);
-       } else if (trans_c == m_c) {
-               //LYXERR(Debug::LOCALE, "Same as entered returned");
-               trans = from_ascii(m);
-       } else {
-               //LYXERR(Debug::LOCALE, "We got a translation");
-               // m is actually not a char const * but ucs4 data
-               trans = reinterpret_cast<char_type const *>(trans_c);
-       }
+       // magic number = 0x950412de
+       uint32_t magic;
+       // file format revision = 0
+       uint32_t rev;
+       // number of strings
+       uint32_t N;
+       // offset of table with original strings
+       uint32_t O;
+       // offset of table with translation strings
+       uint32_t T;
+       // there is a hash table afterwards, but we ignore it
+
+       // Change the endianness of header data
+       void swapEnd();
+};
  
-       cleanTranslation(trans);
  
-       return trans;
+void MoHeader::swapEnd()
+{
+       swapInt(magic);
+       swapInt(rev);
+       swapInt(N);
+       swapInt(O);
+       swapInt(T);
  }
  
+struct StringTable
+{
+       // string length
+       uint32_t length;
+       // string offset
+       uint32_t offset;
+
+       // Change the endianness of string table data
+       void swapEnd();
+};
+
+
+void StringTable::swapEnd()
+{
+       swapInt(length);
+       swapInt(offset);
  }
  
  
+} // namespace
+
+bool Messages::readMoFile()
+{
+       // FIXME:remove
+       if (lang_.empty()) {
+               LYXERR0("No language given, nothing to load.");
+               return false;
+       }
+
+       string const code = realCode(lang_);
+       if (code.empty()) {
+               LYXERR(Debug::LOCALE, "Cannot find translation for language " << lang_);
+               return false;
+       }
+
+       string const filen = package().messages_file(code).toSafeFilesystemEncoding();
+
+       // get file size
+       struct stat buf;
+       if (stat(filen.c_str(), &buf)) {
+               LYXERR0("Cannot get information for file " << filen);
+               return false;
+       }
+
+       vector<char> moData(buf.st_size);
+
+       ifstream is(filen.c_str(), ios::in | ios::binary);
+       if (!is.read(&moData[0], buf.st_size)) {
+               LYXERR0("Cannot read file " << filen);
+               return false;
+       }
+
+       MoHeader * header = reinterpret_cast<MoHeader *>(&moData[0]);
+
+       bool doSwap = false;
+       if (header->magic == 0xde120495) {
+               header->swapEnd();
+               doSwap = true;
+       }
+
+       if (header->magic != 0x950412de) {
+               LYXERR0("Wrong magic number for file " << filen
+                       << ".\nExpected 0x950412de, got 0x" << std::hex << header->magic << std::dec);
+               return false;
+       }
+
+       StringTable * orig = reinterpret_cast<StringTable *>(&moData[0] + header->O);
+       StringTable * trans = reinterpret_cast<StringTable *>(&moData[0] + header->T);
+       // First the header
+       if (doSwap) {
+               // Handle endianness change
+               orig[0].swapEnd();
+               trans[0].swapEnd();
+       }
+       string const info = string(&moData[0] + trans[0].offset, trans[0].length);
+       size_t pos = info.find("charset=");
+       if (pos != string::npos) {
+               pos += 8;
+               string charset;
+               size_t pos2 = info.find("\n", pos);
+               if (pos2 == string::npos)
+                       charset = info.substr(pos);
+               else
+                       charset = info.substr(pos, pos2 - pos);
+               charset = ascii_lowercase(trim(charset));
+               if (charset != "utf-8") {
+                       LYXERR0("Wrong encoding " << charset << " for file " << filen);
+                       return false;
+               }
+       } else {
+               LYXERR0("Cannot find encoding encoding for file " << filen);
+               return false;
+       }
+
+       for (size_t i = 1; i < header->N; ++i) {
+               if (doSwap) {
+                       // Handle endianness change
+                       orig[i].swapEnd();
+                       trans[i].swapEnd();
+               }
+               // Note that in theory the strings may contain NUL characters.
+               // This may be the case with plural forms
+               string const ostr(&moData[0] + orig[i].offset, orig[i].length);
+               docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
+                                                 trans[i].length));
+               cleanTranslation(tstr);
+               trans_map_[ostr] = tstr;
+               //lyxerr << ostr << " ==> " << tstr << endl;
+       }
+
+       return true;
+}
+
  docstring const Messages::get(string const & m) const
  {
         if (m.empty())
                 return docstring();
  
-       // Look for the translated string in the cache.
-       TranslationCache::iterator it = cache_.find(m);
-       if (it != cache_.end())
+       TranslationMap::const_iterator it = trans_map_.find(m);
+       if (it != trans_map_.end())
                 return it->second;
+       else {
+               docstring res = from_utf8(m);
+               cleanTranslation(res);
+               return res;
+       }
+}
  
-       // The string was not found, use gettext to generate it
-       docstring trans;
-       if (!lang_.empty()) {
-               // This GNU extension overrides any language locale
-               // wrt gettext.
-               LYXERR(Debug::LOCALE, "Setting LANGUAGE to " << lang_);
-               EnvChanger language_chg("LANGUAGE", lang_);
-               // However, setting LANGUAGE does nothing when the
-               // locale is "C". Therefore we set the locale to
-               // something that is believed to exist on most
-               // systems. The idea is that one should be able to
-               // load German documents even without having de_DE
-               // installed.
-               LYXERR(Debug::LOCALE, "Setting LC_ALL to en_US");
-               EnvChanger lc_all_chg("LC_ALL", "en_US");
-#ifdef HAVE_LC_MESSAGES
-               setlocale(LC_MESSAGES, "");
-#endif
-               trans = getText(m);
-       } else
-               trans = getText(m);
-               
-
-#ifdef HAVE_LC_MESSAGES
-       setlocale(LC_MESSAGES, "");
-#endif
-
-       // store translation in cache
-       pair<TranslationCache::iterator, bool> result =
-               cache_.insert(make_pair(m, trans));
  
-       LASSERT(result.second, return from_utf8(m));
+docstring const Messages::getIfFound(string const & m) const
+{
+       if (m.empty())
+               return docstring();
  
-       return result.first->second;
+       TranslationMap::const_iterator it = trans_map_.find(m);
+       if (it != trans_map_.end())
+               return it->second;
+       else
+               return docstring();
  }
  
  } // namespace lyx
@@ -216,12 +365,9 @@ docstring const Messages::get(string const & m) const
  
  namespace lyx {
  
-Messages::Messages(string const & /* l */) {}
-
-void Messages::init()
-{
-}
+std::string Messages::gui_lang_;
  
+Messages::Messages(string const & /* l */) {}
  
  docstring const Messages::get(string const & m) const
  {
@@ -231,62 +377,19 @@ docstring const Messages::get(string const & m) const
  }
  
  std::string Messages::language() const
-    {
-        return string();
-    }
+{
+       return string();
+}
  
  bool Messages::available(string const & /* c */)
  {
         return false;
  }
  
-} // namespace lyx
-
-#endif
-
-#if 0
-
--#include <locale>
-
-namespace lyx {
-
-// This version of the Pimpl utilizes the message capability of
-// libstdc++ that is distributed with GNU G++.
-class Messages::Pimpl {
-public:
-       typedef messages<char>::catalog catalog;
-
-       Pimpl(string const & l)
-               : lang_(l),
-                 loc_gl(lang_.c_str()),
-                 mssg_gl(use_facet<messages<char> >(loc_gl))
-       {
-               //LYXERR("Messages: language(" << l << ") in dir(" << dir << ")");
-
-               string const locale_dir = package().locale_dir().toFilesystemEncoding();
-               cat_gl = mssg_gl.open(PACKAGE, loc_gl, locale_dir.c_str());
-
-       }
-
-       ~Pimpl()
-       {
-               mssg_gl.close(cat_gl);
-       }
-
-       docstring const get(string const & msg) const
-       {
-               return mssg_gl.get(cat_gl, 0, 0, msg);
-       }
-private:
-       ///
-       string lang_;
-       ///
-       locale loc_gl;
-       ///
-       messages<char> const & mssg_gl;
-       ///
-       catalog cat_gl;
-};
+docstring const Messages::getIfFound(string const & /* m */) const
+{
+       return docstring();
+}
  
  } // namespace lyx