]> git.lyx.org Git - lyx.git/blobdiff - src/support/Messages.cpp
Bulk cleanup/fix incorrect annotation at the end of namespaces.
[lyx.git] / src / support / Messages.cpp
index b86874c1780af8431c687f9f54c92fca5b4b220c..26a320b2d243c753c727b19947aade5ebfe0457b 100644 (file)
  * This file is part of LyX, the document processor.
  * Licence details can be found in the file COPYING.
  *
- * \author Lars Gullik Bjønnes
+ * \author Lars Gullik Bjønnes
+ * \author Jean-Marc Lasgouttes
  *
  * Full author contact details are available in file CREDITS.
  */
 
+/*
+  This file contains a limited parser for gettext's MO files. Several features
+  are currently not implemented:
+   * the encoding must be UTF-8 (the charset parameter of the header entry is
+     checked and any other value is rejected)
+   * context is not handled (implemented differently in LyX)
+   * plural forms are not implemented (not used for now in LyX).
+
+  The data is loaded in a std::map object for simplicity.
+ */
+
+/*
+  Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+             byte
+                  +------------------------------------------+
+               0  | magic number = 0x950412de                |
+                  |                                          |
+               4  | file format revision = 0                 |
+                  |                                          |
+               8  | number of strings                        |  == N
+                  |                                          |
+              12  | offset of table with original strings    |  == O
+                  |                                          |
+              16  | offset of table with translation strings |  == T
+                  |                                          |
+              20  | size of hashing table                    |  == S
+                  |                                          |
+              24  | offset of hashing table                  |  == H
+                  |                                          |
+                  .                                          .
+                  .    (possibly more entries later)         .
+                  .                                          .
+                  |                                          |
+               O  | length & offset 0th string  ----------------.
+           O + 8  | length & offset 1st string  ------------------.
+                   ...                                    ...   | |
+     O + ((N-1)*8)| length & offset (N-1)th string           |  | |
+                  |                                          |  | |
+               T  | length & offset 0th translation  ---------------.
+           T + 8  | length & offset 1st translation  -----------------.
+                   ...                                    ...   | | | |
+     T + ((N-1)*8)| length & offset (N-1)th translation      |  | | | |
+                  |                                          |  | | | |
+               H  | start hash table                         |  | | | |
+                   ...                                    ...   | | | |
+       H + S * 4  | end hash table                           |  | | | |
+                  |                                          |  | | | |
+                  | NUL terminated 0th string  <----------------' | | |
+                  |                                          |    | | |
+                  | NUL terminated 1st string  <------------------' | |
+                  |                                          |      | |
+                   ...                                    ...       | |
+                  |                                          |      | |
+                  | NUL terminated 0th translation  <---------------' |
+                  |                                          |        |
+                  | NUL terminated 1st translation  <-----------------'
+                  |                                          |
+                   ...                                    ...
+                  |                                          |
+                  +------------------------------------------+
+
+ */
+
 #include <config.h>
 
 #include "support/Messages.h"
 
 #include "support/debug.h"
 #include "support/docstring.h"
-#include "support/environment.h"
+#include "support/lstrings.h"
 #include "support/Package.h"
 #include "support/unicode.h"
 
-#include <boost/current_function.hpp>
+#include "support/lassert.h"
+
+#include <boost/cstdint.hpp>
 
 #include <cerrno>
+#include <fstream>
+#include <utility>
 
-using std::map;
-using std::string;
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
 
-namespace {
+using namespace std;
+using boost::uint32_t;
 
-using lyx::docstring;
-using lyx::from_ascii;
+namespace lyx {
 
-void cleanTranslation(docstring & trans) 
+void cleanTranslation(docstring & trans)
 {
        /*
          Some english words have different translations, depending on
          context. In these cases the original string is augmented by
          context information (e.g. "To:[[as in 'From page x to page
-         y']]" and "To:[[as in 'From format x to format y']]". This
-         means that we need to filter out everything in double square
-         brackets at the end of the string, otherwise the user sees
-         bogus messages. If we are unable to honour the request we
-         just return what we got in.
+         y']]" and "To:[[as in 'From format x to format y']]"). Also,
+         when placeholders are used, the context can indicate what will
+         be substituted for the placeholder (e.g. "%1$s[[date]],
+         %1$s[[time]]"). This means that we need to filter out every
+         part enclosed in double square brackets, wherever it occurs in
+         the string, otherwise the user sees bogus messages. If a "[["
+         has no matching "]]", the remainder is left as it is.
        */
-       size_t const pos1 = trans.find(from_ascii("[["));
-       if (pos1 != docstring::npos) {
-               size_t const pos2 = trans.find(from_ascii("]]"), pos1);
-               if (pos2 != docstring::npos) 
-                       trans.erase(pos1, pos2 - pos1 + 2);
+       static docstring const ctx_start = from_ascii("[[");
+       static docstring const ctx_end = from_ascii("]]");
+       while (true) {
+               size_t const pos1 = trans.find(ctx_start);
+               if (pos1 != docstring::npos) {
+                       size_t const pos2 = trans.find(ctx_end, pos1);
+                       if (pos2 != docstring::npos) {
+                               trans.erase(pos1, pos2 - pos1 + 2);
+                               continue;
+                       }
+               }
+               break;
        }
 }
 
-}
+} // namespace lyx
 
 
 #ifdef ENABLE_NLS
 
-#  ifdef HAVE_LOCALE_H
-#    include <locale.h>
-#  endif
-
-#  if HAVE_GETTEXT
-#    include <libintl.h>      // use the header already in the system *EK*
-#  else
-#    include "../../intl/libintl.h"
-#  endif
+using namespace lyx::support;
 
 namespace lyx {
 
-using support::package;
-using support::getEnv;
-using support::setEnv;
+std::string Messages::gui_lang_;
 
 
-// This version use the traditional gettext.
 Messages::Messages(string const & l)
-       : lang_(l), warned_(false)
+       : lang_(l)
 {
        // strip off any encoding suffix, i.e., assume 8-bit po files
-       string::size_type i = lang_.find(".");
+       size_t i = lang_.find(".");
        lang_ = lang_.substr(0, i);
-       LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION
-               << ": language(" << lang_ << ")");
+       LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
+
+       readMoFile();
 }
 
 
-void Messages::init()
+namespace {
+
+// Find the code we have for a given language code. Return empty if not found.
+string realCode(string code)
 {
-       errno = 0;
-       string const locale_dir = package().locale_dir().toFilesystemEncoding();
-       char const * c = bindtextdomain(PACKAGE, locale_dir.c_str());
-       int e = errno;
-       if (e) {
-               LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION << '\n'
-                       << "Error code: " << errno << '\n'
-                       << "Directory : " << package().locale_dir().absFilename() << '\n'
-                       << "Rtn value : " << c);
+       // this loops at most twice
+       while (true) {
+               if (package().messages_file(code).isReadableFile())
+                       return code;
+               if (contains(code, '_'))
+                       code = token(code, '_', 0);
+               else
+                       break;
        }
+       return string();
+}
+} // namespace
 
-       if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) {
-               LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION << '\n'
-                       << "Error code: " << errno << '\n'
-                       << "Codeset   : " << ucs4_codeset << '\n');
-       }
 
-       textdomain(PACKAGE);
+bool Messages::available(string const & c)
+{
+       return !realCode(c).empty();
 }
 
 
-docstring const Messages::get(string const & m) const
+string Messages::language() const
 {
-       if (m.empty())
-               return docstring();
+       return realCode(lang_);
+}
 
-       // Look for the translated string in the cache.
-       TranslationCache::iterator it = cache_.find(m);
-       if (it != cache_.end())
-               return it->second;
+namespace {
 
-       // The string was not found, use gettext to generate it
-
-       string const oldLANGUAGE = getEnv("LANGUAGE");
-       string const oldLC_ALL = getEnv("LC_ALL");
-       if (!lang_.empty()) {
-               // This GNU extension overrides any language locale
-               // wrt gettext.
-               setEnv("LANGUAGE", lang_);
-               // However, setting LANGUAGE does nothing when the
-               // locale is "C". Therefore we set the locale to
-               // something that is believed to exist on most
-               // systems. The idea is that one should be able to
-               // load German documents even without having de_DE
-               // installed.
-               setEnv("LC_ALL", "en_US");
-#ifdef HAVE_LC_MESSAGES
-               setlocale(LC_MESSAGES, "");
-#endif
+void swapInt(uint32_t & number)
+{
+       unsigned char * num_ar = reinterpret_cast<unsigned char *>(&number);
+       swap(num_ar[0], num_ar[3]);
+       swap(num_ar[1], num_ar[2]);
+}
+
+
+struct MoHeader
+{
+       // magic number = 0x950412de
+       uint32_t magic;
+       // file format revision = 0
+       uint32_t rev;
+       // number of strings
+       uint32_t N;
+       // offset of table with original strings
+       uint32_t O;
+       // offset of table with translation strings
+       uint32_t T;
+       // there is a hash table afterwards, but we ignore it
+
+       // Change the endianness of header data
+       void swapEnd();
+};
+
+
+void MoHeader::swapEnd()
+{
+       swapInt(magic);
+       swapInt(rev);
+       swapInt(N);
+       swapInt(O);
+       swapInt(T);
+}
+
+struct StringTable
+{
+       // string length
+       uint32_t length;
+       // string offset
+       uint32_t offset;
+
+       // Change the endianness of string table data
+       void swapEnd();
+};
+
+
+void StringTable::swapEnd()
+{
+       swapInt(length);
+       swapInt(offset);
+}
+
+
+} // namespace
+
+bool Messages::readMoFile()
+{
+       // FIXME:remove
+       if (lang_.empty()) {
+               LYXERR0("No language given, nothing to load.");
+               return false;
        }
 
-       char const * m_c = m.c_str();
-       char const * trans_c = gettext(m_c);
-       docstring trans;
-       if (!trans_c)
-               LYXERR0("Undefined result from gettext");
-       else if (trans_c == m_c) {
-               LYXERR(Debug::DEBUG, "Same as entered returned");
-               trans = from_ascii(m);
-       } else {
-               LYXERR(Debug::DEBUG, "We got a translation");
-               // m is actually not a char const * but ucs4 data
-               trans = reinterpret_cast<char_type const *>(trans_c);
+       string const code = realCode(lang_);
+       if (code.empty()) {
+               LYXERR(Debug::LOCALE, "Cannot find translation for language " << lang_);
+               return false;
        }
 
-       cleanTranslation(trans);
+       string const filen = package().messages_file(code).toSafeFilesystemEncoding();
 
-       // Reset environment variables as they were.
-       if (!lang_.empty()) {
-               // Reset everything as it was.
-               setEnv("LANGUAGE", oldLANGUAGE);
-               setEnv("LC_ALL", oldLC_ALL);
-#ifdef HAVE_LC_MESSAGES
-               setlocale(LC_MESSAGES, "");
-#endif
+       // get file size
+       struct stat buf;
+       if (stat(filen.c_str(), &buf)) {
+               LYXERR0("Cannot get information for file " << filen);
+               return false;
        }
 
-       std::pair<TranslationCache::iterator, bool> result =
-               cache_.insert(std::make_pair(m, trans));
+       vector<char> moData(buf.st_size);
 
-       BOOST_ASSERT(result.second);
+       ifstream is(filen.c_str(), ios::in | ios::binary);
+       if (!is.read(&moData[0], buf.st_size)) {
+               LYXERR0("Cannot read file " << filen);
+               return false;
+       }
 
-       return result.first->second;
-}
+       MoHeader * header = reinterpret_cast<MoHeader *>(&moData[0]);
 
-} // namespace lyx
+       bool doSwap = false;
+       if (header->magic == 0xde120495) {
+               header->swapEnd();
+               doSwap = true;
+       }
 
-#else // ENABLE_NLS
-// This is the dummy variant.
+       if (header->magic != 0x950412de) {
+               LYXERR0("Wrong magic number for file " << filen
+                       << ".\nExpected 0x950412de, got 0x" << std::hex << header->magic << std::dec);
+               return false;
+       }
 
-namespace lyx {
+       StringTable * orig = reinterpret_cast<StringTable *>(&moData[0] + header->O);
+       StringTable * trans = reinterpret_cast<StringTable *>(&moData[0] + header->T);
+       // First the header
+       if (doSwap) {
+               // Handle endianness change
+               orig[0].swapEnd();
+               trans[0].swapEnd();
+       }
+       string const info = string(&moData[0] + trans[0].offset, trans[0].length);
+       size_t pos = info.find("charset=");
+       if (pos != string::npos) {
+               pos += 8;
+               string charset;
+               size_t pos2 = info.find("\n", pos);
+               if (pos2 == string::npos)
+                       charset = info.substr(pos);
+               else
+                       charset = info.substr(pos, pos2 - pos);
+               charset = ascii_lowercase(trim(charset));
+               if (charset != "utf-8") {
+                       LYXERR0("Wrong encoding " << charset << " for file " << filen);
+                       return false;
+               }
+       } else {
+               LYXERR0("Cannot find encoding encoding for file " << filen);
+               return false;
+       }
 
-Messages::Messages(string const & l) {}
+       for (size_t i = 1; i < header->N; ++i) {
+               if (doSwap) {
+                       // Handle endianness change
+                       orig[i].swapEnd();
+                       trans[i].swapEnd();
+               }
+               // Note that in theory the strings may contain NUL characters.
+               // This may be the case with plural forms
+               string const ostr(&moData[0] + orig[i].offset, orig[i].length);
+               docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
+                                                 trans[i].length));
+               cleanTranslation(tstr);
+               trans_map_[ostr] = tstr;
+               //lyxerr << ostr << " ==> " << tstr << endl;
+       }
 
-void Messages::init()
-{
+       return true;
 }
 
-
 docstring const Messages::get(string const & m) const
 {
-       docstring trans = from_ascii(m);
-       cleanTranslation(trans);
-       return trans;
+       if (m.empty())
+               return docstring();
+
+       TranslationMap::const_iterator it = trans_map_.find(m);
+       if (it != trans_map_.end())
+               return it->second;
+       else {
+               docstring res = from_utf8(m);
+               cleanTranslation(res);
+               return res;
+       }
 }
 
 } // namespace lyx
 
-#endif
-
-#if 0
-
--#include <locale>
+#else // ENABLE_NLS
+// This is the dummy variant.
 
 namespace lyx {
 
-// This version of the Pimpl utilizes the message capability of
-// libstdc++ that is distributed with GNU G++.
-class Messages::Pimpl {
-public:
-       typedef std::messages<char>::catalog catalog;
-
-       Pimpl(string const & l)
-               : lang_(l),
-                 loc_gl(lang_.c_str()),
-                 mssg_gl(std::use_facet<std::messages<char> >(loc_gl))
-       {
-               //LYXERR("Messages: language(" << l << ") in dir(" << dir << ")");
+std::string Messages::gui_lang_;
 
-               string const locale_dir = package().locale_dir().toFilesystemEncoding();
-               cat_gl = mssg_gl.open(PACKAGE, loc_gl, locale_dir.c_str());
+Messages::Messages(string const & /* l */) {}
 
-       }
+docstring const Messages::get(string const & m) const
+{
+       docstring trans = from_ascii(m);
+       cleanTranslation(trans);
+       return trans;
+}
 
-       ~Pimpl()
-       {
-               mssg_gl.close(cat_gl);
-       }
+std::string Messages::language() const
+{
+       return string();
+}
 
-       docstring const get(string const & msg) const
-       {
-               return mssg_gl.get(cat_gl, 0, 0, msg);
-       }
-private:
-       ///
-       string lang_;
-       ///
-       std::locale loc_gl;
-       ///
-       std::messages<char> const & mssg_gl;
-       ///
-       catalog cat_gl;
-};
+bool Messages::available(string const & /* c */)
+{
+       return false;
+}
 
 } // namespace lyx