* This file is part of LyX, the document processor.
* Licence details can be found in the file COPYING.
*
- * \author Lars Gullik Bjønnes
+ * \author Lars Gullik Bjønnes
+ * \author Jean-Marc Lasgouttes
*
* Full author contact details are available in file CREDITS.
*/
+/*
+ This contains a limited parser for gettext's mo files. Several features are
+ not implemented currently:
+ * encoding is supposed to be UTF-8 (the charset parameter is enforced)
+ * context is not handled (implemented differently in LyX)
+ * plural forms are not implemented (not used for now in LyX).
+
+ The data is loaded in a std::map object for simplicity.
+ */
+
+/*
+ Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+ byte
+ +------------------------------------------+
+ 0 | magic number = 0x950412de |
+ | |
+ 4 | file format revision = 0 |
+ | |
+ 8 | number of strings | == N
+ | |
+ 12 | offset of table with original strings | == O
+ | |
+ 16 | offset of table with translation strings | == T
+ | |
+ 20 | size of hashing table | == S
+ | |
+ 24 | offset of hashing table | == H
+ | |
+ . .
+ . (possibly more entries later) .
+ . .
+ | |
+ O | length & offset 0th string ----------------.
+ O + 8 | length & offset 1st string ------------------.
+ ... ... | |
+ O + ((N-1)*8)| length & offset (N-1)th string | | |
+ | | | |
+ T | length & offset 0th translation ---------------.
+ T + 8 | length & offset 1st translation -----------------.
+ ... ... | | | |
+ T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
+ | | | | | |
+ H | start hash table | | | | |
+ ... ... | | | |
+ H + S * 4 | end hash table | | | | |
+ | | | | | |
+ | NUL terminated 0th string <----------------' | | |
+ | | | | |
+ | NUL terminated 1st string <------------------' | |
+ | | | |
+ ... ... | |
+ | | | |
+ | NUL terminated 0th translation <---------------' |
+ | | |
+ | NUL terminated 1st translation <-----------------'
+ | |
+ ... ...
+ | |
+ +------------------------------------------+
+
+ */
+
#include <config.h>
#include "support/Messages.h"
#include "support/debug.h"
#include "support/docstring.h"
-#include "support/environment.h"
+#include "support/lstrings.h"
#include "support/Package.h"
#include "support/unicode.h"
-#include <boost/assert.hpp>
+#include "support/lassert.h"
#include <cerrno>
+#include <cstdint>
+#include <fstream>
+#include <utility>
-using namespace std;
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
-namespace {
+using namespace std;
-using lyx::docstring;
-using lyx::from_ascii;
+namespace lyx {
-void cleanTranslation(docstring & trans)
+void cleanTranslation(docstring & trans) // strips every "[[...]]" context annotation from trans, in place
{
/*
Some english words have different translations, depending on
context. In these cases the original string is augmented by
context information (e.g. "To:[[as in 'From page x to page
- y']]" and "To:[[as in 'From format x to format y']]". This
- means that we need to filter out everything in double square
- brackets at the end of the string, otherwise the user sees
- bogus messages. If we are unable to honour the request we
- just return what we got in.
+ y']]" and "To:[[as in 'From format x to format y']]"). Also,
+ when placeholders are used, the context can indicate what will
+ be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
+ [[time]]"). This means that we need to filter out everything
+ in double square brackets, wherever it occurs in the string,
+ otherwise the user sees bogus messages. If we are unable to
+ honour the request (a "[[" without a following "]]") we just
+ leave the rest of the string as it is.
*/
- size_t const pos1 = trans.find(from_ascii("[["));
- if (pos1 != docstring::npos) {
- size_t const pos2 = trans.find(from_ascii("]]"), pos1);
- if (pos2 != docstring::npos)
- trans.erase(pos1, pos2 - pos1 + 2);
+ static docstring const ctx_start = from_ascii("[["); // built once, reused on every call
+ static docstring const ctx_end = from_ascii("]]");
+ while (true) { // one iteration per "[[...]]" pair removed
+ size_t const pos1 = trans.find(ctx_start);
+ if (pos1 != docstring::npos) {
+ size_t const pos2 = trans.find(ctx_end, pos1); // search for the closing marker after the opening one
+ if (pos2 != docstring::npos) {
+ trans.erase(pos1, pos2 - pos1 + 2); // the +2 also removes the closing "]]"
+ continue; // look for a further annotation
+ }
+ }
+ break; // no "[[" left, or an unterminated "[[": stop
}
}
-}
+} // namespace lyx
#ifdef ENABLE_NLS
-# ifdef HAVE_LOCALE_H
-# include <locale.h>
-# endif
-
-# if HAVE_GETTEXT
-# include <libintl.h> // use the header already in the system *EK*
-# else
-# include "../intl/libintl.h"
-# endif
-
using namespace lyx::support;
namespace lyx {
-// This version use the traditional gettext.
+std::string Messages::gui_lang_;
+
+
Messages::Messages(string const & l) // builds the translator for language code l and loads its catalogue
- : lang_(l), warned_(false)
+ : lang_(l)
{
// strip off any encoding suffix, i.e., assume 8-bit po files
size_t i = lang_.find("."); // npos when there is no '.'; substr(0, npos) then keeps the whole string
lang_ = lang_.substr(0, i);
- LYXERR(Debug::DEBUG, "language(" << lang_ << ")");
+ LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
+
+ readMoFile(); // the boolean result is not checked here
}
-void Messages::init()
+namespace {
+
+// Find the code we have for a given language code, i.e. one whose messages file is readable. Return empty if not found.
+string realCode(string code)
{
- errno = 0;
- string const locale_dir = package().locale_dir().toFilesystemEncoding();
- char const * c = bindtextdomain(PACKAGE, locale_dir.c_str());
- int e = errno;
- if (e) {
- LYXERR(Debug::DEBUG, "Error code: " << errno << '\n'
- << "Directory : " << package().locale_dir().absFilename() << '\n'
- << "Rtn value : " << c);
+ // this loops at most twice: once with the code as given, once with the shortened code
+ while (true) {
+ if (package().messages_file(code).isReadableFile())
+ return code;
+ if (contains(code, '_'))
+ code = token(code, '_', 0); // presumably the part before the first '_' (e.g. "pt_BR" -> "pt") - confirm against lstrings
+ else
+ break; // nothing left to strip: give up
}
+ return string();
+}
+} // namespace
- if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) {
- LYXERR(Debug::DEBUG, "Error code: " << errno << '\n'
- << "Codeset : " << ucs4_codeset);
- }
- textdomain(PACKAGE);
+bool Messages::available(string const & c) // true if realCode() finds a readable messages file for c
+{
+ return !realCode(c).empty();
}
-docstring const Messages::get(string const & m) const
+string Messages::language() const // the code realCode() resolves lang_ to; empty if no messages file is readable
{
- if (m.empty())
- return docstring();
+ return realCode(lang_);
+}
- // Look for the translated string in the cache.
- TranslationCache::iterator it = cache_.find(m);
- if (it != cache_.end())
- return it->second;
+namespace {
- // The string was not found, use gettext to generate it
-
- static string const oldLANGUAGE = getEnv("LANGUAGE");
- static string const oldLC_ALL = getEnv("LC_ALL");
- if (!lang_.empty()) {
- // This GNU extension overrides any language locale
- // wrt gettext.
- setEnv("LANGUAGE", lang_);
- // However, setting LANGUAGE does nothing when the
- // locale is "C". Therefore we set the locale to
- // something that is believed to exist on most
- // systems. The idea is that one should be able to
- // load German documents even without having de_DE
- // installed.
- setEnv("LC_ALL", "en_US");
-#ifdef HAVE_LC_MESSAGES
- setlocale(LC_MESSAGES, "");
-#endif
+void swapInt(uint32_t & number) // reverses the byte order of number in place
+{
+ unsigned char * num_ar = reinterpret_cast<unsigned char *>(&number); // view the value as its individual bytes
+ swap(num_ar[0], num_ar[3]); // outer pair
+ swap(num_ar[1], num_ar[2]); // inner pair
+}
+
+
+struct MoHeader // the leading 32-bit fields of a MO file, in file order
+{
+ // magic number = 0x950412de (seen as 0xde120495 when the file has the opposite byte order)
+ uint32_t magic;
+ // file format revision = 0
+ uint32_t rev;
+ // number of strings (N in the format description)
+ uint32_t N;
+ // offset of table with original strings (O in the format description)
+ uint32_t O;
+ // offset of table with translation strings (T in the format description)
+ uint32_t T;
+ // the hash table size and offset fields follow in the file, but we ignore them
+
+ // Change the endianness of header data (byte-swaps every field in place)
+ void swapEnd();
+};
+
+
+void MoHeader::swapEnd() // byte-swaps all five header fields in place
+{
+ swapInt(magic);
+ swapInt(rev);
+ swapInt(N);
+ swapInt(O);
+ swapInt(T);
+}
+
+struct StringTable // one "length & offset" entry of the original/translation tables
+{
+ // string length
+ uint32_t length;
+ // string offset, counted from the start of the file data
+ uint32_t offset;
+
+ // Change the endianness of string table data (byte-swaps both fields in place)
+ void swapEnd();
+};
+
+
+void StringTable::swapEnd() // byte-swaps length and offset in place
+{
+ swapInt(length);
+ swapInt(offset);
+}
+
+
+} // namespace
+
+// Load the gettext MO catalogue for lang_ into trans_map_.
+//
+// Returns true when a messages file was found for lang_, it is a
+// well-formed MO file whose header entry declares charset=UTF-8, and all
+// its entries (except the header entry 0) were stored in trans_map_.
+// Returns false, after logging the reason, in every other case; nothing
+// is inserted in trans_map_ then.
+//
+// Every count, offset and length read from the file is checked against
+// the file size before it is used, so a truncated or corrupt file cannot
+// cause reads outside the buffer.
+bool Messages::readMoFile()
+{
+ // FIXME:remove
+ if (lang_.empty()) {
+ LYXERR0("No language given, nothing to load.");
+ return false;
}
- char const * m_c = m.c_str();
- char const * trans_c = gettext(m_c);
- docstring trans;
- if (!trans_c)
- LYXERR0("Undefined result from gettext");
- else if (trans_c == m_c) {
- LYXERR(Debug::DEBUG, "Same as entered returned");
- trans = from_ascii(m);
- } else {
- LYXERR(Debug::DEBUG, "We got a translation");
- // m is actually not a char const * but ucs4 data
- trans = reinterpret_cast<char_type const *>(trans_c);
+ string const code = realCode(lang_);
+ if (code.empty()) {
+ LYXERR(Debug::LOCALE, "Cannot find translation for language " << lang_);
+ return false;
}
- cleanTranslation(trans);
+ string const filen = package().messages_file(code).toSafeFilesystemEncoding();
- // Reset environment variables as they were.
- if (!lang_.empty()) {
- // Reset everything as it was.
- setEnv("LANGUAGE", oldLANGUAGE);
- setEnv("LC_ALL", oldLC_ALL);
-#ifdef HAVE_LC_MESSAGES
- setlocale(LC_MESSAGES, "");
-#endif
+ // get file size
+ struct stat buf;
+ if (stat(filen.c_str(), &buf)) {
+ LYXERR0("Cannot get information for file " << filen);
+ return false;
}
- pair<TranslationCache::iterator, bool> result =
- cache_.insert(make_pair(m, trans));
+ // The file must at least hold the header we are about to read. This
+ // also guarantees that moData is not empty, so &moData[0] is valid.
+ if (buf.st_size < 0
+ || static_cast<uint64_t>(buf.st_size) < sizeof(MoHeader)) {
+ LYXERR0("File " << filen << " is too small to be a MO file");
+ return false;
+ }
+ vector<char> moData(buf.st_size);
- BOOST_ASSERT(result.second);
+ ifstream is(filen.c_str(), ios::in | ios::binary);
+ if (!is.read(&moData[0], buf.st_size)) {
+ LYXERR0("Cannot read file " << filen);
+ return false;
+ }
- return result.first->second;
-}
+ // NOTE(review): the reinterpret_casts below assume that the header and
+ // both tables are suitably aligned for uint32_t access; this is not
+ // checked.
+ MoHeader * header = reinterpret_cast<MoHeader *>(&moData[0]);
-} // namespace lyx
+ // A byte-reversed magic number means that the file was written with
+ // the opposite endianness: every 32-bit value must then be swapped.
+ bool doSwap = false;
+ if (header->magic == 0xde120495) {
+ header->swapEnd();
+ doSwap = true;
+ }
-#else // ENABLE_NLS
-// This is the dummy variant.
+ if (header->magic != 0x950412de) {
+ LYXERR0("Wrong magic number for file " << filen
+ << ".\nExpected 0x950412de, got 0x" << std::hex << header->magic << std::dec);
+ return false;
+ }
+ // Both tables hold N entries and must lie entirely inside the file.
+ // At least one entry (the header entry) is required. The arithmetic
+ // is done in 64 bits so that it cannot wrap around.
+ uint64_t const fileSize = moData.size();
+ uint64_t const tableSize = static_cast<uint64_t>(header->N) * sizeof(StringTable);
+ if (header->N == 0
+ || header->O + tableSize > fileSize
+ || header->T + tableSize > fileSize) {
+ LYXERR0("Corrupt string tables in file " << filen);
+ return false;
+ }
-namespace lyx {
+ StringTable * orig = reinterpret_cast<StringTable *>(&moData[0] + header->O);
+ StringTable * trans = reinterpret_cast<StringTable *>(&moData[0] + header->T);
+ if (doSwap) {
+ // Handle endianness change for all table entries at once
+ for (size_t i = 0; i < header->N; ++i) {
+ orig[i].swapEnd();
+ trans[i].swapEnd();
+ }
+ }
+ // Check the extent of every string before anything is used, so that
+ // nothing is inserted in trans_map_ unless the whole file is sane.
+ for (size_t i = 0; i < header->N; ++i) {
+ if (static_cast<uint64_t>(orig[i].offset) + orig[i].length > fileSize
+ || static_cast<uint64_t>(trans[i].offset) + trans[i].length > fileSize) {
+ LYXERR0("Corrupt string entry in file " << filen);
+ return false;
+ }
+ }
+ // First the header: the translation of entry 0 holds the catalogue
+ // information, among which the charset declaration.
+ string const info = string(&moData[0] + trans[0].offset, trans[0].length);
+ size_t pos = info.find("charset=");
+ if (pos != string::npos) {
+ pos += 8; // skip "charset="
+ string charset;
+ size_t pos2 = info.find("\n", pos);
+ if (pos2 == string::npos)
+ charset = info.substr(pos);
+ else
+ charset = info.substr(pos, pos2 - pos);
+ charset = ascii_lowercase(trim(charset));
+ if (charset != "utf-8") {
+ LYXERR0("Wrong encoding " << charset << " for file " << filen);
+ return false;
+ }
+ } else {
+ LYXERR0("Cannot find encoding for file " << filen);
+ return false;
+ }
-Messages::Messages(string const & l) {}
+ // Then the real entries (byte order and extents were handled above)
+ for (size_t i = 1; i < header->N; ++i) {
+ // Note that in theory the strings may contain NUL characters.
+ // This may be the case with plural forms
+ string const ostr(&moData[0] + orig[i].offset, orig[i].length);
+ docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
+ trans[i].length));
+ cleanTranslation(tstr);
+ trans_map_[ostr] = tstr;
+ //lyxerr << ostr << " ==> " << tstr << endl;
+ }
-void Messages::init()
-{
+ return true;
}
-
docstring const Messages::get(string const & m) const // translation of m, or m itself (context stripped) when none is known
{
- docstring trans = from_ascii(m);
- cleanTranslation(trans);
- return trans;
+ if (m.empty())
+ return docstring();
+
+ TranslationMap::const_iterator it = trans_map_.find(m);
+ if (it != trans_map_.end())
+ return it->second; // stored translations were already cleaned by readMoFile()
+ else {
+ docstring res = from_utf8(m); // fall back to the untranslated message
+ cleanTranslation(res); // remove "[[...]]" context annotations
+ return res;
+ }
}
-} // namespace lyx
-#endif
+docstring const Messages::getIfFound(string const & m) const // like get(), but returns an empty docstring when m has no translation
+{
+ if (m.empty())
+ return docstring();
-#if 0
+ TranslationMap::const_iterator it = trans_map_.find(m);
+ if (it != trans_map_.end())
+ return it->second;
+ else
+ return docstring(); // unlike get(), no fallback to m itself
+}
--#include <locale>
+} // namespace lyx
+
+#else // ENABLE_NLS
+// This is the dummy variant.
namespace lyx {
-// This version of the Pimpl utilizes the message capability of
-// libstdc++ that is distributed with GNU G++.
-class Messages::Pimpl {
-public:
- typedef messages<char>::catalog catalog;
+std::string Messages::gui_lang_;
- Pimpl(string const & l)
- : lang_(l),
- loc_gl(lang_.c_str()),
- mssg_gl(use_facet<messages<char> >(loc_gl))
- {
- //LYXERR("Messages: language(" << l << ") in dir(" << dir << ")");
+Messages::Messages(string const & /* l */) {} // dummy variant: the language code is ignored
- string const locale_dir = package().locale_dir().toFilesystemEncoding();
- cat_gl = mssg_gl.open(PACKAGE, loc_gl, locale_dir.c_str());
+docstring const Messages::get(string const & m) const // dummy variant: returns m itself with context annotations stripped
+{
+ docstring trans = from_ascii(m); // NOTE(review): the NLS variant converts with from_utf8 - confirm from_ascii is intended here
+ cleanTranslation(trans); // remove "[[...]]" context annotations
+ return trans;
+}
- }
+std::string Messages::language() const // dummy variant: always returns an empty string
+{
+ return string();
+}
- ~Pimpl()
- {
- mssg_gl.close(cat_gl);
- }
+bool Messages::available(string const & /* c */) // dummy variant: always returns false
+{
+ return false;
+}
- docstring const get(string const & msg) const
- {
- return mssg_gl.get(cat_gl, 0, 0, msg);
- }
-private:
- ///
- string lang_;
- ///
- locale loc_gl;
- ///
- messages<char> const & mssg_gl;
- ///
- catalog cat_gl;
-};
+docstring const Messages::getIfFound(string const & /* m */) const // dummy variant: always returns an empty docstring
+{
+ return docstring();
+}
} // namespace lyx