* Licence details can be found in the file COPYING.
*
* \author Lars Gullik Bjønnes
+ * \author Jean-Marc Lasgouttes
*
* Full author contact details are available in file CREDITS.
*/
+/*
+ This contains a limited parser for gettext's mo files. Several features are
+ not implemented currently:
+ * encoding is supposed to be UTF-8 (the charset parameter is enforced)
+ * context is not handled (implemented differently in LyX)
+ * plural forms are not implemented (not used for now in LyX).
+
+ The data is loaded in a std::map object for simplicity.
+ */
+
+/*
+ Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+         byte
+              +------------------------------------------+
+           0  | magic number = 0x950412de                |
+              |                                          |
+           4  | file format revision = 0                 |
+              |                                          |
+           8  | number of strings                        |  == N
+              |                                          |
+          12  | offset of table with original strings    |  == O
+              |                                          |
+          16  | offset of table with translation strings |  == T
+              |                                          |
+          20  | size of hashing table                    |  == S
+              |                                          |
+          24  | offset of hashing table                  |  == H
+              |                                          |
+              .                                          .
+              .    (possibly more entries later)         .
+              .                                          .
+              |                                          |
+           O  | length & offset 0th string  ----------------.
+       O + 8  | length & offset 1st string  ------------------.
+               ...                                    ...   | |
+ O + ((N-1)*8)| length & offset (N-1)th string           |  | |
+              |                                          |  | |
+           T  | length & offset 0th translation  ---------------.
+       T + 8  | length & offset 1st translation  -----------------.
+               ...                                    ...   | | | |
+ T + ((N-1)*8)| length & offset (N-1)th translation      |  | | | |
+              |                                          |  | | | |
+           H  | start hash table                         |  | | | |
+               ...                                    ...   | | | |
+   H + S * 4  | end hash table                           |  | | | |
+              |                                          |  | | | |
+              | NUL terminated 0th string  <----------------' | | |
+              |                                          |    | | |
+              | NUL terminated 1st string  <------------------' | |
+              |                                          |      | |
+               ...                                    ...       | |
+              |                                          |      | |
+              | NUL terminated 0th translation  <---------------' |
+              |                                          |        |
+              | NUL terminated 1st translation  <-----------------'
+              |                                          |
+               ...                                    ...
+              |                                          |
+              +------------------------------------------+
+
+ */
+
#include <config.h>
#include "support/Messages.h"
#include "support/debug.h"
#include "support/docstring.h"
-#include "support/environment.h"
#include "support/lstrings.h"
#include "support/Package.h"
#include "support/unicode.h"
#include "support/lassert.h"
#include <cerrno>
+#include <cstdint>
+#include <fstream>
+#include <utility>
-# define N_(str) (str) // for marking strings to be translated
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
using namespace std;
namespace lyx {
-void cleanTranslation(docstring & trans)
+void cleanTranslation(docstring & trans)
{
/*
Some english words have different translations, depending on
context. In these cases the original string is augmented by
context information (e.g. "To:[[as in 'From page x to page
- y']]" and "To:[[as in 'From format x to format y']]". This
- means that we need to filter out everything in double square
- brackets at the end of the string, otherwise the user sees
- bogus messages. If we are unable to honour the request we
- just return what we got in.
+ y']]" and "To:[[as in 'From format x to format y']]"). Also,
+ when placeholders are used, the context can indicate what will
+ be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
+ [[time]]"). This means that we need to filter out every
+ "[[...]]" group, wherever it occurs in the string, otherwise
+ the user sees bogus messages. If the first remaining "[[" has
+ no "]]" after it, we stop and leave the string as it is.
*/
- size_t const pos1 = trans.find(from_ascii("[["));
- if (pos1 != docstring::npos) {
- size_t const pos2 = trans.find(from_ascii("]]"), pos1);
- if (pos2 != docstring::npos)
- trans.erase(pos1, pos2 - pos1 + 2);
+ static docstring const ctx_start = from_ascii("[[");
+ static docstring const ctx_end = from_ascii("]]");
+ while (true) { // one iteration per context group removed
+ size_t const pos1 = trans.find(ctx_start);
+ if (pos1 != docstring::npos) {
+ size_t const pos2 = trans.find(ctx_end, pos1);
+ if (pos2 != docstring::npos) {
+ trans.erase(pos1, pos2 - pos1 + 2); // the extra 2 also removes the closing "]]"
+ continue; // search again from the start for a further group
+ }
+ }
+ break; // no "[[" left, or no "]]" after it
}
}
-} // lyx
+} // namespace lyx
#ifdef ENABLE_NLS
-# ifdef HAVE_LOCALE_H
-# include <locale.h>
-# endif
-
-# if HAVE_GETTEXT
-# include <libintl.h> // use the header already in the system *EK*
-# else
-# include "intl/libintl.h"
-# endif
-
using namespace lyx::support;
namespace lyx {
-// This version use the traditional gettext.
+std::string Messages::gui_lang_; // definition of the static member
+
+// Set up the messages of language \p l: store the code, then load its MO file.
Messages::Messages(string const & l)
: lang_(l)
{
size_t i = lang_.find("."); // npos when the code contains no dot
lang_ = lang_.substr(0, i); // keep what precedes the first '.'; everything if there is none
LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
+ // The result of readMoFile() is ignored: each failure path there logs the problem itself.
+ readMoFile();
}
-void Messages::init()
+namespace {
+
+/* Find the code we have for a given language code. Return empty if not found.
+ * The code is tried as given first; when its messages file is not readable
+ * and the code contains a '_', it is replaced by token(code, '_', 0) and
+ * tried again.
+ */
+string realCode(string code)
{
- errno = 0;
- string const locale_dir = package().locale_dir().toFilesystemEncoding();
- char const * c = bindtextdomain(PACKAGE, locale_dir.c_str());
- int e = errno;
- if (e) {
- LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
- << "Directory : " << package().locale_dir().absFileName() << '\n'
- << "Rtn value : " << c);
+ // this loops at most twice (presumably token() yields the part before the first '_', which holds no '_' -- TODO confirm)
+ while (true) {
+ if (package().messages_file(code).isReadableFile())
+ return code;
+ if (contains(code, '_'))
+ code = token(code, '_', 0);
+ else
+ break;
}
+ return string(); // nothing readable was found
+}
+} // namespace
- if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) {
- LYXERR(Debug::LOCALE, "Error code: " << errno << '\n'
- << "Codeset : " << ucs4_codeset);
- }
- textdomain(PACKAGE);
+bool Messages::available(string const & c)
+{
+ return !realCode(c).empty(); // true iff realCode() finds a readable messages file for \p c or the shortened code it falls back to
}
string Messages::language() const
{
- // get the language from the gmo file
- string const test = N_("[[Replace with the code of your language]]");
- string const trans = to_utf8(get(test));
- if (trans == test) {
- LYXERR0("Something is weird.");
- return string();
- } else
- return trans;
+ return realCode(lang_); // the code whose messages file is readable for lang_, or empty if there is none
}
+namespace {
-bool Messages::available(string const & c)
+void swapInt(uint32_t & number) // reverse the byte order of \p number in place
{
- static string locale_dir = package().locale_dir().toFilesystemEncoding();
- string code = c;
- // this loops at most twice
- while (true) {
- string const filen = locale_dir + "/" + code
- + "/LC_MESSAGES/" PACKAGE ".mo";
- if (FileName(filen).isReadableFile())
- return true;
- if (contains(code, '_'))
- code = token(code, '_', 0);
- else return false;
- }
- return false;
-
+ unsigned char * num_ar = reinterpret_cast<unsigned char *>(&number); // access the value as its four bytes
+ swap(num_ar[0], num_ar[3]); // exchange the two outer bytes
+ swap(num_ar[1], num_ar[2]); // exchange the two inner bytes
}
-namespace {
-// Trivial wrapper around gettext()
-docstring const getText(string const & m)
+struct MoHeader // the first five 32-bit words of a MO file; readMoFile() overlays it on the raw file data
{
- // FIXME: gettext sometimes "forgets" the ucs4_codeset we set
- // in init(), which leads to severe message corruption (#7371)
- // We set it again here unconditionally. A real fix must be found!
- LATTEST(bind_textdomain_codeset(PACKAGE, ucs4_codeset));
-
- char const * m_c = m.c_str();
- char const * trans_c = gettext(m_c);
- docstring trans;
- if (!trans_c) {
- LYXERR(Debug::LOCALE, "Undefined result from gettext for `" << m << "'.");
- trans = from_ascii(m);
- } else if (trans_c == m_c) {
- //LYXERR(Debug::LOCALE, "Same as entered returned");
- trans = from_ascii(m);
- } else {
- //LYXERR(Debug::LOCALE, "We got a translation");
- // m is actually not a char const * but ucs4 data
- trans = reinterpret_cast<char_type const *>(trans_c);
- }
+ // magic number = 0x950412de (reads as 0xde120495 when the file has the other byte order)
+ uint32_t magic;
+ // file format revision = 0 (not checked by readMoFile())
+ uint32_t rev;
+ // number of strings, i.e. number of entries in each of the two tables
+ uint32_t N;
+ // offset of table with original strings, in bytes from the start of the file
+ uint32_t O;
+ // offset of table with translation strings, in bytes from the start of the file
+ uint32_t T;
+ // there is a hash table afterwards, but we ignore it
+
+ // Change the endianness of header data (all five fields, in place)
+ void swapEnd();
+};
- cleanTranslation(trans);
- return trans;
+void MoHeader::swapEnd()
+{
+ swapInt(magic);
+ swapInt(rev);
+ swapInt(N);
+ swapInt(O);
+ swapInt(T);
}
+struct StringTable // one entry of the original-strings or translation-strings table
+{
+ // string length in bytes (presumably without the terminating NUL -- TODO confirm against the MO format)
+ uint32_t length;
+ // string offset, in bytes from the start of the file
+ uint32_t offset;
+
+ // Change the endianness of string table data (both fields, in place)
+ void swapEnd();
+};
+
+
+void StringTable::swapEnd()
+{
+ swapInt(length);
+ swapInt(offset);
}
} // namespace
+
+namespace {
+
+// Tell whether the string described by \p entry lies entirely inside a
+// file of \p fsize bytes. Written so that offset + length cannot overflow.
+bool insideFile(StringTable const & entry, size_t fsize)
+{
+	return entry.offset <= fsize && entry.length <= fsize - entry.offset;
+}
+
+} // namespace
+
+
+/* Load the MO file of language lang_ into trans_map_.
+ *
+ * The whole file is read in memory and decoded in place (byte-swapping the
+ * header and the string tables when the file has the other endianness).
+ * The entry at index 0 is the catalogue header; its "charset=" field must
+ * be UTF-8. Every other entry is stored as original -> translation, the
+ * translation being converted from UTF-8 and stripped of [[context]].
+ *
+ * All offsets and lengths found in the file are checked against the file
+ * size, so that a truncated or corrupted catalogue is rejected instead of
+ * being read out of bounds.
+ *
+ * \return true on success. On failure the problem is logged, false is
+ * returned and trans_map_ holds no entry from this file.
+ */
+bool Messages::readMoFile()
+{
+	// FIXME:remove
+	if (lang_.empty()) {
+		LYXERR0("No language given, nothing to load.");
+		return false;
+	}
+
+	string const code = realCode(lang_);
+	if (code.empty()) {
+		LYXERR(Debug::LOCALE, "Cannot find translation for language " << lang_);
+		return false;
+	}
+
+	string const filen = package().messages_file(code).toSafeFilesystemEncoding();
+
+	// get file size
+	struct stat buf;
+	if (stat(filen.c_str(), &buf)) {
+		LYXERR0("Cannot get information for file " << filen);
+		return false;
+	}
+
+	// The header is decoded in place below, so the file must hold at least
+	// a complete header. This also rejects empty files, for which
+	// &moData[0] would not be a valid pointer.
+	if (buf.st_size < 0
+	    || static_cast<uint64_t>(buf.st_size) < sizeof(MoHeader)) {
+		LYXERR0("File " << filen << " is too short to be a MO file");
+		return false;
+	}
+	size_t const fsize = static_cast<size_t>(buf.st_size);
+
+	vector<char> moData(fsize);
+
+	ifstream is(filen.c_str(), ios::in | ios::binary);
+	if (!is.read(&moData[0], buf.st_size)) {
+		LYXERR0("Cannot read file " << filen);
+		return false;
+	}
+
+	MoHeader * header = reinterpret_cast<MoHeader *>(&moData[0]);
+
+	// A byte-reversed magic number means the file was written with the
+	// other endianness: every integer read from it has to be swapped.
+	bool doSwap = false;
+	if (header->magic == 0xde120495) {
+		header->swapEnd();
+		doSwap = true;
+	}
+
+	if (header->magic != 0x950412de) {
+		LYXERR0("Wrong magic number for file " << filen
+			<< ".\nExpected 0x950412de, got 0x" << std::hex << header->magic << std::dec);
+		return false;
+	}
+
+	// Both string tables (N entries each) must lie inside the file, and
+	// entry 0, which carries the charset, must exist.
+	uint64_t const table_size = static_cast<uint64_t>(header->N) * sizeof(StringTable);
+	if (header->N == 0
+	    || header->O > fsize || table_size > fsize - header->O
+	    || header->T > fsize || table_size > fsize - header->T) {
+		LYXERR0("Invalid string tables in file " << filen);
+		return false;
+	}
+
+	StringTable * orig = reinterpret_cast<StringTable *>(&moData[0] + header->O);
+	StringTable * trans = reinterpret_cast<StringTable *>(&moData[0] + header->T);
+	// First the header
+	if (doSwap) {
+		// Handle endianness change
+		orig[0].swapEnd();
+		trans[0].swapEnd();
+	}
+	if (!insideFile(trans[0], fsize)) {
+		LYXERR0("Invalid header entry in file " << filen);
+		return false;
+	}
+	string const info = string(&moData[0] + trans[0].offset, trans[0].length);
+	size_t pos = info.find("charset=");
+	if (pos != string::npos) {
+		// skip "charset="
+		pos += 8;
+		string charset;
+		size_t pos2 = info.find('\n', pos);
+		if (pos2 == string::npos)
+			charset = info.substr(pos);
+		else
+			charset = info.substr(pos, pos2 - pos);
+		charset = ascii_lowercase(trim(charset));
+		if (charset != "utf-8") {
+			LYXERR0("Wrong encoding " << charset << " for file " << filen);
+			return false;
+		}
+	} else {
+		LYXERR0("Cannot find encoding for file " << filen);
+		return false;
+	}
+
+	for (size_t i = 1; i < header->N; ++i) {
+		if (doSwap) {
+			// Handle endianness change
+			orig[i].swapEnd();
+			trans[i].swapEnd();
+		}
+		if (!insideFile(orig[i], fsize) || !insideFile(trans[i], fsize)) {
+			LYXERR0("Invalid string entry " << i << " in file " << filen);
+			// do not keep a partially loaded catalogue
+			trans_map_.clear();
+			return false;
+		}
+		// Note that in theory the strings may contain NUL characters.
+		// This may be the case with plural forms
+		string const ostr(&moData[0] + orig[i].offset, orig[i].length);
+		docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
+		                                  trans[i].length));
+		cleanTranslation(tstr);
+		trans_map_[ostr] = tstr;
+		//lyxerr << ostr << " ==> " << tstr << endl;
+	}
+
+	return true;
+}
+
+
docstring const Messages::get(string const & m) const
{
if (m.empty())
return docstring();
- // Look for the translated string in the cache.
- TranslationCache::iterator it = cache_.find(m);
- if (it != cache_.end())
+ TranslationMap::const_iterator it = trans_map_.find(m); // look \p m up among the entries stored by readMoFile()
+ if (it != trans_map_.end())
return it->second;
+ else { // no translation known: fall back to the message itself...
+ docstring res = from_utf8(m);
+ cleanTranslation(res); // ...without its [[context]] markers
+ return res;
+ }
+}
- // The string was not found, use gettext to generate it
- docstring trans;
- if (!lang_.empty()) {
- // This GNU extension overrides any language locale
- // wrt gettext.
- LYXERR(Debug::LOCALE, "Setting LANGUAGE to " << lang_);
- EnvChanger language_chg("LANGUAGE", lang_);
- // However, setting LANGUAGE does nothing when the
- // locale is "C". Therefore we set the locale to
- // something that is believed to exist on most
- // systems. The idea is that one should be able to
- // load German documents even without having de_DE
- // installed.
- LYXERR(Debug::LOCALE, "Setting LC_ALL to en_US");
- EnvChanger lc_all_chg("LC_ALL", "en_US");
-#ifdef HAVE_LC_MESSAGES
- setlocale(LC_MESSAGES, "");
-#endif
- trans = getText(m);
- } else
- trans = getText(m);
-
-
-#ifdef HAVE_LC_MESSAGES
- setlocale(LC_MESSAGES, "");
-#endif
-
- // store translation in cache
- pair<TranslationCache::iterator, bool> result =
- cache_.insert(make_pair(m, trans));
- LASSERT(result.second, return from_utf8(m));
+docstring const Messages::getIfFound(string const & m) const // like get(), but with no fallback to \p m
+{
+ if (m.empty())
+ return docstring();
- return result.first->second;
+ TranslationMap::const_iterator it = trans_map_.find(m);
+ if (it != trans_map_.end())
+ return it->second;
+ else
+ return docstring(); // an empty string tells the caller that no translation is stored
}
} // namespace lyx
namespace lyx {
-Messages::Messages(string const & /* l */) {}
-
-void Messages::init()
-{
-}
+std::string Messages::gui_lang_; // NOTE(review): second definition in this view; presumably the branch built without ENABLE_NLS (no #else is visible here) -- confirm
+Messages::Messages(string const & /* l */) {} // stub: the language code is ignored
docstring const Messages::get(string const & m) const
{ // NOTE(review): no return statement is visible although a docstring must be returned; presumably a line is missing from this view -- confirm against the full file
}
std::string Messages::language() const
- {
- return string();
- }
+{
+ return string(); // stub: no language is ever reported
+}
bool Messages::available(string const & /* c */)
{
return false; // stub: no translation is ever available
}
-} // namespace lyx
-
-#endif
-
-#if 0
-
--#include <locale>
-
-namespace lyx {
-
-// This version of the Pimpl utilizes the message capability of
-// libstdc++ that is distributed with GNU G++.
-class Messages::Pimpl {
-public:
- typedef messages<char>::catalog catalog;
-
- Pimpl(string const & l)
- : lang_(l),
- loc_gl(lang_.c_str()),
- mssg_gl(use_facet<messages<char> >(loc_gl))
- {
- //LYXERR("Messages: language(" << l << ") in dir(" << dir << ")");
-
- string const locale_dir = package().locale_dir().toFilesystemEncoding();
- cat_gl = mssg_gl.open(PACKAGE, loc_gl, locale_dir.c_str());
-
- }
-
- ~Pimpl()
- {
- mssg_gl.close(cat_gl);
- }
-
- docstring const get(string const & msg) const
- {
- return mssg_gl.get(cat_gl, 0, 0, msg);
- }
-private:
- ///
- string lang_;
- ///
- locale loc_gl;
- ///
- messages<char> const & mssg_gl;
- ///
- catalog cat_gl;
-};
+docstring const Messages::getIfFound(string const & /* m */) const
+{
+ return docstring(); // stub: the argument is ignored and the result is always empty
+}
} // namespace lyx