X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2FMessages.cpp;h=da1c3cc0b8fc14241fb654be683409a75aa8d978;hb=268ae66e3c7df04effc329373dc887715d9c6f06;hp=b86874c1780af8431c687f9f54c92fca5b4b220c;hpb=9d0ea8aeff32833a90b3fe64df0c5518a9e241be;p=lyx.git diff --git a/src/support/Messages.cpp b/src/support/Messages.cpp index b86874c178..da1c3cc0b8 100644 --- a/src/support/Messages.cpp +++ b/src/support/Messages.cpp @@ -2,242 +2,378 @@ * This file is part of LyX, the document processor. * Licence details can be found in the file COPYING. * - * \author Lars Gullik Bjønnes + * \author Lars Gullik Bjønnes + * \author Jean-Marc Lasgouttes * * Full author contact details are available in file CREDITS. */ +/* + This contains a limited parser for gettext's mo files. Several features are + not implemented currently: + * encoding is supposed to be UTF-8 (the charset parameter is enforced) + * context is not handled (implemented differently in LyX) + * plural forms are not implemented (not used for now in LyX). + + The data is loaded in a std::map object for simplicity. + */ + +/* + Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html + + byte + +------------------------------------------+ + 0 | magic number = 0x950412de | + | | + 4 | file format revision = 0 | + | | + 8 | number of strings | == N + | | + 12 | offset of table with original strings | == O + | | + 16 | offset of table with translation strings | == T + | | + 20 | size of hashing table | == S + | | + 24 | offset of hashing table | == H + | | + . . + . (possibly more entries later) . + . . + | | + O | length & offset 0th string ----------------. + O + 8 | length & offset 1st string ------------------. + ... ... | | + O + ((N-1)*8)| length & offset (N-1)th string | | | + | | | | + T | length & offset 0th translation ---------------. + T + 8 | length & offset 1st translation -----------------. + ... ... | | | | + T + ((N-1)*8)| length & offset (N-1)th translation | | | | | + | | | | | | + H | start hash table | | | | | + ... ... | | | | + H + S * 4 | end hash table | | | | | + | | | | | | + | NUL terminated 0th string <----------------' | | | + | | | | | + | NUL terminated 1st string <------------------' | | + | | | | + ... ... | | + | | | | + | NUL terminated 0th translation <---------------' | + | | | + | NUL terminated 1st translation <-----------------' + | | + ... ... + | | + +------------------------------------------+ + + */ + #include #include "support/Messages.h" #include "support/debug.h" #include "support/docstring.h" -#include "support/environment.h" +#include "support/lstrings.h" #include "support/Package.h" #include "support/unicode.h" -#include +#include "support/lassert.h" + +#include #include +#include +#include -using std::map; -using std::string; +#ifdef HAVE_SYS_STAT_H +# include +#endif -namespace { +using namespace std; +using boost::uint32_t; -using lyx::docstring; -using lyx::from_ascii; +namespace lyx { -void cleanTranslation(docstring & trans) +void cleanTranslation(docstring & trans) { /* Some english words have different translations, depending on context. In these cases the original string is augmented by context information (e.g. "To:[[as in 'From page x to page - y']]" and "To:[[as in 'From format x to format y']]". This - means that we need to filter out everything in double square - brackets at the end of the string, otherwise the user sees - bogus messages. If we are unable to honour the request we - just return what we got in. + y']]" and "To:[[as in 'From format x to format y']]". Also, + when placeholders are used, the context can indicate what will + be substituted for the placeholder (e.g. "%1$s[[date]], %1$s + [[time]]). This means that we need to filter out everything + in double square brackets at the end of the string, otherwise + the user sees bogus messages. If we are unable to honour the + request we just return what we got in. */ - size_t const pos1 = trans.find(from_ascii("[[")); - if (pos1 != docstring::npos) { - size_t const pos2 = trans.find(from_ascii("]]"), pos1); - if (pos2 != docstring::npos) - trans.erase(pos1, pos2 - pos1 + 2); + static docstring const ctx_start = from_ascii("[["); + static docstring const ctx_end = from_ascii("]]"); + while (true) { + size_t const pos1 = trans.find(ctx_start); + if (pos1 != docstring::npos) { + size_t const pos2 = trans.find(ctx_end, pos1); + if (pos2 != docstring::npos) { + trans.erase(pos1, pos2 - pos1 + 2); + continue; + } + } + break; } } -} +} // lyx #ifdef ENABLE_NLS -# ifdef HAVE_LOCALE_H -# include -# endif - -# if HAVE_GETTEXT -# include // use the header already in the system *EK* -# else -# include "../../intl/libintl.h" -# endif +using namespace lyx::support; namespace lyx { -using support::package; -using support::getEnv; -using support::setEnv; +std::string Messages::gui_lang_; -// This version use the traditional gettext. Messages::Messages(string const & l) - : lang_(l), warned_(false) + : lang_(l) { // strip off any encoding suffix, i.e., assume 8-bit po files - string::size_type i = lang_.find("."); + size_t i = lang_.find("."); lang_ = lang_.substr(0, i); - LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION - << ": language(" << lang_ << ")"); + LYXERR(Debug::LOCALE, "language(" << lang_ << ")"); + + readMoFile(); } -void Messages::init() +namespace { + +// Find the code we have for a given language code. Return empty if not found. +string realCode(string code) { - errno = 0; - string const locale_dir = package().locale_dir().toFilesystemEncoding(); - char const * c = bindtextdomain(PACKAGE, locale_dir.c_str()); - int e = errno; - if (e) { - LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION << '\n' - << "Error code: " << errno << '\n' - << "Directory : " << package().locale_dir().absFilename() << '\n' - << "Rtn value : " << c); + // this loops at most twice + while (true) { + if (package().messages_file(code).isReadableFile()) + return code; + if (contains(code, '_')) + code = token(code, '_', 0); + else + break; } + return string(); +} +} - if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) { - LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION << '\n' - << "Error code: " << errno << '\n' - << "Codeset : " << ucs4_codeset << '\n'); - } - textdomain(PACKAGE); +bool Messages::available(string const & c) +{ + return !realCode(c).empty(); } -docstring const Messages::get(string const & m) const +string Messages::language() const { - if (m.empty()) - return docstring(); + return realCode(lang_); +} - // Look for the translated string in the cache. - TranslationCache::iterator it = cache_.find(m); - if (it != cache_.end()) - return it->second; +namespace { - // The string was not found, use gettext to generate it - - string const oldLANGUAGE = getEnv("LANGUAGE"); - string const oldLC_ALL = getEnv("LC_ALL"); - if (!lang_.empty()) { - // This GNU extension overrides any language locale - // wrt gettext. - setEnv("LANGUAGE", lang_); - // However, setting LANGUAGE does nothing when the - // locale is "C". Therefore we set the locale to - // something that is believed to exist on most - // systems. The idea is that one should be able to - // load German documents even without having de_DE - // installed. - setEnv("LC_ALL", "en_US"); -#ifdef HAVE_LC_MESSAGES - setlocale(LC_MESSAGES, ""); -#endif +void swapInt(uint32_t & number) +{ + unsigned char * num_ar = reinterpret_cast(&number); + swap(num_ar[0], num_ar[3]); + swap(num_ar[1], num_ar[2]); +} + + +struct MoHeader +{ + // magic number = 0x950412de + uint32_t magic; + // file format revision = 0 + uint32_t rev; + // number of strings + uint32_t N; + // offset of table with original strings + uint32_t O; + // offset of table with translation strings + uint32_t T; + // there is a hash table afterwards, but we ignore it + + // Change the endianness of header data + void swapEnd(); +}; + + +void MoHeader::swapEnd() +{ + swapInt(magic); + swapInt(rev); + swapInt(N); + swapInt(O); + swapInt(T); +} + +struct StringTable +{ + // string length + uint32_t length; + // string offset + uint32_t offset; + + // Change the endianness of string stable data + void swapEnd(); +}; + + +void StringTable::swapEnd() +{ + swapInt(length); + swapInt(offset); +} + + +} // namespace anon + +bool Messages::readMoFile() +{ + // FIXME:remove + if (lang_.empty()) { + LYXERR0("No language given, nothing to load."); + return false; } - char const * m_c = m.c_str(); - char const * trans_c = gettext(m_c); - docstring trans; - if (!trans_c) - LYXERR0("Undefined result from gettext"); - else if (trans_c == m_c) { - LYXERR(Debug::DEBUG, "Same as entered returned"); - trans = from_ascii(m); - } else { - LYXERR(Debug::DEBUG, "We got a translation"); - // m is actually not a char const * but ucs4 data - trans = reinterpret_cast(trans_c); + string const code = realCode(lang_); + if (code.empty()) { + LYXERR(Debug::LOCALE, "Cannot find translation for language " << lang_); + return false; } - cleanTranslation(trans); + string const filen = package().messages_file(code).toSafeFilesystemEncoding(); - // Reset environment variables as they were. - if (!lang_.empty()) { - // Reset everything as it was. - setEnv("LANGUAGE", oldLANGUAGE); - setEnv("LC_ALL", oldLC_ALL); -#ifdef HAVE_LC_MESSAGES - setlocale(LC_MESSAGES, ""); -#endif + // get file size + struct stat buf; + if (stat(filen.c_str(), &buf)) { + LYXERR0("Cannot get information for file " << filen); + return false; } - std::pair result = - cache_.insert(std::make_pair(m, trans)); + vector moData(buf.st_size); - BOOST_ASSERT(result.second); + ifstream is(filen.c_str(), ios::in | ios::binary); + if (!is.read(&moData[0], buf.st_size)) { + LYXERR0("Cannot read file " << filen); + return false; + } - return result.first->second; -} + MoHeader * header = reinterpret_cast(&moData[0]); -} // namespace lyx + bool doSwap = false; + if (header->magic == 0xde120495) { + header->swapEnd(); + doSwap = true; + } -#else // ENABLE_NLS -// This is the dummy variant. + if (header->magic != 0x950412de) { + LYXERR0("Wrong magic number for file " << filen + << ".\nExpected 0x950412de, got 0x" << std::hex << header->magic << std::dec); + return false; + } -namespace lyx { + StringTable * orig = reinterpret_cast(&moData[0] + header->O); + StringTable * trans = reinterpret_cast(&moData[0] + header->T); + // First the header + if (doSwap) { + // Handle endiannness change + orig[0].swapEnd(); + trans[0].swapEnd(); + } + string const info = string(&moData[0] + trans[0].offset, trans[0].length); + size_t pos = info.find("charset="); + if (pos != string::npos) { + pos += 8; + string charset; + size_t pos2 = info.find("\n", pos); + if (pos2 == string::npos) + charset = info.substr(pos); + else + charset = info.substr(pos, pos2 - pos); + charset = ascii_lowercase(trim(charset)); + if (charset != "utf-8") { + LYXERR0("Wrong encoding " << charset << " for file " << filen); + return false; + } + } else { + LYXERR0("Cannot find encoding encoding for file " << filen); + return false; + } -Messages::Messages(string const & l) {} + for (size_t i = 1; i < header->N; ++i) { + if (doSwap) { + // Handle endiannness change + orig[i].swapEnd(); + trans[i].swapEnd(); + } + // Note that in theory the strings may contain NUL characters. + // This may be the case with plural forms + string const ostr(&moData[0] + orig[i].offset, orig[i].length); + docstring tstr = from_utf8(string(&moData[0] + trans[i].offset, + trans[i].length)); + cleanTranslation(tstr); + trans_map_[ostr] = tstr; + //lyxerr << ostr << " ==> " << tstr << endl; + } -void Messages::init() -{ + return true; } - docstring const Messages::get(string const & m) const { - docstring trans = from_ascii(m); - cleanTranslation(trans); - return trans; + if (m.empty()) + return docstring(); + + TranslationMap::const_iterator it = trans_map_.find(m); + if (it != trans_map_.end()) + return it->second; + else { + docstring res = from_utf8(m); + cleanTranslation(res); + return res; + } } } // namespace lyx -#endif - -#if 0 - --#include +#else // ENABLE_NLS +// This is the dummy variant. namespace lyx { -// This version of the Pimpl utilizes the message capability of -// libstdc++ that is distributed with GNU G++. -class Messages::Pimpl { -public: - typedef std::messages::catalog catalog; - - Pimpl(string const & l) - : lang_(l), - loc_gl(lang_.c_str()), - mssg_gl(std::use_facet >(loc_gl)) - { - //LYXERR("Messages: language(" << l << ") in dir(" << dir << ")"); +std::string Messages::gui_lang_; - string const locale_dir = package().locale_dir().toFilesystemEncoding(); - cat_gl = mssg_gl.open(PACKAGE, loc_gl, locale_dir.c_str()); +Messages::Messages(string const & /* l */) {} - } +docstring const Messages::get(string const & m) const +{ + docstring trans = from_ascii(m); + cleanTranslation(trans); + return trans; +} - ~Pimpl() - { - mssg_gl.close(cat_gl); - } +std::string Messages::language() const +{ + return string(); +} - docstring const get(string const & msg) const - { - return mssg_gl.get(cat_gl, 0, 0, msg); - } -private: - /// - string lang_; - /// - std::locale loc_gl; - /// - std::messages const & mssg_gl; - /// - catalog cat_gl; -}; +bool Messages::available(string const & /* c */) +{ + return false; +} } // namespace lyx