X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2FMessages.cpp;h=da1c3cc0b8fc14241fb654be683409a75aa8d978;hb=268ae66e3c7df04effc329373dc887715d9c6f06;hp=0aa29d84e0290caecd3b0e36e865c0a637c29d51;hpb=58f11263559b0119bae320e4cc5496292da4c387;p=lyx.git diff --git a/src/support/Messages.cpp b/src/support/Messages.cpp index 0aa29d84e0..da1c3cc0b8 100644 --- a/src/support/Messages.cpp +++ b/src/support/Messages.cpp @@ -3,284 +3,377 @@ * Licence details can be found in the file COPYING. * * \author Lars Gullik Bjønnes + * \author Jean-Marc Lasgouttes * * Full author contact details are available in file CREDITS. */ +/* + This contains a limited parser for gettext's mo files. Several features are + not implemented currently: + * encoding is supposed to be UTF-8 (the charset parameter is enforced) + * context is not handled (implemented differently in LyX) + * plural forms are not implemented (not used for now in LyX). + + The data is loaded in a std::map object for simplicity. + */ + +/* + Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html + + byte + +------------------------------------------+ + 0 | magic number = 0x950412de | + | | + 4 | file format revision = 0 | + | | + 8 | number of strings | == N + | | + 12 | offset of table with original strings | == O + | | + 16 | offset of table with translation strings | == T + | | + 20 | size of hashing table | == S + | | + 24 | offset of hashing table | == H + | | + . . + . (possibly more entries later) . + . . + | | + O | length & offset 0th string ----------------. + O + 8 | length & offset 1st string ------------------. + ... ... | | + O + ((N-1)*8)| length & offset (N-1)th string | | | + | | | | + T | length & offset 0th translation ---------------. + T + 8 | length & offset 1st translation -----------------. + ... ... | | | | + T + ((N-1)*8)| length & offset (N-1)th translation | | | | | + | | | | | | + H | start hash table | | | | | + ... ... | | | | + H + S * 4 | end hash table | | | | | + | | | | | | + | NUL terminated 0th string <----------------' | | | + | | | | | + | NUL terminated 1st string <------------------' | | + | | | | + ... ... | | + | | | | + | NUL terminated 0th translation <---------------' | + | | | + | NUL terminated 1st translation <-----------------' + | | + ... ... + | | + +------------------------------------------+ + + */ + #include #include "support/Messages.h" #include "support/debug.h" #include "support/docstring.h" -#include "support/environment.h" #include "support/lstrings.h" #include "support/Package.h" #include "support/unicode.h" #include "support/lassert.h" +#include + #include +#include +#include + +#ifdef HAVE_SYS_STAT_H +# include +#endif using namespace std; +using boost::uint32_t; namespace lyx { -// Instanciate static member. -string Messages::main_lang_; - -namespace { - -void cleanTranslation(docstring & trans) +void cleanTranslation(docstring & trans) { /* Some english words have different translations, depending on context. In these cases the original string is augmented by context information (e.g. "To:[[as in 'From page x to page - y']]" and "To:[[as in 'From format x to format y']]". This - means that we need to filter out everything in double square - brackets at the end of the string, otherwise the user sees - bogus messages. If we are unable to honour the request we - just return what we got in. + y']]" and "To:[[as in 'From format x to format y']]". Also, + when placeholders are used, the context can indicate what will + be substituted for the placeholder (e.g. "%1$s[[date]], %1$s + [[time]]). This means that we need to filter out everything + in double square brackets at the end of the string, otherwise + the user sees bogus messages. If we are unable to honour the + request we just return what we got in. */ - size_t const pos1 = trans.find(from_ascii("[[")); - if (pos1 != docstring::npos) { - size_t const pos2 = trans.find(from_ascii("]]"), pos1); - if (pos2 != docstring::npos) - trans.erase(pos1, pos2 - pos1 + 2); + static docstring const ctx_start = from_ascii("[["); + static docstring const ctx_end = from_ascii("]]"); + while (true) { + size_t const pos1 = trans.find(ctx_start); + if (pos1 != docstring::npos) { + size_t const pos2 = trans.find(ctx_end, pos1); + if (pos2 != docstring::npos) { + trans.erase(pos1, pos2 - pos1 + 2); + continue; + } + } + break; } } -} // anonymous } // lyx #ifdef ENABLE_NLS -# ifdef HAVE_LOCALE_H -# include -# endif - -# if HAVE_GETTEXT -# include // use the header already in the system *EK* -# else -# include "../../intl/libintl.h" -# endif - using namespace lyx::support; namespace lyx { -void Messages::setDefaultLanguage() -{ - char const * env_lang[5] = {"LANGUAGE", "LC_ALL", "LC_MESSAGES", - "LC_MESSAGE", "LANG"}; - for (size_t i = 0; i != 5; ++i) { - string const lang = getEnv(env_lang[i]); - if (lang.empty()) - continue; - Messages::main_lang_ = lang; - return; - } - // Not found! - LYXERR(Debug::LOCALE, "Default language not found!"); -} +std::string Messages::gui_lang_; -// This version use the traditional gettext. Messages::Messages(string const & l) - : lang_(l), warned_(false) + : lang_(l) { // strip off any encoding suffix, i.e., assume 8-bit po files size_t i = lang_.find("."); lang_ = lang_.substr(0, i); LYXERR(Debug::LOCALE, "language(" << lang_ << ")"); + + readMoFile(); } -void Messages::init() +namespace { + +// Find the code we have for a given language code. Return empty if not found. +string realCode(string code) { - errno = 0; - string const locale_dir = package().locale_dir().toFilesystemEncoding(); - char const * c = bindtextdomain(PACKAGE, locale_dir.c_str()); - int e = errno; - if (e) { - LYXERR(Debug::LOCALE, "Error code: " << errno << '\n' - << "Directory : " << package().locale_dir().absFileName() << '\n' - << "Rtn value : " << c); + // this loops at most twice + while (true) { + if (package().messages_file(code).isReadableFile()) + return code; + if (contains(code, '_')) + code = token(code, '_', 0); + else + break; } + return string(); +} +} - if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) { - LYXERR(Debug::LOCALE, "Error code: " << errno << '\n' - << "Codeset : " << ucs4_codeset); - } - textdomain(PACKAGE); +bool Messages::available(string const & c) +{ + return !realCode(c).empty(); +} + - // Reset default language; - setDefaultLanguage(); +string Messages::language() const +{ + return realCode(lang_); } +namespace { -bool Messages::available() const +void swapInt(uint32_t & number) { - string const test = languageTestString(); - string const trans = to_utf8(get(test)); - return !trans.empty() && trans != test; + unsigned char * num_ar = reinterpret_cast(&number); + swap(num_ar[0], num_ar[3]); + swap(num_ar[1], num_ar[2]); } -docstring const Messages::get(string const & m) const +struct MoHeader { - if (m.empty()) - return docstring(); + // magic number = 0x950412de + uint32_t magic; + // file format revision = 0 + uint32_t rev; + // number of strings + uint32_t N; + // offset of table with original strings + uint32_t O; + // offset of table with translation strings + uint32_t T; + // there is a hash table afterwards, but we ignore it + + // Change the endianness of header data + void swapEnd(); +}; - // Look for the translated string in the cache. - TranslationCache::iterator it = cache_.find(m); - if (it != cache_.end()) - return it->second; - // The string was not found, use gettext to generate it - static string oldLC_ALL; - static string oldLANGUAGE; - if (!lang_.empty()) { - oldLC_ALL = getEnv("LC_ALL"); - // This GNU extension overrides any language locale - // wrt gettext. - LYXERR(Debug::LOCALE, "Setting LANGUAGE to " << lang_); - oldLANGUAGE = getEnv("LANGUAGE"); - if (!setEnv("LANGUAGE", lang_)) - LYXERR(Debug::LOCALE, "\t... failed!"); - // However, setting LANGUAGE does nothing when the - // locale is "C". Therefore we set the locale to - // something that is believed to exist on most - // systems. The idea is that one should be able to - // load German documents even without having de_DE - // installed. - LYXERR(Debug::LOCALE, "Setting LC_ALL to en_US"); - if (!setEnv("LC_ALL", "en_US")) - LYXERR(Debug::LOCALE, "\t... failed!"); -#ifdef HAVE_LC_MESSAGES - setlocale(LC_MESSAGES, ""); -#endif +void MoHeader::swapEnd() +{ + swapInt(magic); + swapInt(rev); + swapInt(N); + swapInt(O); + swapInt(T); +} + +struct StringTable +{ + // string length + uint32_t length; + // string offset + uint32_t offset; + + // Change the endianness of string stable data + void swapEnd(); +}; + + +void StringTable::swapEnd() +{ + swapInt(length); + swapInt(offset); +} + + +} // namespace anon + +bool Messages::readMoFile() +{ + // FIXME:remove + if (lang_.empty()) { + LYXERR0("No language given, nothing to load."); + return false; } - char const * m_c = m.c_str(); - char const * trans_c = gettext(m_c); - docstring trans; - if (!trans_c) { - LYXERR(Debug::LOCALE, "Undefined result from gettext for `" << m << "'."); - trans = from_ascii(m); - } else if (trans_c == m_c) { - //LYXERR(Debug::LOCALE, "Same as entered returned"); - trans = from_ascii(m); - } else { - //LYXERR(Debug::LOCALE, "We got a translation"); - // m is actually not a char const * but ucs4 data - trans = reinterpret_cast(trans_c); + string const code = realCode(lang_); + if (code.empty()) { + LYXERR(Debug::LOCALE, "Cannot find translation for language " << lang_); + return false; } - cleanTranslation(trans); + string const filen = package().messages_file(code).toSafeFilesystemEncoding(); - // Reset environment variables as they were. - if (!lang_.empty()) { - // Reset everything as it was. - LYXERR(Debug::LOCALE, "restoring LANGUAGE from " - << getEnv("LANGUAGE") - << " to " << oldLANGUAGE); - if (!setEnv("LANGUAGE", oldLANGUAGE)) - LYXERR(Debug::LOCALE, "\t... failed!"); - LYXERR(Debug::LOCALE, "restoring LC_ALL from " << getEnv("LC_ALL") - << " to " << oldLC_ALL); - if (!setEnv("LC_ALL", oldLC_ALL)) - LYXERR(Debug::LOCALE, "\t... failed!"); -#ifdef HAVE_LC_MESSAGES - setlocale(LC_MESSAGES, ""); -#endif + // get file size + struct stat buf; + if (stat(filen.c_str(), &buf)) { + LYXERR0("Cannot get information for file " << filen); + return false; } - pair result = - cache_.insert(make_pair(m, trans)); + vector moData(buf.st_size); - LASSERT(result.second, /**/); + ifstream is(filen.c_str(), ios::in | ios::binary); + if (!is.read(&moData[0], buf.st_size)) { + LYXERR0("Cannot read file " << filen); + return false; + } - return result.first->second; -} + MoHeader * header = reinterpret_cast(&moData[0]); -} // namespace lyx + bool doSwap = false; + if (header->magic == 0xde120495) { + header->swapEnd(); + doSwap = true; + } -#else // ENABLE_NLS -// This is the dummy variant. + if (header->magic != 0x950412de) { + LYXERR0("Wrong magic number for file " << filen + << ".\nExpected 0x950412de, got 0x" << std::hex << header->magic << std::dec); + return false; + } -namespace lyx { + StringTable * orig = reinterpret_cast(&moData[0] + header->O); + StringTable * trans = reinterpret_cast(&moData[0] + header->T); + // First the header + if (doSwap) { + // Handle endiannness change + orig[0].swapEnd(); + trans[0].swapEnd(); + } + string const info = string(&moData[0] + trans[0].offset, trans[0].length); + size_t pos = info.find("charset="); + if (pos != string::npos) { + pos += 8; + string charset; + size_t pos2 = info.find("\n", pos); + if (pos2 == string::npos) + charset = info.substr(pos); + else + charset = info.substr(pos, pos2 - pos); + charset = ascii_lowercase(trim(charset)); + if (charset != "utf-8") { + LYXERR0("Wrong encoding " << charset << " for file " << filen); + return false; + } + } else { + LYXERR0("Cannot find encoding encoding for file " << filen); + return false; + } -Messages::Messages(string const & /* l */) {} + for (size_t i = 1; i < header->N; ++i) { + if (doSwap) { + // Handle endiannness change + orig[i].swapEnd(); + trans[i].swapEnd(); + } + // Note that in theory the strings may contain NUL characters. + // This may be the case with plural forms + string const ostr(&moData[0] + orig[i].offset, orig[i].length); + docstring tstr = from_utf8(string(&moData[0] + trans[i].offset, + trans[i].length)); + cleanTranslation(tstr); + trans_map_[ostr] = tstr; + //lyxerr << ostr << " ==> " << tstr << endl; + } -void Messages::init() -{ + return true; } - docstring const Messages::get(string const & m) const { - docstring trans = from_ascii(m); - cleanTranslation(trans); - return trans; -} - + if (m.empty()) + return docstring(); -bool Messages::available() const -{ - return true; + TranslationMap::const_iterator it = trans_map_.find(m); + if (it != trans_map_.end()) + return it->second; + else { + docstring res = from_utf8(m); + cleanTranslation(res); + return res; + } } } // namespace lyx -#endif - -#if 0 - --#include +#else // ENABLE_NLS +// This is the dummy variant. namespace lyx { -// This version of the Pimpl utilizes the message capability of -// libstdc++ that is distributed with GNU G++. -class Messages::Pimpl { -public: - typedef messages::catalog catalog; - - Pimpl(string const & l) - : lang_(l), - loc_gl(lang_.c_str()), - mssg_gl(use_facet >(loc_gl)) - { - //LYXERR("Messages: language(" << l << ") in dir(" << dir << ")"); +std::string Messages::gui_lang_; - string const locale_dir = package().locale_dir().toFilesystemEncoding(); - cat_gl = mssg_gl.open(PACKAGE, loc_gl, locale_dir.c_str()); +Messages::Messages(string const & /* l */) {} - } +docstring const Messages::get(string const & m) const +{ + docstring trans = from_ascii(m); + cleanTranslation(trans); + return trans; +} - ~Pimpl() - { - mssg_gl.close(cat_gl); - } +std::string Messages::language() const +{ + return string(); +} - docstring const get(string const & msg) const - { - return mssg_gl.get(cat_gl, 0, 0, msg); - } -private: - /// - string lang_; - /// - locale loc_gl; - /// - messages const & mssg_gl; - /// - catalog cat_gl; -}; +bool Messages::available(string const & /* c */) +{ + return false; +} } // namespace lyx