2 * This file is part of LyX, the document processor.
3 * Licence details can be found in the file COPYING.
5 * \author Lars Gullik Bjønnes
7 * Full author contact details are available in file CREDITS.
11 This is a limited parser for gettext's mo files. Several features are
13 * encoding is supposed to be UTF-8 (the charset parameter is not honored)
14 * context is not handled (implemented differently in LyX)
15 * plural forms not implemented (not used for now in LyX).
16 * The byte endianness of the machine on which the .mo file has been
17 built is expected to be the same as the one of the machine where this
20 The data is loaded in a std::map object for simplicity.
24 Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
27 +------------------------------------------+
28 0 | magic number = 0x950412de |
30 4 | file format revision = 0 |
32 8 | number of strings | == N
34 12 | offset of table with original strings | == O
36 16 | offset of table with translation strings | == T
38 20 | size of hashing table | == S
40 24 | offset of hashing table | == H
43 . (possibly more entries later) .
46 O | length & offset 0th string ----------------.
47 O + 8 | length & offset 1st string ------------------.
49 O + ((N-1)*8)| length & offset (N-1)th string | | |
51 T | length & offset 0th translation ---------------.
52 T + 8 | length & offset 1st translation -----------------.
54 T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
56 H | start hash table | | | | |
58 H + S * 4 | end hash table | | | | |
60 | NUL terminated 0th string <----------------' | | |
62 | NUL terminated 1st string <------------------' | |
66 | NUL terminated 0th translation <---------------' |
68 | NUL terminated 1st translation <-----------------'
72 +------------------------------------------+
78 #include "support/Messages.h"
80 #include "support/debug.h"
81 #include "support/docstring.h"
82 #include "support/lstrings.h"
83 #include "support/Package.h"
84 #include "support/unicode.h"
86 #include "support/lassert.h"
88 #include <boost/cstdint.hpp>
93 #ifdef HAVE_SYS_STAT_H
94 # include <sys/stat.h>
98 using boost::uint32_t;
102 void cleanTranslation(docstring & trans)
105 Some English words have different translations, depending on
106 context. In these cases the original string is augmented by
107 context information (e.g. "To:[[as in 'From page x to page
108 y']]" and "To:[[as in 'From format x to format y']]"). Also,
109 when placeholders are used, the context can indicate what will
110 be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
111 [[time]]"). This means that we need to filter out everything
112 in double square brackets at the end of the string, otherwise
113 the user sees bogus messages. If we are unable to honour the
114 request we just return what we got in.
116 static docstring const ctx_start = from_ascii("[[");
117 static docstring const ctx_end = from_ascii("]]");
119 size_t const pos1 = trans.find(ctx_start);
120 if (pos1 != docstring::npos) {
121 size_t const pos2 = trans.find(ctx_end, pos1);
122 if (pos2 != docstring::npos) {
123 trans.erase(pos1, pos2 - pos1 + 2);
136 using namespace lyx::support;
140 std::string Messages::gui_lang_;
143 // This version uses the traditional gettext.
144 Messages::Messages(string const & l)
147 // strip off any encoding suffix, i.e., assume 8-bit po files
148 size_t i = lang_.find(".");
149 lang_ = lang_.substr(0, i);
150 LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
158 // Find the code we have for a given language code. Return empty if not found.
159 string realCode(string const & c)
161 // Qt tries to outsmart us and transforms en_US to C.
162 string code = (c == "C") ? "en" : c;
163 // this loops at most twice
165 if (package().messages_file(code).isReadableFile())
167 if (contains(code, '_'))
168 code = token(code, '_', 0);
177 bool Messages::available(string const & c)
179 return !realCode(c).empty();
183 string Messages::language() const
185 return realCode(lang_);
191 // magic number = 0x950412de
193 // file format revision = 0
197 // offset of table with original strings
199 // offset of table with translation strings
201 // there is a hashing table afterwards, but we ignore it
214 bool Messages::readMoFile()
218 LYXERR0("No language given, nothing to load.");
222 string const code = realCode(lang_);
224 LYXERR0("Cannot find translation for language " << lang_);
228 string const filen = package().messages_file(code).toSafeFilesystemEncoding();
232 if (stat(filen.c_str(), &buf)) {
233 LYXERR0("Cannot get information for file " << filen);
237 vector<char> moData(buf.st_size);
239 ifstream is(filen.c_str(), ios::in | ios::binary);
240 if (!is.read(&moData[0], buf.st_size)) {
241 LYXERR0("Cannot read file " << filen);
245 MoHeader const * header = reinterpret_cast<MoHeader const *>(&moData[0]);
246 if (header->magic != 0x950412de) {
247 LYXERR0("Wrong magic number for file " << filen
248 << ".\nExpected 0x950412de, got " << std::hex << header->magic);
252 StringTable const * orig = reinterpret_cast<StringTable const *>(&moData[0] + header->O);
253 StringTable const * trans = reinterpret_cast<StringTable const *>(&moData[0] + header->T);
255 string const info = string(&moData[0] + trans[0].offset, trans[0].length);
256 size_t pos = info.find("charset=");
257 if (pos != string::npos) {
260 size_t pos2 = info.find("\n", pos);
261 if (pos2 == string::npos)
262 charset = info.substr(pos);
264 charset = info.substr(pos, pos2 - pos);
265 charset = ascii_lowercase(trim(charset));
266 if (charset != "utf-8") {
267 LYXERR0("Wrong encoding " << charset << " for file " << filen);
271 LYXERR0("Cannot find encoding encoding for file " << filen);
275 for (size_t i = 1; i < header->N; ++i) {
276 // Note that in theory the strings may contain NUL characters.
277 // This may be the case with plural forms
278 string const ostr(&moData[0] + orig[i].offset, orig[i].length);
279 docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
281 cleanTranslation(tstr);
282 trans_map_[ostr] = tstr;
283 //lyxerr << ostr << " ==> " << tstr << endl;
289 docstring const Messages::get(string const & m) const
294 TranslationMap::const_iterator it = trans_map_.find(m);
295 if (it != trans_map_.end())
298 docstring res = from_utf8(m);
299 cleanTranslation(res);
307 // This is the dummy variant.
311 Messages::Messages(string const & /* l */) {}
313 docstring const Messages::get(string const & m) const
315 docstring trans = from_ascii(m);
316 cleanTranslation(trans);
320 std::string Messages::language() const
325 bool Messages::available(string const & /* c */)