2 * This file is part of LyX, the document processor.
3 * Licence details can be found in the file COPYING.
5 * \author Lars Gullik Bjønnes
7 * Full author contact details are available in file CREDITS.
11 This is a limited parser for gettext's mo files. Several features are
13 * encoding is supposed to be UTF-8 (the charset parameter is not honored)
14 * context is not handled (implemented differently in LyX)
15 * plural forms not implemented (not used for now in LyX).
16 * The byte endianness of the machine on which the .mo file has been
17 built is expected to be the same as the one of the machine where this
20 The data is loaded in a std::map object for simplicity.
24 Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
27 +------------------------------------------+
28 0 | magic number = 0x950412de |
30 4 | file format revision = 0 |
32 8 | number of strings | == N
34 12 | offset of table with original strings | == O
36 16 | offset of table with translation strings | == T
38 20 | size of hashing table | == S
40 24 | offset of hashing table | == H
43 . (possibly more entries later) .
46 O | length & offset 0th string ----------------.
47 O + 8 | length & offset 1st string ------------------.
49 O + ((N-1)*8)| length & offset (N-1)th string | | |
51 T | length & offset 0th translation ---------------.
52 T + 8 | length & offset 1st translation -----------------.
54 T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
56 H | start hash table | | | | |
58 H + S * 4 | end hash table | | | | |
60 | NUL terminated 0th string <----------------' | | |
62 | NUL terminated 1st string <------------------' | |
66 | NUL terminated 0th translation <---------------' |
68 | NUL terminated 1st translation <-----------------'
72 +------------------------------------------+
78 #include "support/Messages.h"
80 #include "support/debug.h"
81 #include "support/docstring.h"
82 #include "support/lstrings.h"
83 #include "support/Package.h"
84 #include "support/unicode.h"
86 #include "support/lassert.h"
88 #include <boost/cstdint.hpp>
93 #ifdef HAVE_SYS_STAT_H
94 # include <sys/stat.h>
98 using boost::uint32_t;
// Strips a "[[...]]" context annotation from trans, in place.
// trans: string to clean; it is modified directly and nothing is returned.
// NOTE(review): the original numbering jumps (117 -> 119, 123 -> 136), so any
// loop around the search and the closing braces are not visible in this excerpt.
102 void cleanTranslation(docstring & trans)
105 Some english words have different translations, depending on
106 context. In these cases the original string is augmented by
107 context information (e.g. "To:[[as in 'From page x to page
108 y']]" and "To:[[as in 'From format x to format y']]". Also,
109 when placeholders are used, the context can indicate what will
110 be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
111 [[time]]). This means that we need to filter out everything
112 in double square brackets at the end of the string, otherwise
113 the user sees bogus messages. If we are unable to honour the
114 request we just return what we got in.
// Annotation delimiters; function-local statics, so they are built only once.
116 static docstring const ctx_start = from_ascii("[[");
117 static docstring const ctx_end = from_ascii("]]");
// Find the opening marker; nothing is erased when it is absent.
119 size_t const pos1 = trans.find(ctx_start);
120 if (pos1 != docstring::npos) {
// Look for the closing marker only at or after the opening one.
121 size_t const pos2 = trans.find(ctx_end, pos1);
122 if (pos2 != docstring::npos) {
// Erase from "[[" through "]]" inclusive; the + 2 covers the two characters of "]]".
123 trans.erase(pos1, pos2 - pos1 + 2);
136 using namespace lyx::support;
// Out-of-class definition of the Messages::gui_lang_ string member (default-constructed, i.e. empty).
140 std::string Messages::gui_lang_;
// Constructor taking a language code l.
// NOTE(review): how lang_ is initialised from l is not visible in this excerpt
// (original lines 144-145 are missing) - presumably a member initialiser.
143 Messages::Messages(string const & l)
146 // strip off any encoding suffix, i.e., assume 8-bit po files
// If lang_ contains no '.', find() returns npos and substr(0, npos) keeps the
// whole string (standard std::string behaviour; assumes lang_ is a std::string).
147 size_t i = lang_.find(".");
148 lang_ = lang_.substr(0, i);
// Log the resulting language code on the LOCALE debug channel.
149 LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
157 // Find the code we have for a given language code. Return empty if not found.
// c: the language code to resolve; "C" is treated as "en".
// NOTE(review): the loop header and the return statements (original lines 163,
// 165, 168 onwards) are not visible in this excerpt.
158 string realCode(string const & c)
160 // Qt tries to outsmart us and transforms en_US to C.
161 string code = (c == "C") ? "en" : c;
162 // this loops at most twice
// Test whether the messages file for the current candidate code is readable.
164 if (package().messages_file(code).isReadableFile())
// If the code contains '_', retry with token 0 of the '_'-separated code,
// presumably the part before the first '_' (e.g. "en" for "en_US") - confirm
// against token() in support/lstrings.
166 if (contains(code, '_'))
167 code = token(code, '_', 0);
// Reports whether a translation is available for language code c:
// true exactly when realCode(c) resolves to a non-empty code.
176 bool Messages::available(string const & c)
178 return !realCode(c).empty();
// Returns the code that realCode() resolves for this object's lang_.
182 string Messages::language() const
184 return realCode(lang_);
190 // magic number = 0x950412de
192 // file format revision = 0
196 // offset of table with original strings
198 // offset of table with translation strings
200 // there is a hashing table afterwards, but we ignore it
// Reads the .mo catalogue for lang_ and stores its entries in trans_map_.
// Returns a bool. NOTE(review): the return statements and several conditions
// are not visible in this excerpt - presumably false after each logged error
// and true on success; confirm against the full file.
213 bool Messages::readMoFile()
217 LYXERR0("No language given, nothing to load.");
// Resolve lang_ to a code for which a messages file can be found.
221 string const code = realCode(lang_);
223 LYXERR0("Cannot find translation for language " << lang_);
// Path of the messages file, converted for use with the C file APIs below.
227 string const filen = package().messages_file(code).toSafeFilesystemEncoding();
// stat() returns non-zero on failure; on success buf.st_size holds the file size.
231 if (stat(filen.c_str(), &buf)) {
232 LYXERR0("Cannot get information for file " << filen);
// Buffer sized to hold the whole file, which is then read in binary mode.
236 vector<char> moData(buf.st_size);
238 ifstream is(filen.c_str(), ios::in | ios::binary);
239 if (!is.read(&moData[0], buf.st_size)) {
240 LYXERR0("Cannot read file " << filen);
// Interpret the start of the buffer as the MO header and check the magic number.
// The value is compared as stored, with no byte swapping.
244 MoHeader const * header = reinterpret_cast<MoHeader const *>(&moData[0]);
245 if (header->magic != 0x950412de) {
246 LYXERR0("Wrong magic number for file " << filen
247 << ".\nExpected 0x950412de, got " << std::hex << header->magic);
// Locate the original-string and translation tables via the header offsets O and T.
// NOTE(review): no bounds check of O, T, N or the per-entry offset/length
// against the buffer size is visible in this excerpt.
251 StringTable const * orig = reinterpret_cast<StringTable const *>(&moData[0] + header->O);
252 StringTable const * trans = reinterpret_cast<StringTable const *>(&moData[0] + header->T);
// Translation entry 0 is searched for the "charset=" declaration.
254 string const info = string(&moData[0] + trans[0].offset, trans[0].length);
255 size_t pos = info.find("charset=");
256 if (pos != string::npos) {
// The charset value runs up to the next newline, or to the end of info if there is none.
259 size_t pos2 = info.find("\n", pos);
260 if (pos2 == string::npos)
261 charset = info.substr(pos);
263 charset = info.substr(pos, pos2 - pos);
// Trim and lower-case the value before comparing; anything other than "utf-8" is reported.
264 charset = ascii_lowercase(trim(charset));
265 if (charset != "utf-8") {
266 LYXERR0("Wrong encoding " << charset << " for file " << filen);
// NOTE(review): the message below repeats the word "encoding" - looks like a typo.
270 LYXERR0("Cannot find encoding encoding for file " << filen);
// Start at 1: entry 0 was already used above for the charset information.
274 for (size_t i = 1; i < header->N; ++i) {
275 // Note that in theory the strings may contain NUL characters.
276 // This may be the case with plural forms
// Both strings are built with explicit lengths, so embedded NULs are kept.
// The translation is converted from UTF-8, cleaned, and stored under the original string.
277 string const ostr(&moData[0] + orig[i].offset, orig[i].length);
278 docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
280 cleanTranslation(tstr);
281 trans_map_[ostr] = tstr;
282 //lyxerr << ostr << " ==> " << tstr << endl;
// Returns the translation for message m.
// NOTE(review): the return statements are not visible in this excerpt -
// presumably the mapped value on a hit, and res otherwise.
288 docstring const Messages::get(string const & m) const
// Look m up in the loaded translation map.
293 TranslationMap::const_iterator it = trans_map_.find(m);
294 if (it != trans_map_.end())
// Build a result from m itself: convert from UTF-8 and pass it through cleanTranslation().
297 docstring res = from_utf8(m);
298 cleanTranslation(res);
306 // This is the dummy variant.
// Dummy-variant constructor: the language argument is unused and the body is empty.
310 Messages::Messages(string const & /* l */) {}
// Dummy-variant get(): no catalogue lookup is performed in the visible code.
// NOTE(review): the return statement is not visible in this excerpt - presumably trans.
312 docstring const Messages::get(string const & m) const
// Convert m with from_ascii() and pass it through cleanTranslation().
314 docstring trans = from_ascii(m);
315 cleanTranslation(trans);
// Dummy-variant language(). NOTE(review): the body is not visible in this excerpt.
319 std::string Messages::language() const
// Dummy-variant available(); the language-code argument is unnamed.
// NOTE(review): the body is not visible in this excerpt.
324 bool Messages::available(string const & /* c */)