2 * This file is part of LyX, the document processor.
3 * Licence details can be found in the file COPYING.
5 * \author Lars Gullik Bjønnes
7 * Full author contact details are available in file CREDITS.
11 This is a limited parser for gettext's po files. Several features are
13 * encoding is supposed to be UTF-8 (the charset parameter is not honored)
14 * context is not handled (implemented differently in LyX)
15 * plural forms not implemented (not used for now in LyX).
16 * The byte endianness of the machine on which the .mo file has been
17 built is expected to be the same as the one of the machine where this
20 The data is loaded in a std::map object for simplicity.
24 Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
27 +------------------------------------------+
28 0 | magic number = 0x950412de |
30 4 | file format revision = 0 |
32 8 | number of strings | == N
34 12 | offset of table with original strings | == O
36 16 | offset of table with translation strings | == T
38 20 | size of hashing table | == S
40 24 | offset of hashing table | == H
43 . (possibly more entries later) .
46 O | length & offset 0th string ----------------.
47 O + 8 | length & offset 1st string ------------------.
49 O + ((N-1)*8)| length & offset (N-1)th string | | |
51 T | length & offset 0th translation ---------------.
52 T + 8 | length & offset 1st translation -----------------.
54 T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
56 H | start hash table | | | | |
58 H + S * 4 | end hash table | | | | |
60 | NUL terminated 0th string <----------------' | | |
62 | NUL terminated 1st string <------------------' | |
66 | NUL terminated 0th translation <---------------' |
68 | NUL terminated 1st translation <-----------------'
72 +------------------------------------------+
78 #include "support/Messages.h"
80 #include "support/debug.h"
81 #include "support/docstring.h"
82 #include "support/lstrings.h"
83 #include "support/Package.h"
84 #include "support/unicode.h"
86 #include "support/lassert.h"
88 #include <boost/cstdint.hpp>
93 #ifdef HAVE_SYS_STAT_H
94 # include <sys/stat.h>
98 using boost::uint32_t;
102 void cleanTranslation(docstring & trans)
105 Some english words have different translations, depending on
106 context. In these cases the original string is augmented by
107 context information (e.g. "To:[[as in 'From page x to page
108 y']]" and "To:[[as in 'From format x to format y']]". Also,
109 when placeholders are used, the context can indicate what will
110 be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
111 [[time]]). This means that we need to filter out everything
112 in double square brackets at the end of the string, otherwise
113 the user sees bogus messages. If we are unable to honour the
114 request we just return what we got in.
116 static docstring const ctx_start = from_ascii("[[");
117 static docstring const ctx_end = from_ascii("]]");
119 size_t const pos1 = trans.find(ctx_start);
120 if (pos1 != docstring::npos) {
121 size_t const pos2 = trans.find(ctx_end, pos1);
122 if (pos2 != docstring::npos) {
123 trans.erase(pos1, pos2 - pos1 + 2);
136 using namespace lyx::support;
// Out-of-class definition of the static data member Messages::gui_lang_;
// it is default-constructed, i.e. starts out as an empty string.
140 std::string Messages::gui_lang_;
// Construct a message catalogue object for the language name l.
// NOTE(review): the lines between the signature and the first statement are
// not visible here; presumably lang_ is initialised from l there -- confirm.
143 Messages::Messages(string const & l)
146 // strip off any encoding suffix, i.e., assume 8-bit po files
// When lang_ contains no '.', find() yields npos and substr(0, npos) keeps
// the whole string, so lang_ is left unchanged in that case.
147 size_t i = lang_.find(".");
148 lang_ = lang_.substr(0, i);
149 LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
157 // Find the code we have for a given language code. Return empty if not found.
// NOTE(review): the loop construct and the return statements of this function
// are not visible in this excerpt; the notes below cover the visible lines only.
158 string realCode(string code)
160 // this loops at most twice
// Test whether the messages file that package() reports for this code is readable.
162 if (package().messages_file(code).isReadableFile())
// If the code contains a '_', retry with token(code, '_', 0)
// (presumably the part before the first '_', e.g. "de_DE" -> "de" -- confirm).
164 if (contains(code, '_'))
165 code = token(code, '_', 0);
174 bool Messages::available(string const & c)
176 return !realCode(c).empty();
180 string Messages::language() const
182 return realCode(lang_);
188 // magic number = 0x950412de
190 // file format revision = 0
194 // offset of table with original strings
196 // offset of table with translation strings
198 // there is a hashing table afterwards, but we ignore it
// Load the .mo catalogue for lang_ and store its entries in trans_map_.
// Visible steps: resolve the language code, read the whole file into memory,
// check the magic number, require a UTF-8 charset, then fill the map.
// NOTE(review): several statements of this function (conditions, returns,
// some declarations) are not visible in this excerpt; the comments below
// describe only what the visible lines do.
211 bool Messages::readMoFile()
215 LYXERR0("No language given, nothing to load.");
// Map lang_ to a language code for which a catalogue exists.
219 string const code = realCode(lang_);
221 LYXERR0("Cannot find translation for language " << lang_);
225 string const filen = package().messages_file(code).toSafeFilesystemEncoding();
// stat() supplies the file size (buf.st_size) used to size the buffer below.
229 if (stat(filen.c_str(), &buf)) {
230 LYXERR0("Cannot get information for file " << filen);
// Read the complete file into moData in one go.
234 vector<char> moData(buf.st_size);
236 ifstream is(filen.c_str(), ios::in | ios::binary);
237 if (!is.read(&moData[0], buf.st_size)) {
238 LYXERR0("Cannot read file " << filen);
// Interpret the start of the buffer as the MO header and check its magic number.
// NOTE(review): no visible check that the file is non-empty or at least as
// large as a MoHeader before &moData[0] is dereferenced -- confirm.
242 MoHeader const * header = reinterpret_cast<MoHeader const *>(&moData[0]);
243 if (header->magic != 0x950412de) {
244 LYXERR0("Wrong magic number for file " << filen
245 << ".\nExpected 0x950412de, got " << std::hex << header->magic);
// Locate the tables of original and translated strings via the header offsets.
// NOTE(review): header->O, header->T and the per-entry offset/length values
// come straight from the file and are used without visible bounds checks
// against moData.size() -- confirm whether validation happens elsewhere.
249 StringTable const * orig = reinterpret_cast<StringTable const *>(&moData[0] + header->O);
250 StringTable const * trans = reinterpret_cast<StringTable const *>(&moData[0] + header->T);
// Translation entry 0 is read as the catalogue's info text and searched
// for its "charset=" declaration.
252 string const info = string(&moData[0] + trans[0].offset, trans[0].length);
253 size_t pos = info.find("charset=");
254 if (pos != string::npos) {
// The charset value extends to the end of the line, or to the end of info.
257 size_t pos2 = info.find("\n", pos);
258 if (pos2 == string::npos)
259 charset = info.substr(pos);
261 charset = info.substr(pos, pos2 - pos);
// Compare trimmed and lower-cased: only "utf-8" is accepted.
262 charset = ascii_lowercase(trim(charset));
263 if (charset != "utf-8") {
264 LYXERR0("Wrong encoding " << charset << " for file " << filen);
// NOTE(review): the message below repeats the word "encoding".
268 LYXERR0("Cannot find encoding encoding for file " << filen);
// Entries are read starting at index 1; entry 0 was used as info text above.
272 for (size_t i = 1; i < header->N; ++i) {
273 // Note that in theory the strings may contain NUL characters.
274 // This may be the case with plural forms
// The explicit length is passed to the string constructors, so the
// strings are not cut at an embedded NUL.
275 string const ostr(&moData[0] + orig[i].offset, orig[i].length);
276 docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
// Strip "[[...]]" context markers, then store the translation under the original string.
278 cleanTranslation(tstr);
279 trans_map_[ostr] = tstr;
280 //lyxerr << ostr << " ==> " << tstr << endl;
// Look up the message m in trans_map_.
// NOTE(review): the return statements are not visible in this excerpt.
// Presumably a hit returns the stored translation and a miss returns res.
286 docstring const Messages::get(string const & m) const
291 TranslationMap::const_iterator it = trans_map_.find(m);
292 if (it != trans_map_.end())
// Convert m itself from UTF-8 and strip its "[[...]]" context markers.
295 docstring res = from_utf8(m);
296 cleanTranslation(res);
304 // This is the dummy variant.
308 Messages::Messages(string const & /* l */) {}
// Dummy variant of get(): no catalogue lookup is done in the visible lines.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably trans is returned.
310 docstring const Messages::get(string const & m) const
// The message itself is converted with from_ascii() and its "[[...]]"
// context markers are stripped.
312 docstring trans = from_ascii(m);
313 cleanTranslation(trans);
317 std::string Messages::language() const
322 bool Messages::available(string const & /* c */)