2 * This file is part of LyX, the document processor.
3 * Licence details can be found in the file COPYING.
5 * \author Lars Gullik Bjønnes
7 * Full author contact details are available in file CREDITS.
11 This is a limited parser for gettext's mo files. Several features are
13 * encoding is supposed to be UTF-8 (the charset parameter is not honored)
14 * context is not handled (implemented differently in LyX)
15 * plural forms not implemented (not used for now in LyX).
16 * The byte endianness of the machine on which the .mo file has been
17 built is expected to be the same as the one of the machine where this
20 The data is loaded in a std::map object for simplicity.
24 Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
27 +------------------------------------------+
28 0 | magic number = 0x950412de |
30 4 | file format revision = 0 |
32 8 | number of strings | == N
34 12 | offset of table with original strings | == O
36 16 | offset of table with translation strings | == T
38 20 | size of hashing table | == S
40 24 | offset of hashing table | == H
43 . (possibly more entries later) .
46 O | length & offset 0th string ----------------.
47 O + 8 | length & offset 1st string ------------------.
49 O + ((N-1)*8)| length & offset (N-1)th string | | |
51 T | length & offset 0th translation ---------------.
52 T + 8 | length & offset 1st translation -----------------.
54 T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
56 H | start hash table | | | | |
58 H + S * 4 | end hash table | | | | |
60 | NUL terminated 0th string <----------------' | | |
62 | NUL terminated 1st string <------------------' | |
66 | NUL terminated 0th translation <---------------' |
68 | NUL terminated 1st translation <-----------------'
72 +------------------------------------------+
78 #include "support/Messages.h"
80 #include "support/debug.h"
81 #include "support/docstring.h"
82 #include "support/lstrings.h"
83 #include "support/Package.h"
84 #include "support/unicode.h"
86 #include "support/lassert.h"
88 #include <boost/cstdint.hpp>
93 #ifdef HAVE_SYS_STAT_H
94 # include <sys/stat.h>
98 using boost::uint32_t;
102 void cleanTranslation(docstring & trans)
105 Some English words have different translations, depending on
106 context. In these cases the original string is augmented by
107 context information (e.g. "To:[[as in 'From page x to page
108 y']]" and "To:[[as in 'From format x to format y']]"). Also,
109 when placeholders are used, the context can indicate what will
110 be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
111 [[time]]"). This means that we need to filter out everything
112 in double square brackets at the end of the string, otherwise
113 the user sees bogus messages. If we are unable to honour the
114 request we just return what we got in.
116 static docstring const ctx_start = from_ascii("[[");
117 static docstring const ctx_end = from_ascii("]]");
119 size_t const pos1 = trans.find(ctx_start);
120 if (pos1 != docstring::npos) {
121 size_t const pos2 = trans.find(ctx_end, pos1);
122 if (pos2 != docstring::npos) {
123 trans.erase(pos1, pos2 - pos1 + 2);
136 using namespace lyx::support;
140 std::string Messages::gui_lang_;
143 // This version uses the traditional gettext.
144 Messages::Messages(string const & l)
147 // strip off any encoding suffix, i.e., assume 8-bit po files
148 size_t i = lang_.find(".");
149 lang_ = lang_.substr(0, i);
150 LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
158 string moFile(string const & c)
160 static string const locale_dir
161 = package().locale_dir().toFilesystemEncoding();
162 return locale_dir + "/" + c
163 + "/LC_MESSAGES/" PACKAGE ".mo";
167 // Find the code we have for a given language code. Return empty if not found.
168 string realCode(string const & c)
170 // Qt tries to outsmart us and transforms en_US to C.
171 string code = (c == "C") ? "en" : c;
172 // this loops at most twice
174 if (FileName(moFile(code)).isReadableFile())
176 if (contains(code, '_'))
177 code = token(code, '_', 0);
186 bool Messages::available(string const & c)
188 return !realCode(c).empty();
192 string Messages::language() const
194 return realCode(lang_);
200 // magic number = 0x950412de
202 // file format revision = 0
206 // offset of table with original strings
208 // offset of table with translation strings
210 // there is a hashing table afterwards, but we ignore it
223 bool Messages::readMoFile()
227 LYXERR0("No language given, nothing to load.");
231 string const code = realCode(lang_);
233 LYXERR0("Cannot find translation for language " << lang_);
237 string const filen = moFile(code);
241 if (stat(filen.c_str(), &buf)) {
242 LYXERR0("Cannot get information for file " << filen);
246 vector<char> moData(buf.st_size);
248 ifstream is(filen.c_str(), ios::in | ios::binary);
249 if (!is.read(&moData[0], buf.st_size)) {
250 LYXERR0("Cannot read file " << filen);
254 MoHeader const * header = reinterpret_cast<MoHeader const *>(&moData[0]);
255 if (header->magic != 0x950412de) {
256 LYXERR0("Wrong magic number for file " << filen
257 << ".\nExpected 0x950412de, got " << std::hex << header->magic);
261 StringTable const * orig = reinterpret_cast<StringTable const *>(&moData[0] + header->O);
262 StringTable const * trans = reinterpret_cast<StringTable const *>(&moData[0] + header->T);
264 string const info = string(&moData[0] + trans[0].offset, trans[0].length);
265 size_t pos = info.find("charset=");
266 if (pos != string::npos) {
269 size_t pos2 = info.find("\n", pos);
270 if (pos2 == string::npos)
271 charset = info.substr(pos);
273 charset = info.substr(pos, pos2 - pos);
274 charset = ascii_lowercase(trim(charset));
275 if (charset != "utf-8") {
276 LYXERR0("Wrong encoding " << charset << " for file " << filen);
280 LYXERR0("Cannot find encoding encoding for file " << filen);
284 for (size_t i = 1; i < header->N; ++i) {
285 // Note that in theory the strings may contain NUL characters.
286 // This may be the case with plural forms
287 string const ostr(&moData[0] + orig[i].offset, orig[i].length);
288 docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
290 cleanTranslation(tstr);
291 trans_map_[ostr] = tstr;
292 //lyxerr << ostr << " ==> " << tstr << endl;
298 docstring const Messages::get(string const & m) const
303 TranslationMap::const_iterator it = trans_map_.find(m);
304 if (it != trans_map_.end())
307 docstring res = from_utf8(m);
308 cleanTranslation(res);
316 // This is the dummy variant.
320 Messages::Messages(string const & /* l */) {}
322 void Messages::init()
327 docstring const Messages::get(string const & m) const
329 docstring trans = from_ascii(m);
330 cleanTranslation(trans);
334 std::string Messages::language() const
339 bool Messages::available(string const & /* c */)