2 * This file is part of LyX, the document processor.
3 * Licence details can be found in the file COPYING.
5 * \author Lars Gullik Bjønnes
6 * \author Jean-Marc Lasgouttes
8 * Full author contact details are available in file CREDITS.
12 This contains a limited parser for gettext's mo files. Several features are
13 not implemented currently:
14 * encoding is supposed to be UTF-8 (the charset parameter is enforced)
15 * context is not handled (implemented differently in LyX)
16 * plural forms are not implemented (not used for now in LyX).
18 The data is loaded in a std::map object for simplicity.
22 Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
25 +------------------------------------------+
26 0 | magic number = 0x950412de |
28 4 | file format revision = 0 |
30 8 | number of strings | == N
32 12 | offset of table with original strings | == O
34 16 | offset of table with translation strings | == T
36 20 | size of hashing table | == S
38 24 | offset of hashing table | == H
41 . (possibly more entries later) .
44 O | length & offset 0th string ----------------.
45 O + 8 | length & offset 1st string ------------------.
47 O + ((N-1)*8)| length & offset (N-1)th string | | |
49 T | length & offset 0th translation ---------------.
50 T + 8 | length & offset 1st translation -----------------.
52 T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
54 H | start hash table | | | | |
56 H + S * 4 | end hash table | | | | |
58 | NUL terminated 0th string <----------------' | | |
60 | NUL terminated 1st string <------------------' | |
64 | NUL terminated 0th translation <---------------' |
66 | NUL terminated 1st translation <-----------------'
70 +------------------------------------------+
76 #include "support/Messages.h"
78 #include "support/debug.h"
79 #include "support/docstring.h"
80 #include "support/lstrings.h"
81 #include "support/Package.h"
82 #include "support/unicode.h"
84 #include "support/lassert.h"
91 #ifdef HAVE_SYS_STAT_H
92 # include <sys/stat.h>
// Strip a "[[...]]" context annotation from a string, in place.
//
// \param trans  string to clean; it is modified directly and nothing is
//               returned.
//
// The visible statements look for the first "[[" in trans, then for the
// first "]]" at or after that position, and erase everything from the
// opening "[[" up to and including the closing "]]" (the "+ 2" in the
// erase length is the size of the "]]" marker). When either marker is
// absent, nothing is erased.
//
// NOTE(review): the numbering embedded in this listing skips lines inside
// this function (for instance 115 and everything after 120), so there may be
// control flow around these statements that is not shown here, such as a
// loop that repeats the removal. Confirm against the complete file.
99 void cleanTranslation(docstring & trans)
102 Some english words have different translations, depending on
103 context. In these cases the original string is augmented by
104 context information (e.g. "To:[[as in 'From page x to page
105 y']]" and "To:[[as in 'From format x to format y']]". Also,
106 when placeholders are used, the context can indicate what will
107 be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
108 [[time]]). This means that we need to filter out everything
109 in double square brackets at the end of the string, otherwise
110 the user sees bogus messages. If we are unable to honour the
111 request we just return what we got in.
113 static docstring const ctx_start = from_ascii("[[");
114 static docstring const ctx_end = from_ascii("]]");
116 size_t const pos1 = trans.find(ctx_start);
117 if (pos1 != docstring::npos) {
118 size_t const pos2 = trans.find(ctx_end, pos1);
119 if (pos2 != docstring::npos) {
120 trans.erase(pos1, pos2 - pos1 + 2);
133 using namespace lyx::support;
// Out-of-class definition of the static member Messages::gui_lang_.
// It is default-constructed, so it starts out as an empty string.
137 std::string Messages::gui_lang_;
// Construct a Messages object for the language code l.
//
// \param l  language code, possibly followed by ".<encoding>".
//
// The visible body cuts lang_ at its first '.', so a value such as
// "de_DE.UTF-8" becomes "de_DE". When lang_ contains no '.', find() yields
// npos and substr(0, npos) keeps the whole string. The resulting code is
// then logged on the Debug::LOCALE channel.
//
// NOTE(review): the line that sets lang_ from l is not visible in this
// listing (presumably a member initializer). Confirm against the full file.
140 Messages::Messages(string const & l)
143 // strip off any encoding suffix, i.e., assume 8-bit po files
144 size_t i = lang_.find(".");
145 lang_ = lang_.substr(0, i);
146 LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
154 // Find the code we have for a given language code. Return empty if not found.
//
// \param code  language code to look up. It is taken by value because it is
//              shortened locally during the search.
//
// Visible logic: a code is tested by asking whether
// package().messages_file(code) is a readable file. If the code contains
// '_', it is replaced by token(code, '_', 0) -- presumably the part before
// the first '_', e.g. "de_DE" -> "de"; verify against token() in
// support/lstrings.h -- which is what allows a second attempt.
//
// NOTE(review): the loop header and every return statement are missing from
// this listing, so the exact return values on each path cannot be confirmed
// from here.
155 string realCode(string code)
157 // this loops at most twice
159 if (package().messages_file(code).isReadableFile())
161 if (contains(code, '_'))
162 code = token(code, '_', 0);
// Report whether a message catalogue can be found for language code c.
//
// \param c  language code to test.
// \return   true when realCode(c) yields a non-empty code, false otherwise.
171 bool Messages::available(string const & c)
173 return !realCode(c).empty();
// Return the catalogue code that this object's language resolves to.
//
// \return  realCode(lang_); per the comment on realCode(), that is empty
//          when no catalogue is found.
177 string Messages::language() const
179 return realCode(lang_);
// Reverse the byte order of a 32-bit unsigned integer, in place.
//
// \param number  value to convert; it is overwritten with the result.
//
// The integer is accessed as an array of unsigned char; bytes 0 and 3 are
// exchanged, then bytes 1 and 2, which reverses all four bytes.
184 void swapInt(uint32_t & number)
186 unsigned char * num_ar = reinterpret_cast<unsigned char *>(&number);
187 swap(num_ar[0], num_ar[3]);
188 swap(num_ar[1], num_ar[2]);
194 // magic number = 0x950412de
196 // file format revision = 0
200 // offset of table with original strings
202 // offset of table with translation strings
204 // there is a hash table afterwards, but we ignore it
206 // Change the endianness of header data
211 void MoHeader::swapEnd()
227 // Change the endianness of string table data
232 void StringTable::swapEnd()
// Load the gettext .mo catalogue for lang_ into trans_map_.
//
// Visible steps, in order:
//   1. resolve lang_ to a catalogue code with realCode() and build the file
//      name from package().messages_file(code);
//   2. stat() the file and read it completely into a byte buffer;
//   3. view the start of the buffer as a MoHeader and check its magic number;
//   4. locate the original-string and translation tables through the header
//      offsets O and T;
//   5. look for a "charset=" declaration in translation entry 0 and report
//      an error unless it is utf-8;
//   6. for entries 1 .. N-1, store original -> cleaned translation in
//      trans_map_.
//
// \return  bool. NOTE(review): none of the return statements are visible in
//          this listing; presumably false on the error paths and true on
//          success. Confirm against the full file.
//
// NOTE(review): many lines of this function are missing from this listing
// (its embedded numbering has gaps), including the conditions guarding the
// first two log messages and the bodies of the byte-order branches.
241 bool Messages::readMoFile()
245 LYXERR0("No language given, nothing to load.");
249 string const code = realCode(lang_);
251 LYXERR(Debug::LOCALE, "Cannot find translation for language " << lang_);
255 string const filen = package().messages_file(code).toSafeFilesystemEncoding();
// stat() returns non-zero on failure; on success buf.st_size gives the file size.
259 if (stat(filen.c_str(), &buf)) {
260 LYXERR0("Cannot get information for file " << filen);
// The whole file is read into memory; the buffer is sized from st_size.
// NOTE(review): no check is visible here that the file is non-empty or at
// least sizeof(MoHeader) bytes long before &moData[0] is dereferenced, nor
// that the offsets and lengths taken from the file (O, T, and each table
// entry's offset/length) stay inside moData. Such checks may sit on lines
// missing from this listing; if not, a truncated or corrupt file leads to
// out-of-bounds reads.
264 vector<char> moData(buf.st_size);
266 ifstream is(filen.c_str(), ios::in | ios::binary);
267 if (!is.read(&moData[0], buf.st_size)) {
268 LYXERR0("Cannot read file " << filen);
272 MoHeader * header = reinterpret_cast<MoHeader *>(&moData[0]);
// 0xde120495 is the magic number 0x950412de with its four bytes reversed,
// i.e. the file was written with the opposite byte order. The body of this
// branch is not visible here; presumably it enables byte swapping.
275 if (header->magic == 0xde120495) {
280 if (header->magic != 0x950412de) {
281 LYXERR0("Wrong magic number for file " << filen
282 << ".\nExpected 0x950412de, got 0x" << std::hex << header->magic << std::dec);
// Both tables are addressed directly inside the buffer via the header offsets.
286 StringTable * orig = reinterpret_cast<StringTable *>(&moData[0] + header->O);
287 StringTable * trans = reinterpret_cast<StringTable *>(&moData[0] + header->T);
290 // Handle endianness change
// Translation entry 0 is read as the catalogue's metadata text, in which the
// charset declaration is searched for.
294 string const info = string(&moData[0] + trans[0].offset, trans[0].length);
295 size_t pos = info.find("charset=");
296 if (pos != string::npos) {
// The charset value runs up to the next newline, or to the end of info when
// there is none.
// NOTE(review): for the "utf-8" comparison below to succeed, pos must have
// been advanced past "charset=" on a line not visible here -- confirm.
299 size_t pos2 = info.find("\n", pos);
300 if (pos2 == string::npos)
301 charset = info.substr(pos);
303 charset = info.substr(pos, pos2 - pos);
// Normalise (trim, lower-case) before comparing; only utf-8 is accepted.
304 charset = ascii_lowercase(trim(charset));
305 if (charset != "utf-8") {
306 LYXERR0("Wrong encoding " << charset << " for file " << filen);
// NOTE(review): the message below repeats the word "encoding".
310 LYXERR0("Cannot find encoding encoding for file " << filen);
// The loop starts at 1 because entry 0 was consumed above as metadata.
314 for (size_t i = 1; i < header->N; ++i) {
316 // Handle endianness change
320 // Note that in theory the strings may contain NUL characters.
321 // This may be the case with plural forms
// Strings are built from (pointer, length) pairs rather than as C strings,
// so embedded NUL characters are preserved.
322 string const ostr(&moData[0] + orig[i].offset, orig[i].length);
323 docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
// Remove the "[[...]]" context annotation before storing the translation.
325 cleanTranslation(tstr);
326 trans_map_[ostr] = tstr;
327 //lyxerr << ostr << " ==> " << tstr << endl;
// Return the translation of message m.
//
// \param m  original message, used as the key into trans_map_.
//
// Visible logic: m is looked up in trans_map_. For the case where it has no
// entry, the visible fallback converts m itself with from_utf8() and passes
// the result through cleanTranslation(), so that any "[[...]]" context
// annotation is removed from the untranslated text as well.
//
// NOTE(review): the return statements are not visible in this listing;
// presumably the mapped value is returned on a hit and res otherwise.
333 docstring const Messages::get(string const & m) const
338 TranslationMap::const_iterator it = trans_map_.find(m);
339 if (it != trans_map_.end())
342 docstring res = from_utf8(m);
343 cleanTranslation(res);
// Look up the translation of message m in trans_map_.
//
// \param m  original message, used as the key into trans_map_.
//
// NOTE(review): only the lookup and the "found" test are visible in this
// listing. Judging by the name, a miss presumably yields an empty docstring
// instead of falling back to m -- confirm against the full file.
349 docstring const Messages::getIfFound(string const & m) const
354 TranslationMap::const_iterator it = trans_map_.find(m);
355 if (it != trans_map_.end())
364 // This is the dummy variant.
// Definition of the static member Messages::gui_lang_ for the dummy variant
// of this class; default-constructed, so initially empty.
368 std::string Messages::gui_lang_;
// Dummy-variant constructor: the language argument is accepted but ignored
// (its name is commented out) and the body is empty.
370 Messages::Messages(string const & /* l */) {}
// Dummy-variant lookup: no catalogue is consulted. The visible statements
// convert m with from_ascii() and strip any "[[...]]" context annotation
// through cleanTranslation().
//
// \param m  original message.
//
// NOTE(review): the return statement is not visible in this listing;
// presumably trans is returned. Also note that the other get() in this file
// uses from_utf8() for the same fallback, whereas this one uses from_ascii()
// -- confirm that the difference is intended.
372 docstring const Messages::get(string const & m) const
374 docstring trans = from_ascii(m);
375 cleanTranslation(trans);
379 std::string Messages::language() const
384 bool Messages::available(string const & /* c */)
389 docstring const Messages::getIfFound(string const & /* m */) const