2 * This file is part of LyX, the document processor.
3 * Licence details can be found in the file COPYING.
5 * \author Lars Gullik Bjønnes
6 * \author Jean-Marc Lasgouttes
8 * Full author contact details are available in file CREDITS.
12 This file contains a limited parser for gettext's MO files. Several features are
13 not implemented currently:
14 * the encoding must be UTF-8 (the charset parameter is checked)
15 * context is not handled (implemented differently in LyX)
16 * plural forms are not implemented (not used for now in LyX).
18 The data is loaded in a std::map object for simplicity.
22 Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
25 +------------------------------------------+
26 0 | magic number = 0x950412de |
28 4 | file format revision = 0 |
30 8 | number of strings | == N
32 12 | offset of table with original strings | == O
34 16 | offset of table with translation strings | == T
36 20 | size of hashing table | == S
38 24 | offset of hashing table | == H
41 . (possibly more entries later) .
44 O | length & offset 0th string ----------------.
45 O + 8 | length & offset 1st string ------------------.
47 O + ((N-1)*8)| length & offset (N-1)th string | | |
49 T | length & offset 0th translation ---------------.
50 T + 8 | length & offset 1st translation -----------------.
52 T + ((N-1)*8)| length & offset (N-1)th translation | | | | |
54 H | start hash table | | | | |
56 H + S * 4 | end hash table | | | | |
58 | NUL terminated 0th string <----------------' | | |
60 | NUL terminated 1st string <------------------' | |
64 | NUL terminated 0th translation <---------------' |
66 | NUL terminated 1st translation <-----------------'
70 +------------------------------------------+
76 #include "support/Messages.h"
78 #include "support/debug.h"
79 #include "support/docstring.h"
80 #include "support/lstrings.h"
81 #include "support/Package.h"
82 #include "support/unicode.h"
84 #include "support/lassert.h"
86 #include <boost/cstdint.hpp>
92 #ifdef HAVE_SYS_STAT_H
93 # include <sys/stat.h>
97 using boost::uint32_t;
101 void cleanTranslation(docstring & trans)
104 Some english words have different translations, depending on
105 context. In these cases the original string is augmented by
106 context information (e.g. "To:[[as in 'From page x to page
107 y']]" and "To:[[as in 'From format x to format y']]". Also,
108 when placeholders are used, the context can indicate what will
109 be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
110 [[time]]). This means that we need to filter out everything
111 in double square brackets at the end of the string, otherwise
112 the user sees bogus messages. If we are unable to honour the
113 request we just return what we got in.
115 static docstring const ctx_start = from_ascii("[[");
116 static docstring const ctx_end = from_ascii("]]");
118 size_t const pos1 = trans.find(ctx_start);
119 if (pos1 != docstring::npos) {
120 size_t const pos2 = trans.find(ctx_end, pos1);
121 if (pos2 != docstring::npos) {
122 trans.erase(pos1, pos2 - pos1 + 2);
135 using namespace lyx::support;
// Storage for the static data member Messages::gui_lang_.
// NOTE(review): presumably the GUI language code, going by the name - confirm against support/Messages.h.
139 std::string Messages::gui_lang_;
// Constructor: normalises the stored language code lang_ and logs it.
// NOTE(review): the member initialiser and the end of the body are not
// visible in this excerpt; lang_ is presumably initialised from l - confirm.
142 Messages::Messages(string const & l)
145 // strip off any encoding suffix, i.e., assume 8-bit po files
// Everything from the first dot on is dropped (e.g. de_DE.UTF-8 becomes de_DE).
// Assuming lang_ is a std::string: without a dot, find() yields npos and
// substr(0, npos) keeps the whole string.
146 size_t i = lang_.find(".");
147 lang_ = lang_.substr(0, i);
148 LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
156 // Find the code we have for a given language code. Return empty if not found.
// A code is accepted when package().messages_file(code) is a readable file.
// NOTE(review): the loop construct and the return statements are not visible
// in this excerpt.
157 string realCode(string code)
159 // this loops at most twice
161 if (package().messages_file(code).isReadableFile())
// Fall back from a regional code to the bare language code: token(code, _, 0)
// presumably yields the part before the first underscore (e.g. pt_BR becomes
// pt) - confirm against support/lstrings.h.
163 if (contains(code, '_'))
164 code = token(code, '_', 0);
// Tell whether a translation exists for language code c: true when
// realCode(c) yields a non-empty code.
173 bool Messages::available(string const & c)
175 return !realCode(c).empty();
// Return the language code that realCode() resolves lang_ to
// (empty if none is found, per the contract stated on realCode).
179 string Messages::language() const
181 return realCode(lang_);
/// Reverse the byte order of a 32-bit value in place
/// (converts between little-endian and big-endian representation).
/// \param number the value to convert; overwritten with the result.
void swapInt(uint32_t & number)
{
	// Pure shift/mask arithmetic: byte 0 <-> byte 3, byte 1 <-> byte 2.
	number = ((number & 0x000000ffu) << 24)
	       | ((number & 0x0000ff00u) << 8)
	       | ((number & 0x00ff0000u) >> 8)
	       | ((number & 0xff000000u) >> 24);
}
196 // magic number = 0x950412de
198 // file format revision = 0
202 // offset of table with original strings
204 // offset of table with translation strings
206 // there is a hash table afterwards, but we ignore it
208 // Change the endianness of header data
213 void MoHeader::swapEnd()
229 // Change the endianness of string table data
234 void StringTable::swapEnd()
// Load the gettext MO catalogue for lang_ into trans_map_.
// Steps visible here: resolve the language code with realCode(), read the
// whole file into memory, check the magic number (including the byte-swapped
// form), require a utf-8 charset declaration in entry 0 of the translation
// table, then store every remaining original/translation pair.
// Failures are reported through LYXERR0.
// NOTE(review): several lines of this function (conditions, returns, the
// endianness handling) are not visible in this excerpt.
// NOTE(review): no check is visible that header->O, header->T, header->N or
// the per-string offset/length values stay inside moData, nor that the file
// is at least as large as a MoHeader (an empty file would make &moData[0]
// invalid) - confirm before feeding this untrusted .mo files.
243 bool Messages::readMoFile()
247 LYXERR0("No language given, nothing to load.");
251 string const code = realCode(lang_);
253 LYXERR0("Cannot find translation for language " << lang_);
257 string const filen = package().messages_file(code).toSafeFilesystemEncoding();
// stat() supplies the file size (buf.st_size) used to size the read buffer below.
261 if (stat(filen.c_str(), &buf)) {
262 LYXERR0("Cannot get information for file " << filen);
// The whole file is read into memory in one go, in binary mode.
266 vector<char> moData(buf.st_size);
268 ifstream is(filen.c_str(), ios::in | ios::binary);
269 if (!is.read(&moData[0], buf.st_size)) {
270 LYXERR0("Cannot read file " << filen);
// The header sits at the start of the buffer; it is accessed through a MoHeader pointer.
274 MoHeader * header = reinterpret_cast<MoHeader *>(&moData[0]);
// 0xde120495 is 0x950412de with its bytes reversed, i.e. the magic number
// as it appears when the file has the opposite byte order.
277 if (header->magic == 0xde120495) {
282 if (header->magic != 0x950412de) {
283 LYXERR0("Wrong magic number for file " << filen
284 << ".\nExpected 0x950412de, got 0x" << std::hex << header->magic);
// The two string tables live at byte offsets O and T from the start of the file.
288 StringTable * orig = reinterpret_cast<StringTable *>(&moData[0] + header->O);
289 StringTable * trans = reinterpret_cast<StringTable *>(&moData[0] + header->T);
292 // Handle endianness change
// Entry 0 of the translation table is read as info and searched for its
// charset declaration; only utf-8 (case-insensitive, trimmed) is accepted.
296 string const info = string(&moData[0] + trans[0].offset, trans[0].length);
297 size_t pos = info.find("charset=");
298 if (pos != string::npos) {
// NOTE(review): as shown, pos still points at the charset key itself when the
// value is extracted; a line advancing pos past the key may be among those
// not visible here - confirm.
301 size_t pos2 = info.find("\n", pos);
302 if (pos2 == string::npos)
303 charset = info.substr(pos);
305 charset = info.substr(pos, pos2 - pos);
306 charset = ascii_lowercase(trim(charset));
307 if (charset != "utf-8") {
308 LYXERR0("Wrong encoding " << charset << " for file " << filen);
// NOTE(review): the message below repeats the word encoding.
312 LYXERR0("Cannot find encoding encoding for file " << filen);
// Entries 1 .. N-1 are stored in trans_map_ (entry 0 was handled above).
316 for (size_t i = 1; i < header->N; ++i) {
318 // Handle endianness change
322 // Note that in theory the strings may contain NUL characters.
323 // This may be the case with plural forms
// Strings are therefore built from (pointer, length) rather than as C strings.
324 string const ostr(&moData[0] + orig[i].offset, orig[i].length);
325 docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
// Context annotations are stripped before the translation is stored.
327 cleanTranslation(tstr);
328 trans_map_[ostr] = tstr;
329 //lyxerr << ostr << " ==> " << tstr << endl;
// Look up the translation of message m.
// A hit in trans_map_ is handled by the branch below; otherwise the message
// itself is converted with from_utf8() and stripped of context annotations
// by cleanTranslation().
// NOTE(review): the return statements are not visible in this excerpt.
335 docstring const Messages::get(string const & m) const
340 TranslationMap::const_iterator it = trans_map_.find(m);
341 if (it != trans_map_.end())
344 docstring res = from_utf8(m);
345 cleanTranslation(res);
353 // This is the dummy variant.
// NOTE(review): the preprocessor condition selecting this variant is not
// visible in this excerpt.
// Storage for the static data member Messages::gui_lang_.
357 std::string Messages::gui_lang_;
// The language argument is unused and the constructor body is empty.
359 Messages::Messages(string const & /* l */) {}
// Dummy-variant lookup: no catalogue is consulted; the message is converted
// with from_ascii() and stripped of context annotations by cleanTranslation().
// NOTE(review): the return statement is not visible in this excerpt.
361 docstring const Messages::get(string const & m) const
363 docstring trans = from_ascii(m);
364 cleanTranslation(trans);
368 std::string Messages::language() const
373 bool Messages::available(string const & /* c */)