X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2FMessages.cpp;h=da1c3cc0b8fc14241fb654be683409a75aa8d978;hb=268ae66e3c7df04effc329373dc887715d9c6f06;hp=b86874c1780af8431c687f9f54c92fca5b4b220c;hpb=9d0ea8aeff32833a90b3fe64df0c5518a9e241be;p=lyx.git

diff --git a/src/support/Messages.cpp b/src/support/Messages.cpp
index b86874c178..da1c3cc0b8 100644
--- a/src/support/Messages.cpp
+++ b/src/support/Messages.cpp
@@ -2,242 +2,378 @@
  * This file is part of LyX, the document processor.
  * Licence details can be found in the file COPYING.
  *
- * \author Lars Gullik Bjønnes
+ * \author Lars Gullik Bjønnes
+ * \author Jean-Marc Lasgouttes
  *
  * Full author contact details are available in file CREDITS.
  */
 
+/*
+  This contains a limited parser for gettext's mo files. Several features are
+  not implemented currently:
+   * encoding is supposed to be UTF-8 (the charset parameter is enforced)
+   * context is not handled (implemented differently in LyX)
+   * plural forms are not implemented (not used for now in LyX).
+
+  The data is loaded in a std::map object for simplicity.
+ */
+
+/*
+  Format of a MO file. Source: http://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+             byte
+                  +------------------------------------------+
+               0  | magic number = 0x950412de                |
+                  |                                          |
+               4  | file format revision = 0                 |
+                  |                                          |
+               8  | number of strings                        |  == N
+                  |                                          |
+              12  | offset of table with original strings    |  == O
+                  |                                          |
+              16  | offset of table with translation strings |  == T
+                  |                                          |
+              20  | size of hashing table                    |  == S
+                  |                                          |
+              24  | offset of hashing table                  |  == H
+                  |                                          |
+                  .                                          .
+                  .    (possibly more entries later)         .
+                  .                                          .
+                  |                                          |
+               O  | length & offset 0th string  ----------------.
+           O + 8  | length & offset 1st string  ------------------.
+                   ...                                    ...   | |
+     O + ((N-1)*8)| length & offset (N-1)th string           |  | |
+                  |                                          |  | |
+               T  | length & offset 0th translation  ---------------.
+           T + 8  | length & offset 1st translation  -----------------.
+                   ...                                    ...   | | | |
+     T + ((N-1)*8)| length & offset (N-1)th translation      |  | | | |
+                  |                                          |  | | | |
+               H  | start hash table                         |  | | | |
+                   ...                                    ...   | | | |
+       H + S * 4  | end hash table                           |  | | | |
+                  |                                          |  | | | |
+                  | NUL terminated 0th string  <----------------' | | |
+                  |                                          |    | | |
+                  | NUL terminated 1st string  <------------------' | |
+                  |                                          |      | |
+                   ...                                    ...       | |
+                  |                                          |      | |
+                  | NUL terminated 0th translation  <---------------' |
+                  |                                          |        |
+                  | NUL terminated 1st translation  <-----------------'
+                  |                                          |
+                   ...                                    ...
+                  |                                          |
+                  +------------------------------------------+
+
+ */
+
 #include <config.h>
 
 #include "support/Messages.h"
 
 #include "support/debug.h"
 #include "support/docstring.h"
-#include "support/environment.h"
+#include "support/lstrings.h"
 #include "support/Package.h"
 #include "support/unicode.h"
 
-#include <boost/current_function.hpp>
+#include "support/lassert.h"
+
+#include <boost/cstdint.hpp>
 
 #include <cerrno>
+#include <fstream>
+#include <utility>
 
-using std::map;
-using std::string;
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
 
-namespace {
+using namespace std;
+using boost::uint32_t;
 
-using lyx::docstring;
-using lyx::from_ascii;
+namespace lyx {
 
-void cleanTranslation(docstring & trans) 
+void cleanTranslation(docstring & trans)
 {
 	/*
 	  Some english words have different translations, depending on
 	  context. In these cases the original string is augmented by
 	  context information (e.g. "To:[[as in 'From page x to page
-	  y']]" and "To:[[as in 'From format x to format y']]". This
-	  means that we need to filter out everything in double square
-	  brackets at the end of the string, otherwise the user sees
-	  bogus messages. If we are unable to honour the request we
-	  just return what we got in.
+	  y']]" and "To:[[as in 'From format x to format y']]". Also,
+	  when placeholders are used, the context can indicate what will
+	  be substituted for the placeholder (e.g. "%1$s[[date]], %1$s
+	  [[time]]"). This means that we need to filter out everything
+	  in double square brackets at the end of the string, otherwise
+	  the user sees bogus messages. If we are unable to honour the
+	  request we just return what we got in.
 	*/
-	size_t const pos1 = trans.find(from_ascii("[["));
-	if (pos1 != docstring::npos) {
-		size_t const pos2 = trans.find(from_ascii("]]"), pos1);
-		if (pos2 != docstring::npos) 
-			trans.erase(pos1, pos2 - pos1 + 2);
+	static docstring const ctx_start = from_ascii("[[");
+	static docstring const ctx_end = from_ascii("]]");
+	while (true) {
+		size_t const pos1 = trans.find(ctx_start);
+		if (pos1 != docstring::npos) {
+			size_t const pos2 = trans.find(ctx_end, pos1);
+			if (pos2 != docstring::npos) {
+				trans.erase(pos1, pos2 - pos1 + 2);
+				continue;
+			}
+		}
+		break;
 	}
 }
 
-}
+} // lyx
 
 
 #ifdef ENABLE_NLS
 
-#  ifdef HAVE_LOCALE_H
-#    include <locale.h>
-#  endif
-
-#  if HAVE_GETTEXT
-#    include <libintl.h>      // use the header already in the system *EK*
-#  else
-#    include "../../intl/libintl.h"
-#  endif
+using namespace lyx::support;
 
 namespace lyx {
 
-using support::package;
-using support::getEnv;
-using support::setEnv;
+std::string Messages::gui_lang_;
 
 
-// This version use the traditional gettext.
 Messages::Messages(string const & l)
-	: lang_(l), warned_(false)
+	: lang_(l)
 {
 	// strip off any encoding suffix, i.e., assume 8-bit po files
-	string::size_type i = lang_.find(".");
+	size_t i = lang_.find(".");
 	lang_ = lang_.substr(0, i);
-	LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION
-		<< ": language(" << lang_ << ")");
+	LYXERR(Debug::LOCALE, "language(" << lang_ << ")");
+
+	readMoFile();
 }
 
 
-void Messages::init()
+namespace {
+
+// Find the code we have for a given language code. Return empty if not found.
+string realCode(string code)
 {
-	errno = 0;
-	string const locale_dir = package().locale_dir().toFilesystemEncoding();
-	char const * c = bindtextdomain(PACKAGE, locale_dir.c_str());
-	int e = errno;
-	if (e) {
-		LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION << '\n'
-			<< "Error code: " << errno << '\n'
-			<< "Directory : " << package().locale_dir().absFilename() << '\n'
-			<< "Rtn value : " << c);
+	// this loops at most twice
+	while (true) {
+		if (package().messages_file(code).isReadableFile())
+			return code;
+		if (contains(code, '_'))
+			code = token(code, '_', 0);
+		else
+			break;
 	}
+	return string();
+}
+}
 
-	if (!bind_textdomain_codeset(PACKAGE, ucs4_codeset)) {
-		LYXERR(Debug::DEBUG, BOOST_CURRENT_FUNCTION << '\n'
-			<< "Error code: " << errno << '\n'
-			<< "Codeset   : " << ucs4_codeset << '\n');
-	}
 
-	textdomain(PACKAGE);
+bool Messages::available(string const & c)
+{
+	return !realCode(c).empty();
 }
 
 
-docstring const Messages::get(string const & m) const
+string Messages::language() const
 {
-	if (m.empty())
-		return docstring();
+	return realCode(lang_);
+}
 
-	// Look for the translated string in the cache.
-	TranslationCache::iterator it = cache_.find(m);
-	if (it != cache_.end())
-		return it->second;
+namespace {
 
-	// The string was not found, use gettext to generate it
-
-	string const oldLANGUAGE = getEnv("LANGUAGE");
-	string const oldLC_ALL = getEnv("LC_ALL");
-	if (!lang_.empty()) {
-		// This GNU extension overrides any language locale
-		// wrt gettext.
-		setEnv("LANGUAGE", lang_);
-		// However, setting LANGUAGE does nothing when the
-		// locale is "C". Therefore we set the locale to
-		// something that is believed to exist on most
-		// systems. The idea is that one should be able to
-		// load German documents even without having de_DE
-		// installed.
-		setEnv("LC_ALL", "en_US");
-#ifdef HAVE_LC_MESSAGES
-		setlocale(LC_MESSAGES, "");
-#endif
+void swapInt(uint32_t & number)
+{
+	unsigned char * num_ar = reinterpret_cast<unsigned char *>(&number);
+	swap(num_ar[0], num_ar[3]);
+	swap(num_ar[1], num_ar[2]);
+}
+
+
+struct MoHeader
+{
+	// magic number = 0x950412de
+	uint32_t magic;
+	// file format revision = 0
+	uint32_t rev;
+	// number of strings
+	uint32_t N;
+	// offset of table with original strings
+	uint32_t O;
+	// offset of table with translation strings
+	uint32_t T;
+	// there is a hash table afterwards, but we ignore it
+
+	// Change the endianness of header data
+	void swapEnd();
+};
+
+
+void MoHeader::swapEnd()
+{
+	swapInt(magic);
+	swapInt(rev);
+	swapInt(N);
+	swapInt(O);
+	swapInt(T);
+}
+
+struct StringTable
+{
+	// string length
+	uint32_t length;
+	// string offset
+	uint32_t offset;
+
+	// Change the endianness of string table data
+	void swapEnd();
+};
+
+
+void StringTable::swapEnd()
+{
+	swapInt(length);
+	swapInt(offset);
+}
+
+
+} // namespace anon
+
+bool Messages::readMoFile()
+{
+	// FIXME:remove
+	if (lang_.empty()) {
+		LYXERR0("No language given, nothing to load.");
+		return false;
 	}
 
-	char const * m_c = m.c_str();
-	char const * trans_c = gettext(m_c);
-	docstring trans;
-	if (!trans_c)
-		LYXERR0("Undefined result from gettext");
-	else if (trans_c == m_c) {
-		LYXERR(Debug::DEBUG, "Same as entered returned");
-		trans = from_ascii(m);
-	} else {
-		LYXERR(Debug::DEBUG, "We got a translation");
-		// m is actually not a char const * but ucs4 data
-		trans = reinterpret_cast<char_type const *>(trans_c);
+	string const code = realCode(lang_);
+	if (code.empty()) {
+		LYXERR(Debug::LOCALE, "Cannot find translation for language " << lang_);
+		return false;
 	}
 
-	cleanTranslation(trans);
+	string const filen = package().messages_file(code).toSafeFilesystemEncoding();
 
-	// Reset environment variables as they were.
-	if (!lang_.empty()) {
-		// Reset everything as it was.
-		setEnv("LANGUAGE", oldLANGUAGE);
-		setEnv("LC_ALL", oldLC_ALL);
-#ifdef HAVE_LC_MESSAGES
-		setlocale(LC_MESSAGES, "");
-#endif
+	// get file size
+	struct stat buf;
+	if (stat(filen.c_str(), &buf)) {
+		LYXERR0("Cannot get information for file " << filen);
+		return false;
 	}
 
-	std::pair<TranslationCache::iterator, bool> result =
-		cache_.insert(std::make_pair(m, trans));
+	vector<char> moData(buf.st_size);
 
-	BOOST_ASSERT(result.second);
+	ifstream is(filen.c_str(), ios::in | ios::binary);
+	if (!is.read(&moData[0], buf.st_size)) {
+		LYXERR0("Cannot read file " << filen);
+		return false;
+	}
 
-	return result.first->second;
-}
+	MoHeader * header = reinterpret_cast<MoHeader *>(&moData[0]);
 
-} // namespace lyx
+	bool doSwap = false;
+	if (header->magic == 0xde120495) {
+		header->swapEnd();
+		doSwap = true;
+	}
 
-#else // ENABLE_NLS
-// This is the dummy variant.
+	if (header->magic != 0x950412de) {
+		LYXERR0("Wrong magic number for file " << filen
+			<< ".\nExpected 0x950412de, got 0x" << std::hex << header->magic << std::dec);
+		return false;
+	}
 
-namespace lyx {
+	StringTable * orig = reinterpret_cast<StringTable *>(&moData[0] + header->O);
+	StringTable * trans = reinterpret_cast<StringTable *>(&moData[0] + header->T);
+	// First the header
+	if (doSwap) {
+		// Handle endianness change
+		orig[0].swapEnd();
+		trans[0].swapEnd();
+	}
+	string const info = string(&moData[0] + trans[0].offset, trans[0].length);
+	size_t pos = info.find("charset=");
+	if (pos != string::npos) {
+		pos += 8;
+		string charset;
+		size_t pos2 = info.find("\n", pos);
+		if (pos2 == string::npos)
+			charset = info.substr(pos);
+		else
+			charset = info.substr(pos, pos2 - pos);
+		charset = ascii_lowercase(trim(charset));
+		if (charset != "utf-8") {
+			LYXERR0("Wrong encoding " << charset << " for file " << filen);
+			return false;
+		}
+	} else {
+		LYXERR0("Cannot find encoding encoding for file " << filen);
+		return false;
+	}
 
-Messages::Messages(string const & l) {}
+	for (size_t i = 1; i < header->N; ++i) {
+		if (doSwap) {
+			// Handle endianness change
+			orig[i].swapEnd();
+			trans[i].swapEnd();
+		}
+		// Note that in theory the strings may contain NUL characters.
+		// This may be the case with plural forms
+		string const ostr(&moData[0] + orig[i].offset, orig[i].length);
+		docstring tstr = from_utf8(string(&moData[0] + trans[i].offset,
+						  trans[i].length));
+		cleanTranslation(tstr);
+		trans_map_[ostr] = tstr;
+		//lyxerr << ostr << " ==> " << tstr << endl;
+	}
 
-void Messages::init()
-{
+	return true;
 }
 
-
 docstring const Messages::get(string const & m) const
 {
-	docstring trans = from_ascii(m);
-	cleanTranslation(trans);
-	return trans;
+	if (m.empty())
+		return docstring();
+
+	TranslationMap::const_iterator it = trans_map_.find(m);
+	if (it != trans_map_.end())
+		return it->second;
+	else {
+		docstring res = from_utf8(m);
+		cleanTranslation(res);
+		return res;
+	}
 }
 
 } // namespace lyx
 
-#endif
-
-#if 0
-
--#include <locale>
+#else // ENABLE_NLS
+// This is the dummy variant.
 
 namespace lyx {
 
-// This version of the Pimpl utilizes the message capability of
-// libstdc++ that is distributed with GNU G++.
-class Messages::Pimpl {
-public:
-	typedef std::messages<char>::catalog catalog;
-
-	Pimpl(string const & l)
-		: lang_(l),
-		  loc_gl(lang_.c_str()),
-		  mssg_gl(std::use_facet<std::messages<char> >(loc_gl))
-	{
-		//LYXERR("Messages: language(" << l << ") in dir(" << dir << ")");
+std::string Messages::gui_lang_;
 
-		string const locale_dir = package().locale_dir().toFilesystemEncoding();
-		cat_gl = mssg_gl.open(PACKAGE, loc_gl, locale_dir.c_str());
+Messages::Messages(string const & /* l */) {}
 
-	}
+docstring const Messages::get(string const & m) const
+{
+	docstring trans = from_ascii(m);
+	cleanTranslation(trans);
+	return trans;
+}
 
-	~Pimpl()
-	{
-		mssg_gl.close(cat_gl);
-	}
+std::string Messages::language() const
+{
+	return string();
+}
 
-	docstring const get(string const & msg) const
-	{
-		return mssg_gl.get(cat_gl, 0, 0, msg);
-	}
-private:
-	///
-	string lang_;
-	///
-	std::locale loc_gl;
-	///
-	std::messages<char> const & mssg_gl;
-	///
-	catalog cat_gl;
-};
+bool Messages::available(string const & /* c */)
+{
+	return false;
+}
 
 } // namespace lyx