#include <config.h>
#include "docstream.h"
+#include "unicode.h"
#include <cerrno>
#include <cstdio>
#include <iconv.h>
#include <locale>
-namespace {
-#ifdef WORDS_BIGENDIAN
-char const * ucs4_codeset = "UCS-4BE";
-#else
-char const * ucs4_codeset = "UCS-4LE";
-#endif
-char const * utf8_codeset = "UTF-8";
+using lyx::ucs4_codeset;
-// We use C IO throughout this file, because the facets might be used with
-// lyxerr in the future.
+using std::string;
-class utf8_codecvt_facet_exception : public std::exception {
-public:
- virtual ~utf8_codecvt_facet_exception() throw() {}
- virtual const char* what() const throw()
- {
- return "iconv problem in utf8_codecvt_facet initialization";
- }
-};
+namespace {
+
+// We use C IO throughout this file, because the facets might be used with
+// lyxerr in the future.
/// codecvt facet for conversion of UCS4 (internal representation) to UTF8
/// (external representation) or vice versa
-class utf8_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t>
+class iconv_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t>
{
typedef std::codecvt<lyx::char_type, char, std::mbstate_t> base;
public:
/// Constructor. You have to specify with \p inout whether you want
/// to use this facet only for input, only for output or for both.
- explicit utf8_codecvt_facet(std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
+ explicit iconv_codecvt_facet(string const & encoding = "UTF-8",
+ std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
size_t refs = 0)
- : base(refs)
+ : base(refs), utf8_(encoding == "UTF-8")
{
if (inout & std::ios_base::in) {
- in_cd_ = iconv_open(ucs4_codeset, utf8_codeset);
+ in_cd_ = iconv_open(ucs4_codeset, encoding.c_str());
if (in_cd_ == (iconv_t)(-1)) {
fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n",
errno, strerror(errno));
fflush(stderr);
- throw utf8_codecvt_facet_exception();
+ throw lyx::iconv_codecvt_facet_exception();
}
} else
in_cd_ = (iconv_t)(-1);
if (inout & std::ios_base::out) {
- out_cd_ = iconv_open(utf8_codeset, ucs4_codeset);
+ out_cd_ = iconv_open(encoding.c_str(), ucs4_codeset);
if (out_cd_ == (iconv_t)(-1)) {
fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n",
errno, strerror(errno));
fflush(stderr);
- throw utf8_codecvt_facet_exception();
+ throw lyx::iconv_codecvt_facet_exception();
}
} else
out_cd_ = (iconv_t)(-1);
}
protected:
- virtual ~utf8_codecvt_facet()
+ virtual ~iconv_codecvt_facet()
{
if (in_cd_ != (iconv_t)(-1))
if (iconv_close(in_cd_) == -1) {
}
virtual int do_max_length() const throw()
{
- // UTF8 uses at most 6 bytes to represent one code point
- return 6;
+ // UTF8 uses at most 4 bytes to represent one UCS4 code point
+ // (see RFC 3629). RFC 2279 specifies 6 bytes, but that
+ // information is outdated, and RFC 2279 has been superseded by
+ // RFC 3629.
+ // All other encodings encode one UCS4 code point in one byte
+ // (and can therefore only encode a subset of UCS4)
+ return utf8_ ? 4 : 1;
}
private:
/// Do the actual conversion. The interface is equivalent to that of
}
iconv_t in_cd_;
iconv_t out_cd_;
+ /// Is the narrow encoding UTF8?
+ bool utf8_;
};
-}
+} // namespace anon
namespace lyx {
-idocfstream::idocfstream() : base()
+const char * iconv_codecvt_facet_exception::what() const throw()
+{
+ return "iconv problem in iconv_codecvt_facet initialization";
+}
+
+
+idocfstream::idocfstream(string const & encoding) : base()
{
std::locale global;
- std::locale locale(global, new utf8_codecvt_facet(in));
+ std::locale locale(global, new iconv_codecvt_facet(encoding, in));
imbue(locale);
}
-idocfstream::idocfstream(const char* s, std::ios_base::openmode mode)
+idocfstream::idocfstream(const char* s, std::ios_base::openmode mode,
+ string const & encoding)
: base()
{
// We must imbue the stream before openening the file
std::locale global;
- std::locale locale(global, new utf8_codecvt_facet(in));
+ std::locale locale(global, new iconv_codecvt_facet(encoding, in));
imbue(locale);
open(s, mode);
}
-odocfstream::odocfstream() : base()
+odocfstream::odocfstream(string const & encoding) : base()
{
std::locale global;
- std::locale locale(global, new utf8_codecvt_facet(out));
+ std::locale locale(global, new iconv_codecvt_facet(encoding, out));
imbue(locale);
}
-
-odocfstream::odocfstream(const char* s, std::ios_base::openmode mode)
+
+odocfstream::odocfstream(const char* s, std::ios_base::openmode mode,
+ string const & encoding)
: base()
{
// We must imbue the stream before openening the file
std::locale global;
- std::locale locale(global, new utf8_codecvt_facet(out));
+ std::locale locale(global, new iconv_codecvt_facet(encoding, out));
imbue(locale);
open(s, mode);
}
}
+
+#if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)
+// We get undefined references to these virtual methods. This looks like
+// a bug in gcc. The implementation here does not do anything useful, since
+// it is overriden in iconv_codecvt_facet.
+namespace std {
+template<> codecvt<lyx::char_type, char, mbstate_t>::result
+codecvt<lyx::char_type, char, mbstate_t>::do_out(mbstate_t &, const lyx::char_type *, const lyx::char_type *, const lyx::char_type *&,
+ char *, char *, char *&) const { return error; }
+template<> codecvt<lyx::char_type, char, mbstate_t>::result
+codecvt<lyx::char_type, char, mbstate_t>::do_unshift(mbstate_t &, char *, char *, char *&) const { return error; }
+template<> codecvt<lyx::char_type, char, mbstate_t>::result
+codecvt<lyx::char_type, char, mbstate_t>::do_in(mbstate_t &, const char *, const char *, const char *&,
+ lyx::char_type *, lyx::char_type *, lyx::char_type *&) const { return error; }
+template<> int codecvt<lyx::char_type, char, mbstate_t>::do_encoding() const throw() { return 0; }
+template<> bool codecvt<lyx::char_type, char, mbstate_t>::do_always_noconv() const throw() { return true; }
+#if __GNUC__ == 3 && __GNUC_MINOR__ < 4
+template<> int codecvt<lyx::char_type, char, mbstate_t>::do_length(mbstate_t const &, const char *, const char *, size_t) const { return 1; }
+#else
+template<> int codecvt<lyx::char_type, char, mbstate_t>::do_length(mbstate_t &, const char *, const char *, size_t) const { return 1; }
+#endif
+template<> int codecvt<lyx::char_type, char, mbstate_t>::do_max_length() const throw() { return 4; }
+}
+#endif