X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Fdocstream.C;h=cec49561563f1af43fbc43c7effea12e01c9b883;hb=6bfb3e6dc47ae7caa0565607e52a34104ea4ac19;hp=a2bb3c3172ef9c97c6b88daa9bd16cb6825e472b;hpb=6c300f72a217722652dc27db9108e1050028979c;p=lyx.git diff --git a/src/support/docstream.C b/src/support/docstream.C index a2bb3c3172..cec4956156 100644 --- a/src/support/docstream.C +++ b/src/support/docstream.C @@ -20,62 +20,52 @@ using lyx::ucs4_codeset; -using lyx::ucs2_codeset; +using std::string; -namespace { -char const * utf8_codeset = "UTF-8"; +namespace { // We use C IO throughout this file, because the facets might be used with // lyxerr in the future. -class utf8_codecvt_facet_exception : public std::exception { -public: - virtual ~utf8_codecvt_facet_exception() throw() {} - virtual const char* what() const throw() - { - return "iconv problem in utf8_codecvt_facet initialization"; - } -}; - - /// codecvt facet for conversion of UCS4 (internal representation) to UTF8 /// (external representation) or vice versa -class utf8_codecvt_facet : public std::codecvt +class iconv_codecvt_facet : public std::codecvt { typedef std::codecvt base; public: /// Constructor. You have to specify with \p inout whether you want /// to use this facet only for input, only for output or for both. - explicit utf8_codecvt_facet(std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out, + explicit iconv_codecvt_facet(string const & encoding = "UTF-8", + std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out, size_t refs = 0) - : base(refs) + : base(refs), utf8_(encoding == "UTF-8") { if (inout & std::ios_base::in) { - in_cd_ = iconv_open(ucs4_codeset, utf8_codeset); + in_cd_ = iconv_open(ucs4_codeset, encoding.c_str()); if (in_cd_ == (iconv_t)(-1)) { fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n", errno, strerror(errno)); fflush(stderr); - throw utf8_codecvt_facet_exception(); + throw lyx::iconv_codecvt_facet_exception(); } } else in_cd_ = (iconv_t)(-1); if (inout & std::ios_base::out) { - out_cd_ = iconv_open(utf8_codeset, ucs4_codeset); + out_cd_ = iconv_open(encoding.c_str(), ucs4_codeset); if (out_cd_ == (iconv_t)(-1)) { fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n", errno, strerror(errno)); fflush(stderr); - throw utf8_codecvt_facet_exception(); + throw lyx::iconv_codecvt_facet_exception(); } } else out_cd_ = (iconv_t)(-1); } protected: - virtual ~utf8_codecvt_facet() + virtual ~iconv_codecvt_facet() { if (in_cd_ != (iconv_t)(-1)) if (iconv_close(in_cd_) == -1) { @@ -155,8 +145,13 @@ protected: } virtual int do_max_length() const throw() { - // UTF8 uses at most 6 bytes to represent one code point - return 6; + // UTF8 uses at most 4 bytes to represent one UCS4 code point + // (see RFC 3629). RFC 2279 specifies 6 bytes, but that + // information is outdated, and RFC 2279 has been superseded by + // RFC 3629. + // All other encodings encode one UCS4 code point in one byte + // (and can therefore only encode a subset of UCS4) + return utf8_ ? 4 : 1; } private: /// Do the actual conversion. The interface is equivalent to that of @@ -186,6 +181,8 @@ private: } iconv_t in_cd_; iconv_t out_cd_; + /// Is the narrow encoding UTF8? + bool utf8_; }; } // namespace anon @@ -194,39 +191,47 @@ private: namespace lyx { -idocfstream::idocfstream() : base() +const char * iconv_codecvt_facet_exception::what() const throw() +{ + return "iconv problem in iconv_codecvt_facet initialization"; +} + + +idocfstream::idocfstream(string const & encoding) : base() { std::locale global; - std::locale locale(global, new utf8_codecvt_facet(in)); + std::locale locale(global, new iconv_codecvt_facet(encoding, in)); imbue(locale); } -idocfstream::idocfstream(const char* s, std::ios_base::openmode mode) +idocfstream::idocfstream(const char* s, std::ios_base::openmode mode, + string const & encoding) : base() { // We must imbue the stream before openening the file std::locale global; - std::locale locale(global, new utf8_codecvt_facet(in)); + std::locale locale(global, new iconv_codecvt_facet(encoding, in)); imbue(locale); open(s, mode); } -odocfstream::odocfstream() : base() +odocfstream::odocfstream(string const & encoding) : base() { std::locale global; - std::locale locale(global, new utf8_codecvt_facet(out)); + std::locale locale(global, new iconv_codecvt_facet(encoding, out)); imbue(locale); } - -odocfstream::odocfstream(const char* s, std::ios_base::openmode mode) + +odocfstream::odocfstream(const char* s, std::ios_base::openmode mode, + string const & encoding) : base() { // We must imbue the stream before openening the file std::locale global; - std::locale locale(global, new utf8_codecvt_facet(out)); + std::locale locale(global, new iconv_codecvt_facet(encoding, out)); imbue(locale); open(s, mode); } @@ -236,7 +241,7 @@ odocfstream::odocfstream(const char* s, std::ios_base::openmode mode) #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__) // We get undefined references to these virtual methods. This looks like // a bug in gcc. The implementation here does not do anything useful, since -// it is overriden in utf8_codecvt_facet. +// it is overriden in iconv_codecvt_facet. namespace std { template<> codecvt::result codecvt::do_out(mbstate_t &, const lyx::char_type *, const lyx::char_type *, const lyx::char_type *&,