using lyx::ucs4_codeset;
-using lyx::ucs2_codeset;
using std::string;
namespace {
-char const * utf8_codeset = "UTF-8";
-
// We use C IO throughout this file, because the facets might be used with
// lyxerr in the future.
explicit iconv_codecvt_facet(string const & encoding = "UTF-8",
std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
size_t refs = 0)
- : base(refs), utf8_(encoding == "UTF-8")
+ : base(refs), encoding_(encoding)
{
if (inout & std::ios_base::in) {
in_cd_ = iconv_open(ucs4_codeset, encoding.c_str());
size_t outbytesleft = (to_end - to) * sizeof(extern_type);
from_next = from;
to_next = to;
- return do_iconv(out_cd_, reinterpret_cast<char const **>(&from_next),
+ result const retval = do_iconv(out_cd_,
+ reinterpret_cast<char const **>(&from_next),
&inbytesleft, &to_next, &outbytesleft);
+ if (retval == base::error) {
+ fprintf(stderr,
+ "Error %d returned from iconv when converting from %s to %s: %s\n",
+ errno, ucs4_codeset, encoding_.c_str(),
+ strerror(errno));
+ fputs("Converted input:", stderr);
+ for (intern_type const * i = from; i < from_next; ++i) {
+ unsigned int const c = *i;
+ fprintf(stderr, " 0x%04x", c);
+ }
+ unsigned int const c = *from_next;
+ fprintf(stderr, "\nStopped at: 0x%04x\n", c);
+ fputs("Unconverted input:", stderr);
+ for (intern_type const * i = from_next + 1; i < from_end; ++i) {
+ unsigned int const c = *i;
+ fprintf(stderr, " 0x%04x", c);
+ }
+ fputs("\nConverted output:", stderr);
+ for (extern_type const * i = to; i < to_next; ++i) {
+ // extern_type may be signed, avoid output of
+ // something like 0xffffffc2
+ unsigned int const c =
+ *reinterpret_cast<unsigned char const *>(i);
+ fprintf(stderr, " 0x%02x", c);
+ }
+ fputc('\n', stderr);
+ fflush(stderr);
+ }
+ return retval;
}
virtual result do_unshift(state_type &, extern_type * to,
extern_type *, extern_type *& to_next) const
size_t outbytesleft = (to_end - to) * sizeof(intern_type);
from_next = from;
to_next = to;
- return do_iconv(in_cd_, &from_next, &inbytesleft,
+ result const retval = do_iconv(in_cd_, &from_next, &inbytesleft,
reinterpret_cast<char **>(&to_next),
&outbytesleft);
+ if (retval == base::error) {
+ fprintf(stderr,
+ "Error %d returned from iconv when converting from %s to %s: %s\n",
+ errno, encoding_.c_str(), ucs4_codeset,
+ strerror(errno));
+ fputs("Converted input:", stderr);
+ for (extern_type const * i = from; i < from_next; ++i) {
+ // extern_type may be signed, avoid output of
+ // something like 0xffffffc2
+ unsigned int const c =
+ *reinterpret_cast<unsigned char const *>(i);
+ fprintf(stderr, " 0x%02x", c);
+ }
+ unsigned int const c =
+ *reinterpret_cast<unsigned char const *>(from_next);
+ fprintf(stderr, "\nStopped at: 0x%02x\n", c);
+ fputs("Unconverted input:", stderr);
+ for (extern_type const * i = from_next + 1; i < from_end; ++i) {
+ unsigned int const c =
+ *reinterpret_cast<unsigned char const *>(i);
+ fprintf(stderr, " 0x%02x", c);
+ }
+ fputs("\nConverted output:", stderr);
+ for (intern_type const * i = to; i < to_next; ++i) {
+ unsigned int const c = *i;
+ fprintf(stderr, " 0x%02x", c);
+ }
+ fputc('\n', stderr);
+ fflush(stderr);
+ }
+ return retval;
}
virtual int do_encoding() const throw()
{
// RFC 3629.
// All other encodings encode one UCS4 code point in one byte
// (and can therefore only encode a subset of UCS4)
- return utf8_ ? 4 : 1;
+ return encoding_ == "UTF-8" ? 4 : 1;
}
private:
/// Do the actual conversion. The interface is equivalent to that of
inline base::result do_iconv(iconv_t cd, char const ** from,
size_t * inbytesleft, char ** to, size_t * outbytesleft) const
{
- char const * to_start = *to;
+ char const * const to_start = *to;
size_t converted = iconv(cd, const_cast<char ICONV_CONST **>(from),
inbytesleft, to, outbytesleft);
if (converted == (size_t)(-1)) {
return base::partial;
case EILSEQ:
default:
- fprintf(stderr, "Error %d returned from iconv: %s\n",
- errno, strerror(errno));
- fflush(stderr);
return base::error;
}
}
}
iconv_t in_cd_;
iconv_t out_cd_;
- /// Is the narrow encoding UTF8?
- bool utf8_;
+ /// The narrow encoding
+ std::string encoding_;
};
} // namespace anon
}
-idocfstream::idocfstream() : base()
+idocfstream::idocfstream(string const & encoding) : base()
{
// A default-constructed std::locale is a copy of the current global
// locale. The stream is imbued with that locale plus an
// iconv_codecvt_facet created with only the `in` openmode flag.
std::locale global;
- std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
+ std::locale locale(global, new iconv_codecvt_facet(encoding, in));
imbue(locale);
}
-idocfstream::idocfstream(const char* s, std::ios_base::openmode mode)
+idocfstream::idocfstream(const char* s, std::ios_base::openmode mode,
+ string const & encoding)
: base()
{
// We must imbue the stream before opening the file, so the locale
// carrying the iconv_codecvt_facet is installed first and open()
// is called last.
std::locale global;
- std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
+ std::locale locale(global, new iconv_codecvt_facet(encoding, in));
imbue(locale);
open(s, mode);
}
open(s, mode);
}
+/// Returns a SetEnc constructed from \c encoding, for use with operator<<(odocstream &, SetEnc).
+SetEnc setEncoding(string const & encoding)
+{
+ return SetEnc(encoding);
+}
+
+/// Imbues \c os with a new output iconv_codecvt_facet for e.encoding; no-op if its stream buffer's locale has no such facet.
+odocstream & operator<<(odocstream & os, SetEnc e)
+{
+ if (std::has_facet<iconv_codecvt_facet>(os.rdbuf()->getloc())) {
+ // This stream must be a file stream, since we never imbue
+ // any other stream with a locale having an iconv_codecvt_facet.
+ // Flush the stream so that all pending output is written
+ // with the old encoding.
+ os.flush();
+ std::locale locale(os.rdbuf()->getloc(),
+ new iconv_codecvt_facet(e.encoding, std::ios_base::out));
+ // FIXME Does changing the codecvt facet of an open file
+ // stream always work? It does with gcc 4.1, but I have read
+ // somewhere that it does not with MSVC.
+ // What does the standard say?
+ os.imbue(locale);
+ }
+ return os; // returned in both cases so that << calls can be chained
+}
+
}
#if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)