]> git.lyx.org Git - lyx.git/blobdiff - src/support/docstream.C
* src/encoding.C (latexChar,read):
[lyx.git] / src / support / docstream.C
index 4347d6ba5b345b8520ece846e475cc6288d23d09..ac9a8ed23050ac38e91e9c28787dda125151e798 100644 (file)
 
 
 using lyx::ucs4_codeset;
-using lyx::ucs2_codeset;
 
 using std::string;
 
 
 namespace {
 
-char const * utf8_codeset = "UTF-8";
-
 // We use C IO throughout this file, because the facets might be used with
 // lyxerr in the future.
 
@@ -44,7 +41,7 @@ public:
        explicit iconv_codecvt_facet(string const & encoding = "UTF-8",
                        std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
                        size_t refs = 0)
-               : base(refs), utf8_(encoding == "UTF-8")
+               : base(refs), encoding_(encoding)
        {
                if (inout & std::ios_base::in) {
                        in_cd_ = iconv_open(ucs4_codeset, encoding.c_str());
@@ -92,8 +89,38 @@ protected:
                size_t outbytesleft = (to_end - to) * sizeof(extern_type);
                from_next = from;
                to_next = to;
-               return do_iconv(out_cd_, reinterpret_cast<char const **>(&from_next),
+               result const retval = do_iconv(out_cd_,
+                               reinterpret_cast<char const **>(&from_next),
                                &inbytesleft, &to_next, &outbytesleft);
+               if (retval == base::error) {
+                       fprintf(stderr,
+                               "Error %d returned from iconv when converting from %s to %s: %s\n",
+                               errno, ucs4_codeset, encoding_.c_str(),
+                               strerror(errno));
+                       fputs("Converted input:", stderr);
+                       for (intern_type const * i = from; i < from_next; ++i) {
+                               unsigned int const c = *i;
+                               fprintf(stderr, " 0x%04x", c);
+                       }
+                       unsigned int const c = *from_next;
+                       fprintf(stderr, "\nStopped at: 0x%04x\n", c);
+                       fputs("Unconverted input:", stderr);
+                       for (intern_type const * i = from_next + 1; i < from_end; ++i) {
+                               unsigned int const c = *i;
+                               fprintf(stderr, " 0x%04x", c);
+                       }
+                       fputs("\nConverted output:", stderr);
+                       for (extern_type const * i = to; i < to_next; ++i) {
+                               // extern_type may be signed, avoid output of
+                               // something like 0xffffffc2
+                               unsigned int const c =
+                                       *reinterpret_cast<unsigned char const *>(i);
+                               fprintf(stderr, " 0x%02x", c);
+                       }
+                       fputc('\n', stderr);
+                       fflush(stderr);
+               }
+               return retval;
        }
        virtual result do_unshift(state_type &, extern_type * to,
                        extern_type *, extern_type *& to_next) const
@@ -112,9 +139,40 @@ protected:
                size_t outbytesleft = (to_end - to) * sizeof(intern_type);
                from_next = from;
                to_next = to;
-               return do_iconv(in_cd_, &from_next, &inbytesleft,
+               result const retval = do_iconv(in_cd_, &from_next, &inbytesleft,
                                reinterpret_cast<char **>(&to_next),
                                &outbytesleft);
+               if (retval == base::error) {
+                       fprintf(stderr,
+                               "Error %d returned from iconv when converting from %s to %s: %s\n",
+                               errno, encoding_.c_str(), ucs4_codeset,
+                               strerror(errno));
+                       fputs("Converted input:", stderr);
+                       for (extern_type const * i = from; i < from_next; ++i) {
+                               // extern_type may be signed, avoid output of
+                               // something like 0xffffffc2
+                               unsigned int const c =
+                                       *reinterpret_cast<unsigned char const *>(i);
+                               fprintf(stderr, " 0x%02x", c);
+                       }
+                       unsigned int const c =
+                               *reinterpret_cast<unsigned char const *>(from_next);
+                       fprintf(stderr, "\nStopped at: 0x%02x\n", c);
+                       fputs("Unconverted input:", stderr);
+                       for (extern_type const * i = from_next + 1; i < from_end; ++i) {
+                               unsigned int const c =
+                                       *reinterpret_cast<unsigned char const *>(i);
+                               fprintf(stderr, " 0x%02x", c);
+                       }
+                       fputs("\nConverted output:", stderr);
+                       for (intern_type const * i = to; i < to_next; ++i) {
+                               unsigned int const c = *i;
+                               fprintf(stderr, " 0x%02x", c);
+                       }
+                       fputc('\n', stderr);
+                       fflush(stderr);
+               }
+               return retval;
        }
        virtual int do_encoding() const throw()
        {
@@ -154,7 +212,7 @@ protected:
                // RFC 3629.
                // All other encodings encode one UCS4 code point in one byte
                // (and can therefore only encode a subset of UCS4)
-               return utf8_ ? 4 : 1;
+               return encoding_ == "UTF-8" ? 4 : 1;
        }
 private:
        /// Do the actual conversion. The interface is equivalent to that of
@@ -162,7 +220,7 @@ private:
        inline base::result do_iconv(iconv_t cd, char const ** from,
                        size_t * inbytesleft, char ** to, size_t * outbytesleft) const
        {
-               char const * to_start = *to;
+               char const * const to_start = *to;
                size_t converted = iconv(cd, const_cast<char ICONV_CONST **>(from),
                                inbytesleft, to, outbytesleft);
                if (converted == (size_t)(-1)) {
@@ -172,9 +230,6 @@ private:
                                return base::partial;
                        case EILSEQ:
                        default:
-                               fprintf(stderr, "Error %d returned from iconv: %s\n",
-                                       errno, strerror(errno));
-                               fflush(stderr);
                                return base::error;
                        }
                }
@@ -184,8 +239,8 @@ private:
        }
        iconv_t in_cd_;
        iconv_t out_cd_;
-       /// Is the narrow encoding UTF8?
-       bool utf8_;
+       /// The narrow encoding
+       std::string encoding_;
 };
 
 } // namespace anon
@@ -200,20 +255,21 @@ const char * iconv_codecvt_facet_exception::what() const throw()
 }
 
 
-idocfstream::idocfstream() : base()
+idocfstream::idocfstream(string const & encoding) : base()
 {
        std::locale global;
-       std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
+       std::locale locale(global, new iconv_codecvt_facet(encoding, in)); // input-only facet for the caller-supplied encoding
        imbue(locale);
 }
 
        
-idocfstream::idocfstream(const char* s, std::ios_base::openmode mode)
+idocfstream::idocfstream(const char* s, std::ios_base::openmode mode,
+                         string const & encoding)
        : base()
 {
        // We must imbue the stream before opening the file
        std::locale global;
-       std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
+       std::locale locale(global, new iconv_codecvt_facet(encoding, in));
        imbue(locale);
        open(s, mode);
 }
@@ -238,6 +294,32 @@ odocfstream::odocfstream(const char* s, std::ios_base::openmode mode,
        open(s, mode);
 }
 
+
+SetEnc setEncoding(string const & encoding) // wraps the encoding name in a SetEnc for use with operator<<
+{
+       return SetEnc(encoding);
+}
+
+
+odocstream & operator<<(odocstream & os, SetEnc e)
+{
+       if (std::has_facet<iconv_codecvt_facet>(os.rdbuf()->getloc())) {
+               // This stream must be a file stream, since we never imbue
+               // any other stream with a locale having an iconv_codecvt_facet.
+               // Flush the stream first so that all pending output is still
+               // written with the old encoding.
+               os.flush();
+               std::locale locale(os.rdbuf()->getloc(),
+                       new iconv_codecvt_facet(e.encoding, std::ios_base::out));
+               // FIXME Does changing the codecvt facet of an open file
+               // stream always work? It does with gcc 4.1, but I have read
+               // somewhere that it does not with MSVC.
+               // What does the standard say?
+               os.imbue(locale);
+       }
+       return os;
+}
+
 }
 
 #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)