* src/encoding.C (latexChar,read):

[lyx.git] / src / support / docstream.C
diff --git a/src/support/docstream.C b/src/support/docstream.C

index 4347d6ba5b345b8520ece846e475cc6288d23d09..ac9a8ed23050ac38e91e9c28787dda125151e798 100644 (file)
--- a/src/support/docstream.C
+++ b/src/support/docstream.C
@@ -20,15 +20,12 @@
  
  
  using lyx::ucs4_codeset;
-using lyx::ucs2_codeset;
  
  using std::string;
  
  
  namespace {
  
-char const * utf8_codeset = "UTF-8";
-
  // We use C IO throughout this file, because the facets might be used with
  // lyxerr in the future.
  
@@ -44,7 +41,7 @@ public:
         explicit iconv_codecvt_facet(string const & encoding = "UTF-8",
                         std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
                         size_t refs = 0)
-               : base(refs), utf8_(encoding == "UTF-8")
+               : base(refs), encoding_(encoding)
         {
                 if (inout & std::ios_base::in) {
                         in_cd_ = iconv_open(ucs4_codeset, encoding.c_str());
@@ -92,8 +89,38 @@ protected:
                 size_t outbytesleft = (to_end - to) * sizeof(extern_type);
                 from_next = from;
                 to_next = to;
-               return do_iconv(out_cd_, reinterpret_cast<char const **>(&from_next),
+               result const retval = do_iconv(out_cd_,
+                               reinterpret_cast<char const **>(&from_next),
                                 &inbytesleft, &to_next, &outbytesleft);
+               if (retval == base::error) {
+                       fprintf(stderr,
+                               "Error %d returned from iconv when converting from %s to %s: %s\n",
+                               errno, ucs4_codeset, encoding_.c_str(),
+                               strerror(errno));
+                       fputs("Converted input:", stderr);
+                       for (intern_type const * i = from; i < from_next; ++i) {
+                               unsigned int const c = *i;
+                               fprintf(stderr, " 0x%04x", c);
+                       }
+                       unsigned int const c = *from_next;
+                       fprintf(stderr, "\nStopped at: 0x%04x\n", c);
+                       fputs("Unconverted input:", stderr);
+                       for (intern_type const * i = from_next + 1; i < from_end; ++i) {
+                               unsigned int const c = *i;
+                               fprintf(stderr, " 0x%04x", c);
+                       }
+                       fputs("\nConverted output:", stderr);
+                       for (extern_type const * i = to; i < to_next; ++i) {
+                               // extern_type may be signed, avoid output of
+                               // something like 0xffffffc2
+                               unsigned int const c =
+                                       *reinterpret_cast<unsigned char const *>(i);
+                               fprintf(stderr, " 0x%02x", c);
+                       }
+                       fputc('\n', stderr);
+                       fflush(stderr);
+               }
+               return retval;
         }
         virtual result do_unshift(state_type &, extern_type * to,
                         extern_type *, extern_type *& to_next) const
@@ -112,9 +139,40 @@ protected:
                 size_t outbytesleft = (to_end - to) * sizeof(intern_type);
                 from_next = from;
                 to_next = to;
-               return do_iconv(in_cd_, &from_next, &inbytesleft,
+               result const retval = do_iconv(in_cd_, &from_next, &inbytesleft,
                                 reinterpret_cast<char **>(&to_next),
                                 &outbytesleft);
+               if (retval == base::error) {
+                       fprintf(stderr,
+                               "Error %d returned from iconv when converting from %s to %s: %s\n",
+                               errno, encoding_.c_str(), ucs4_codeset,
+                               strerror(errno));
+                       fputs("Converted input:", stderr);
+                       for (extern_type const * i = from; i < from_next; ++i) {
+                               // extern_type may be signed, avoid output of
+                               // something like 0xffffffc2
+                               unsigned int const c =
+                                       *reinterpret_cast<unsigned char const *>(i);
+                               fprintf(stderr, " 0x%02x", c);
+                       }
+                       unsigned int const c =
+                               *reinterpret_cast<unsigned char const *>(from_next);
+                       fprintf(stderr, "\nStopped at: 0x%02x\n", c);
+                       fputs("Unconverted input:", stderr);
+                       for (extern_type const * i = from_next + 1; i < from_end; ++i) {
+                               unsigned int const c =
+                                       *reinterpret_cast<unsigned char const *>(i);
+                               fprintf(stderr, " 0x%02x", c);
+                       }
+                       fputs("\nConverted output:", stderr);
+                       for (intern_type const * i = to; i < to_next; ++i) {
+                               unsigned int const c = *i;
+                               fprintf(stderr, " 0x%02x", c);
+                       }
+                       fputc('\n', stderr);
+                       fflush(stderr);
+               }
+               return retval;
         }
         virtual int do_encoding() const throw()
         {
@@ -154,7 +212,7 @@ protected:
                 // RFC 3629.
                 // All other encodings encode one UCS4 code point in one byte
                 // (and can therefore only encode a subset of UCS4)
-               return utf8_ ? 4 : 1;
+               return encoding_ == "UTF-8" ? 4 : 1;
         }
  private:
         /// Do the actual conversion. The interface is equivalent to that of
@@ -162,7 +220,7 @@ private:
         inline base::result do_iconv(iconv_t cd, char const ** from,
                         size_t * inbytesleft, char ** to, size_t * outbytesleft) const
         {
-               char const * to_start = *to;
+               char const * const to_start = *to;
                 size_t converted = iconv(cd, const_cast<char ICONV_CONST **>(from),
                                 inbytesleft, to, outbytesleft);
                 if (converted == (size_t)(-1)) {
@@ -172,9 +230,6 @@ private:
                                 return base::partial;
                         case EILSEQ:
                         default:
-                               fprintf(stderr, "Error %d returned from iconv: %s\n",
-                                       errno, strerror(errno));
-                               fflush(stderr);
                                 return base::error;
                         }
                 }
@@ -184,8 +239,8 @@ private:
         }
         iconv_t in_cd_;
         iconv_t out_cd_;
-       /// Is the narrow encoding UTF8?
-       bool utf8_;
+       /// The narrow encoding
+       std::string encoding_;
  };
  
  } // namespace anon
@@ -200,20 +255,21 @@ const char * iconv_codecvt_facet_exception::what() const throw()
  }
  
  
-idocfstream::idocfstream() : base()
+idocfstream::idocfstream(string const & encoding) : base()
  {
         std::locale global;
-       std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
+       std::locale locale(global, new iconv_codecvt_facet(encoding, in));
         imbue(locale);
  }
  
         
-idocfstream::idocfstream(const char* s, std::ios_base::openmode mode)
+idocfstream::idocfstream(const char* s, std::ios_base::openmode mode,
+                         string const & encoding)
         : base()
  {
         // We must imbue the stream before openening the file
         std::locale global;
-       std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
+       std::locale locale(global, new iconv_codecvt_facet(encoding, in));
         imbue(locale);
         open(s, mode);
  }
@@ -238,6 +294,32 @@ odocfstream::odocfstream(const char* s, std::ios_base::openmode mode,
         open(s, mode);
  }
  
+
+SetEnc setEncoding(string const & encoding)
+{
+       return SetEnc(encoding);
+}
+
+
+odocstream & operator<<(odocstream & os, SetEnc e)
+{
+       if (std::has_facet<iconv_codecvt_facet>(os.rdbuf()->getloc())) {
+               // This stream must be a file stream, since we never imbue
+               // any other stream with a locale having an iconv_codecvt_facet.
+               // Flush the stream so that all pending output is written
+               // with the old encoding.
+               os.flush();
+               std::locale locale(os.rdbuf()->getloc(),
+                       new iconv_codecvt_facet(e.encoding, std::ios_base::out));
+               // FIXME Does changing the codecvt facet of an open file
+               // stream always work? It does with gcc 4.1, but I have read
+               // somewhere that it does not with MSVC.
+               // What does the standard say?
+               os.imbue(locale);
+       }
+       return os;
+}
+
  }
  
  #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)