X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fsupport%2Fdocstream.cpp;h=f6548e139fccccbeb50286a6c156d2781aa7cf02;hb=59e4d16ab9611732dbc208f23d3de4b9da321dce;hp=53d403b584cc727d2c42a6a308cdfaec3edee9da;hpb=07c0a6e496b847cc23d8a5fb72ef0650d173c49a;p=lyx.git diff --git a/src/support/docstream.cpp b/src/support/docstream.cpp index 53d403b584..f6548e139f 100644 --- a/src/support/docstream.cpp +++ b/src/support/docstream.cpp @@ -11,8 +11,10 @@ #include #include "support/docstream.h" +#include "support/lstrings.h" #include "support/unicode.h" +#include #include #include #include @@ -23,6 +25,12 @@ using namespace std; using lyx::ucs4_codeset; + +#if defined(_MSC_VER) && (_MSC_VER >= 1600) +std::locale::id numpunct::id; +#endif + + namespace { // We use C IO throughout this file, because the facets might be used with @@ -86,32 +94,47 @@ protected: { #define WORKAROUND_ICONV_BUG 1 #if WORKAROUND_ICONV_BUG - // Due to a bug in some iconv versions, when the last char in - // the buffer is a wide char, it gets truncated (see bugs 5216, - // 5280, and also 5489). As a workaround, we append a null - // char and then remove it from output after the conversion. + // Due to a bug in some iconv versions, when the last char + // in the buffer is a wide char and the output encoding is + // ISO-2022-JP and we are going to switch to another encoding, + // the appropriate escape sequence for changing the character + // set is not output (see bugs 5216, 5280, and also 5489). + // As a workaround, we append a nul char in order to force + // a switch to ASCII, and then remove it from output after + // the conversion. intern_type * from_new = 0; - if (*(from_end - 1) >= 0x80) { + intern_type const * from_old = from; + size_t extra = 0; + if (*(from_end - 1) >= 0x80 && encoding_ == "ISO-2022-JP") { size_t len = from_end - from; from_new = new intern_type[len + 1]; memcpy(from_new, from, len * sizeof(intern_type)); from_new[len] = 0; from_end = from_new + len + 1; from = from_new; + extra = 1; } #endif size_t inbytesleft = (from_end - from) * sizeof(intern_type); size_t outbytesleft = (to_end - to) * sizeof(extern_type); +#if WORKAROUND_ICONV_BUG + outbytesleft += extra * sizeof(extern_type); +#endif from_next = from; to_next = to; result const retval = do_iconv(out_cd_, reinterpret_cast(&from_next), &inbytesleft, &to_next, &outbytesleft); #if WORKAROUND_ICONV_BUG - // Remove from output the null char that we inserted at the end + // Remove from output the nul char that we inserted at the end // of the input buffer in order to circumvent an iconv bug. - if (from_new) + if (from_new) { --to_next; + --from_next; + from_next = from_old + (from_next - from); + from = from_old; + delete[] from_new; + } #endif if (retval == base::error) { fprintf(stderr, @@ -141,9 +164,6 @@ protected: fputc('\n', stderr); fflush(stderr); } -#if WORKAROUND_ICONV_BUG - delete[] from_new; -#endif return retval; } virtual result do_unshift(state_type &, extern_type * to, @@ -230,32 +250,7 @@ protected: } virtual int do_max_length() const throw() { - // FIXME: this information should be transferred to lib/encodings - // UTF8 uses at most 4 bytes to represent one UCS4 code point - // (see RFC 3629). RFC 2279 specifies 6 bytes, but that - // information is outdated, and RFC 2279 has been superseded by - // RFC 3629. - // The CJK encodings use (different) multibyte representation as well. - // All other encodings encode one UCS4 code point in one byte - // (and can therefore only encode a subset of UCS4) - // Note that BIG5 and SJIS do not work with LaTeX (see lib/encodings). - // Furthermore, all encodings that use shifting (like SJIS) do not work with - // iconv_codecvt_facet. - if (encoding_ == "UTF-8" || - encoding_ == "GB" || - encoding_ == "EUC-TW") - return 4; - else if (encoding_ == "EUC-JP" || - encoding_ == "ISO-2022-JP") - return 3; - else if (encoding_ == "BIG5" || - encoding_ == "EUC-KR" || - encoding_ == "EUC-CN" || - encoding_ == "SJIS" || - encoding_ == "GBK") - return 2; - else - return 1; + return lyx::max_encoded_bytes(encoding_); } private: /// Do the actual conversion. The interface is equivalent to that of @@ -268,6 +263,14 @@ private: inbytesleft, to, outbytesleft); if (converted == (size_t)(-1)) { switch(errno) { + case 0: + // As strange as it may seem, this + // does happen on windows when parsing + // comments with accented chars in + // tex2lyx. See the following thread + // for details + // http://thread.gmane.org/gmane.editors.lyx.devel/117636 + break; case EINVAL: case E2BIG: return base::partial; @@ -307,9 +310,9 @@ const char * iconv_codecvt_facet_exception::what() const throw() } -ifdocstream::ifdocstream(string const & encoding) : base() +ifdocstream::ifdocstream() : base() { - setEncoding(*this, encoding, in); + setEncoding(*this, "UTF-8", in); } @@ -402,6 +405,7 @@ odocstream & operator<<(odocstream & os, char c) } + #if ! defined(USE_WCHAR_T) && defined(__GNUC__) // We get undefined references to these virtual methods. This looks like // a bug in gcc. The implementation here does not do anything useful, since @@ -447,17 +451,6 @@ bool codecvt::do_always_noconv() const throw() return true; } -#if __GNUC__ == 3 && __GNUC_MINOR__ < 4 - -template<> -int codecvt::do_length( - mbstate_t const &, const char *, const char *, size_t) const -{ - return 1; -} - -#else - template<> int codecvt::do_length( mbstate_t &, const char *, const char *, size_t) const @@ -465,8 +458,6 @@ int codecvt::do_length( return 1; } -#endif - template<> int codecvt::do_max_length() const throw() {