X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Funicode.cpp;h=95415a5538e45ad11a4ed4695c5b277ae79024c3;hb=12554c93d81f75f87c34040fd7737048d3518d6d;hp=39090769d2993b261445c3d71adec1417cb9ad2b;hpb=f1cba8ff64b369792fd49f5ddf90e8126ab476ac;p=lyx.git diff --git a/src/support/unicode.cpp b/src/support/unicode.cpp index 39090769d2..95415a5538 100644 --- a/src/support/unicode.cpp +++ b/src/support/unicode.cpp @@ -14,17 +14,19 @@ #include "support/unicode.h" #include "support/debug.h" +#include "support/mutex.h" #include #include #include -#include #include #include +//Needed in MSVC #include + using namespace std; namespace { @@ -64,6 +66,8 @@ struct IconvProcessor::Impl iconv_t cd; string tocode_; string fromcode_; + + Mutex mutex_; // iconv() is not thread-safe, see #7240 }; @@ -120,6 +124,8 @@ bool IconvProcessor::init() int IconvProcessor::convert(char const * buf, size_t buflen, char * outbuf, size_t maxoutsize) { + Mutex::Locker lock(&pimpl_->mutex_); + if (buflen == 0) return 0; @@ -197,6 +203,18 @@ int IconvProcessor::convert(char const * buf, size_t buflen, } +std::string IconvProcessor::from() const +{ + return pimpl_->fromcode_; +} + + +std::string IconvProcessor::to() const +{ + return pimpl_->tocode_; +} + + namespace { @@ -210,17 +228,21 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen) char const * inbuf = reinterpret_cast<char const *>(buf); size_t inbytesleft = buflen * sizeof(InType); - size_t const outsize = 32768; - static char out[outsize]; - char * outbuf = out; + static std::vector<char> outbuf(32768); + // The number of UCS4 code points in buf is at most inbytesleft. + // The output encoding will use at most + // max_encoded_bytes(pimpl_->tocode_) per UCS4 code point. 
+ size_t maxoutbufsize = max_encoded_bytes(processor.to()) * inbytesleft; + if (outbuf.size() < maxoutbufsize) + outbuf.resize(maxoutbufsize); - int bytes = processor.convert(inbuf, inbytesleft, outbuf, outsize); + int bytes = processor.convert(inbuf, inbytesleft, &outbuf[0], outbuf.size()); if (bytes <= 0) // Conversion failed // FIXME Maybe throw an exception and handle that in the caller? return vector<RetType>(); - RetType const * tmp = reinterpret_cast<RetType const *>(out); + RetType const * tmp = reinterpret_cast<RetType const *>(&outbuf[0]); return vector<RetType>(tmp, tmp + bytes / sizeof(RetType)); } @@ -345,4 +367,34 @@ void ucs4_to_multibytes(char_type ucs4, vector & out, out.clear(); } +int max_encoded_bytes(std::string const & encoding) +{ + // FIXME: this information should be transferred to lib/encodings + // UTF8 uses at most 4 bytes to represent one UCS4 code point + // (see RFC 3629). RFC 2279 specifies 6 bytes, but that + // information is outdated, and RFC 2279 has been superseded by + // RFC 3629. + // The CJK encodings use (different) multibyte representation as well. + // All other encodings encode one UCS4 code point in one byte + // (and can therefore only encode a subset of UCS4) + // Furthermore, all encodings that use shifting (like SJIS) do not work with + // iconv_codecvt_facet. + if (encoding == "UTF-8" || + encoding == "GB" || + encoding == "EUC-TW") + return 4; + else if (encoding == "EUC-JP") + return 3; + else if (encoding == "ISO-2022-JP") + return 8; + else if (encoding == "BIG5" || + encoding == "EUC-KR" || + encoding == "EUC-CN" || + encoding == "SJIS" || + encoding == "GBK") + return 2; + else + return 1; +} + } // namespace lyx