X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Funicode.cpp;h=b48f10609f7fe9315e257575aec432b9d8fcb1dc;hb=f3ff5e083a6e89861db5fce9eea4532504f8341d;hp=8e003437e3be72ae4b71aa61eab03e3904640af8;hpb=c26050ac83a516fb0a7b0a94f3a1cfc3212b0260;p=lyx.git diff --git a/src/support/unicode.cpp b/src/support/unicode.cpp index 8e003437e3..b48f10609f 100644 --- a/src/support/unicode.cpp +++ b/src/support/unicode.cpp @@ -3,7 +3,7 @@ * This file is part of LyX, the document processor. * Licence details can be found in the file COPYING. * - * \author Lars Gullik Bjønnes + * \author Lars Gullik Bjønnes * * Full author contact details are available in file CREDITS. * @@ -14,6 +14,7 @@ #include "support/unicode.h" #include "support/debug.h" +#include "support/mutex.h" #include @@ -21,10 +22,11 @@ #include #include -#include #include +#include #include + using namespace std; namespace { @@ -64,6 +66,8 @@ struct IconvProcessor::Impl iconv_t cd; string tocode_; string fromcode_; + + Mutex mutex_; // iconv() is not thread safe, see #7240 }; @@ -120,6 +124,8 @@ bool IconvProcessor::init() int IconvProcessor::convert(char const * buf, size_t buflen, char * outbuf, size_t maxoutsize) { + Mutex::Locker lock(&pimpl_->mutex_); + if (buflen == 0) return 0; @@ -197,6 +203,18 @@ int IconvProcessor::convert(char const * buf, size_t buflen, } +std::string IconvProcessor::from() const +{ + return pimpl_->fromcode_; +} + + +std::string IconvProcessor::to() const +{ + return pimpl_->tocode_; +} + + namespace { @@ -210,17 +228,21 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen) char const * inbuf = reinterpret_cast<char const *>(buf); size_t inbytesleft = buflen * sizeof(InType); - size_t const outsize = 32768; - static char out[outsize]; - char * outbuf = out; + static std::vector<char> outbuf(32768); + // The number of UCS4 code points in buf is at most inbytesleft. 
+ // The output encoding will use at most + // max_encoded_bytes(processor.to()) per UCS4 code point. + size_t maxoutbufsize = max_encoded_bytes(processor.to()) * inbytesleft; + if (outbuf.size() < maxoutbufsize) + outbuf.resize(maxoutbufsize); - int bytes = processor.convert(inbuf, inbytesleft, outbuf, outsize); + int bytes = processor.convert(inbuf, inbytesleft, &outbuf[0], outbuf.size()); if (bytes <= 0) // Conversion failed // FIXME Maybe throw an exception and handle that in the caller? return vector<RetType>(); - RetType const * tmp = reinterpret_cast<RetType const *>(out); + RetType const * tmp = reinterpret_cast<RetType const *>(&outbuf[0]); return vector<RetType>(tmp, tmp + bytes / sizeof(RetType)); } @@ -345,4 +367,35 @@ void ucs4_to_multibytes(char_type ucs4, vector & out, out.clear(); } +int max_encoded_bytes(std::string const & encoding) +{ + // FIXME: this information should be transferred to lib/encodings + // UTF8 uses at most 4 bytes to represent one UCS4 code point + // (see RFC 3629). RFC 2279 specifies 6 bytes, but that + // information is outdated, and RFC 2279 has been superseded by + // RFC 3629. + // The CJK encodings use (different) multibyte representations as well. + // All other encodings encode one UCS4 code point in one byte + // (and can therefore only encode a subset of UCS4) + // Note that BIG5 and SJIS do not work with LaTeX (see lib/encodings). + // Furthermore, all encodings that use shifting (like SJIS) do not work with + // iconv_codecvt_facet. + if (encoding == "UTF-8" || + encoding == "GB" || + encoding == "EUC-TW") + return 4; + else if (encoding == "EUC-JP") + return 3; + else if (encoding == "ISO-2022-JP") + return 8; + else if (encoding == "BIG5" || + encoding == "EUC-KR" || + encoding == "EUC-CN" || + encoding == "SJIS" || + encoding == "GBK") + return 2; + else + return 1; +} + } // namespace lyx