X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Funicode.cpp;h=187d018f2bc03da066e011483d6c3b0e21cff8cf;hb=aef8746712ccc64f1f10073fe6d011ff1d7eb4a4;hp=26c6a239d759d5a355712ccccadb6afa0839092c;hpb=9b9a3cf0c001d25c07056f37b3ccfd8025eefbcf;p=lyx.git diff --git a/src/support/unicode.cpp b/src/support/unicode.cpp index 26c6a239d7..187d018f2b 100644 --- a/src/support/unicode.cpp +++ b/src/support/unicode.cpp @@ -15,16 +15,19 @@ #include "support/unicode.h" #include "support/debug.h" +#include + #include #include #include -#include #include #include +//Needed in MSVC #include + using namespace std; namespace { @@ -46,98 +49,66 @@ namespace lyx { char const * ucs4_codeset = "UCS-4LE"; #endif -static const iconv_t invalid_cd = (iconv_t)(-1); - -struct IconvProcessor::Impl -{ - Impl(string const & to, string const & from) - : cd(invalid_cd), tocode_(to), fromcode_(from) - {} - - ~Impl() - { - if (cd != invalid_cd && iconv_close(cd) == -1) - LYXERR0("Error returned from iconv_close(" << errno << ")"); +struct IconvProcessor::Handler { + // assumes cd is valid + Handler(iconv_t const cd) : cd(cd) {} + ~Handler() { + if (iconv_close(cd) == -1) + LYXERR0("Error returned from iconv_close(" << errno << ')'); } - - iconv_t cd; - string tocode_; - string fromcode_; + iconv_t const cd; }; -IconvProcessor::IconvProcessor(char const * tocode, char const * fromcode) - : pimpl_(new IconvProcessor::Impl(tocode, fromcode)) -{ -} - - -IconvProcessor::IconvProcessor(IconvProcessor const & other) - : pimpl_(new IconvProcessor::Impl(other.pimpl_->tocode_, other.pimpl_->fromcode_)) -{ -} - - -IconvProcessor::~IconvProcessor() -{ - delete pimpl_; -} - - -void IconvProcessor::operator=(IconvProcessor const & other) -{ - if (&other != this) - pimpl_ = new Impl(other.pimpl_->tocode_, other.pimpl_->fromcode_); -} +IconvProcessor::IconvProcessor(string tocode, string fromcode) + : tocode_(tocode), fromcode_(fromcode) +{} bool IconvProcessor::init() { - if (pimpl_->cd != invalid_cd) + if (h_) return true; - - pimpl_->cd = iconv_open(pimpl_->tocode_.c_str(), pimpl_->fromcode_.c_str()); - if (pimpl_->cd != invalid_cd) + iconv_t cd = iconv_open(tocode_.c_str(), fromcode_.c_str()); + if (cd != (iconv_t)(-1)) { + h_ = make_unique(cd); return true; - + } lyxerr << "Error returned from iconv_open" << endl; switch (errno) { - case EINVAL: - lyxerr << "EINVAL The conversion from " << pimpl_->fromcode_ - << " to " << pimpl_->tocode_ - << " is not supported by the implementation." - << endl; - break; - default: - lyxerr << "\tSome other error: " << errno << endl; - break; + case EINVAL: + lyxerr << "EINVAL The conversion from " << fromcode_ << " to " + << tocode_ << " is not supported by the implementation." + << endl; + break; + default: + lyxerr << "\tSome other error: " << errno << endl; + break; } return false; } int IconvProcessor::convert(char const * buf, size_t buflen, - char * outbuf, size_t maxoutsize) + char * outbuf, size_t maxoutsize) { if (buflen == 0) return 0; - if (pimpl_->cd == invalid_cd) { - if (!init()) - return -1; - } + if (!h_ && !init()) + return -1; char ICONV_CONST * inbuf = const_cast(buf); size_t inbytesleft = buflen; size_t outbytesleft = maxoutsize; - int res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + int res = iconv(h_->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); // flush out remaining data. This is needed because iconv sometimes // holds back chars in the stream, waiting for a combination character // (see e.g. http://sources.redhat.com/bugzilla/show_bug.cgi?id=1124) - iconv(pimpl_->cd, NULL, NULL, &outbuf, &outbytesleft); + iconv(h_->cd, NULL, NULL, &outbuf, &outbytesleft); //lyxerr << dec; //lyxerr << "Inbytesleft: " << inbytesleft << endl; @@ -156,8 +127,8 @@ int IconvProcessor::convert(char const * buf, size_t buflen, case EILSEQ: lyxerr << "EILSEQ An invalid multibyte sequence" << " has been encountered in the input.\n" - << "When converting from " << pimpl_->fromcode_ - << " to " << pimpl_->tocode_ << ".\n"; + << "When converting from " << fromcode_ + << " to " << tocode_ << ".\n"; lyxerr << "Input:" << hex; for (size_t i = 0; i < buflen; ++i) { // char may be signed, avoid output of @@ -171,8 +142,8 @@ int IconvProcessor::convert(char const * buf, size_t buflen, case EINVAL: lyxerr << "EINVAL An incomplete multibyte sequence" << " has been encountered in the input.\n" - << "When converting from " << pimpl_->fromcode_ - << " to " << pimpl_->tocode_ << ".\n"; + << "When converting from " << fromcode_ + << " to " << tocode_ << ".\n"; lyxerr << "Input:" << hex; for (size_t i = 0; i < buflen; ++i) { // char may be signed, avoid output of @@ -188,27 +159,11 @@ int IconvProcessor::convert(char const * buf, size_t buflen, break; } // We got an error so we close down the conversion engine - if (iconv_close(pimpl_->cd) == -1) { - lyxerr << "Error returned from iconv_close(" - << errno << ")" << endl; - } - pimpl_->cd = invalid_cd; + h_.reset(); return -1; } -std::string IconvProcessor::from() const -{ - return pimpl_->fromcode_; -} - - -std::string IconvProcessor::to() const -{ - return pimpl_->tocode_; -} - - namespace { @@ -222,7 +177,10 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen) char const * inbuf = reinterpret_cast(buf); size_t inbytesleft = buflen * sizeof(InType); - static std::vector outbuf(32768); + static QThreadStorage *> static_outbuf; + if (!static_outbuf.hasLocalData()) + static_outbuf.setLocalData(new std::vector(32768)); + std::vector & outbuf = *static_outbuf.localData(); // The number of UCS4 code points in buf is at most inbytesleft. // The output encoding will use at most // max_encoded_bytes(pimpl_->tocode_) per UCS4 code point. @@ -243,6 +201,15 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen) } // anon namespace +IconvProcessor & utf8ToUcs4() +{ + static QThreadStorage processor; + if (!processor.hasLocalData()) + processor.setLocalData(new IconvProcessor(ucs4_codeset, "UTF-8")); + return *processor.localData(); +} + + vector utf8_to_ucs4(vector const & utf8str) { if (utf8str.empty()) @@ -255,32 +222,60 @@ vector utf8_to_ucs4(vector const & utf8str) vector utf8_to_ucs4(char const * utf8str, size_t ls) { - static IconvProcessor processor(ucs4_codeset, "UTF-8"); - return iconv_convert(processor, utf8str, ls); + return iconv_convert(utf8ToUcs4(), utf8str, ls); } vector utf16_to_ucs4(unsigned short const * s, size_t ls) { - static IconvProcessor processor(ucs4_codeset, utf16_codeset); - return iconv_convert(processor, s, ls); + static QThreadStorage processor; + if (!processor.hasLocalData()) + processor.setLocalData(new IconvProcessor(ucs4_codeset, utf16_codeset)); + return iconv_convert(*processor.localData(), s, ls); } vector ucs4_to_utf16(char_type const * s, size_t ls) { - static IconvProcessor processor(utf16_codeset, ucs4_codeset); - return iconv_convert(processor, s, ls); + static QThreadStorage processor; + if (!processor.hasLocalData()) + processor.setLocalData(new IconvProcessor(utf16_codeset, ucs4_codeset)); + return iconv_convert(*processor.localData(), s, ls); +} + + +IconvProcessor & ucs4ToUtf8() +{ + static QThreadStorage processor; + if (!processor.hasLocalData()) + processor.setLocalData(new IconvProcessor("UTF-8", ucs4_codeset)); + return *processor.localData(); } +namespace { + +IconvProcessor & getProc(map & processors, + string const & encoding, bool to) +{ + string const & fromcode = to ? ucs4_codeset : encoding; + string const & tocode = to ? encoding : ucs4_codeset; + map::iterator const it = processors.find(encoding); + if (it == processors.end()) { + IconvProcessor p(fromcode, tocode); + return processors.insert(make_pair(encoding, move(p))).first->second; + } else + return it->second; +} + +} //anon namespace + vector ucs4_to_utf8(char_type c) { - static IconvProcessor processor("UTF-8", ucs4_codeset); - return iconv_convert(processor, &c, 1); + return iconv_convert(ucs4ToUtf8(), &c, 1); } @@ -297,46 +292,49 @@ ucs4_to_utf8(vector const & ucs4str) vector ucs4_to_utf8(char_type const * ucs4str, size_t ls) { - static IconvProcessor processor("UTF-8", ucs4_codeset); - return iconv_convert(processor, ucs4str, ls); + return iconv_convert(ucs4ToUtf8(), ucs4str, ls); } vector eightbit_to_ucs4(char const * s, size_t ls, string const & encoding) { - static map processors; - if (processors.find(encoding) == processors.end()) { - IconvProcessor processor(ucs4_codeset, encoding.c_str()); - processors.insert(make_pair(encoding, processor)); - } - return iconv_convert(processors[encoding], s, ls); + static QThreadStorage *> static_processors; + if (!static_processors.hasLocalData()) + static_processors.setLocalData(new map); + map & processors = *static_processors.localData(); + IconvProcessor & processor = getProc(processors, encoding, true); + return iconv_convert(processor, s, ls); } +namespace { + +map & ucs4To8bitProcessors() +{ + static QThreadStorage *> processors; + if (!processors.hasLocalData()) + processors.setLocalData(new map); + return *processors.localData(); +} + +} + vector ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding) { - static map processors; - if (processors.find(encoding) == processors.end()) { - IconvProcessor processor(encoding.c_str(), ucs4_codeset); - processors.insert(make_pair(encoding, processor)); - } - return iconv_convert(processors[encoding], ucs4str, ls); + map & processors(ucs4To8bitProcessors()); + IconvProcessor & processor = getProc(processors, encoding, false); + return iconv_convert(processor, ucs4str, ls); } char ucs4_to_eightbit(char_type ucs4, string const & encoding) { - static map processors; - map::iterator it = processors.find(encoding); - if (it == processors.end()) { - IconvProcessor processor(encoding.c_str(), ucs4_codeset); - it = processors.insert(make_pair(encoding, processor)).first; - } - + map & processors(ucs4To8bitProcessors()); + IconvProcessor & processor = getProc(processors, encoding, false); char out; - int const bytes = it->second.convert((char *)(&ucs4), 4, &out, 1); + int const bytes = processor.convert((char *)(&ucs4), 4, &out, 1); if (bytes > 0) return out; return 0; @@ -346,15 +344,13 @@ char ucs4_to_eightbit(char_type ucs4, string const & encoding) void ucs4_to_multibytes(char_type ucs4, vector & out, string const & encoding) { - static map processors; - map::iterator it = processors.find(encoding); - if (it == processors.end()) { - IconvProcessor processor(encoding.c_str(), ucs4_codeset); - it = processors.insert(make_pair(encoding, processor)).first; - } - + static QThreadStorage *> static_processors; + if (!static_processors.hasLocalData()) + static_processors.setLocalData(new map); + map & processors = *static_processors.localData(); + IconvProcessor & processor = getProc(processors, encoding, false); out.resize(4); - int bytes = it->second.convert((char *)(&ucs4), 4, &out[0], 4); + int bytes = processor.convert((char *)(&ucs4), 4, &out[0], 4); if (bytes > 0) out.resize(bytes); else @@ -371,7 +367,6 @@ int max_encoded_bytes(std::string const & encoding) // The CJK encodings use (different) multibyte representation as well. // All other encodings encode one UCS4 code point in one byte // (and can therefore only encode a subset of UCS4) - // Note that BIG5 and SJIS do not work with LaTeX (see lib/encodings). // Furthermore, all encodings that use shifting (like SJIS) do not work with // iconv_codecvt_facet. if (encoding == "UTF-8" ||