From 6a30211fef7beda0a91966b42ecfa4631953b776 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Thu, 10 Oct 2013 21:20:44 +0200 Subject: [PATCH] Make encoding conversions thread safe This is the result of the discussion on the list "2.1.0 Blocker". Thanks to all contributors! The main idea is to use thread-local storage for all static variables. This solution does not need any mutex. For more details, see the comment in unicode.h. --- src/support/docstring.cpp | 8 ---- src/support/unicode.cpp | 78 +++++++++++++++++++++++++++++---------- src/support/unicode.h | 29 +++++++++++++++ 3 files changed, 87 insertions(+), 28 deletions(-) diff --git a/src/support/docstring.cpp b/src/support/docstring.cpp index 727e1f594c..f44259d349 100644 --- a/src/support/docstring.cpp +++ b/src/support/docstring.cpp @@ -69,14 +69,6 @@ string const to_ascii(docstring const & ucs4) } -IconvProcessor & utf8ToUcs4() -{ - static IconvProcessor iconv(ucs4_codeset, "UTF-8"); - return iconv; -} - - - void utf8_to_ucs4(string const & utf8, docstring & ucs4) { size_t n = utf8.size(); diff --git a/src/support/unicode.cpp b/src/support/unicode.cpp index 95415a5538..343b8def2b 100644 --- a/src/support/unicode.cpp +++ b/src/support/unicode.cpp @@ -14,7 +14,8 @@ #include "support/unicode.h" #include "support/debug.h" -#include "support/mutex.h" + +#include #include @@ -66,8 +67,6 @@ struct IconvProcessor::Impl iconv_t cd; string tocode_; string fromcode_; - - Mutex mutex_; // iconv() is not thread save, see #7240 }; @@ -124,8 +123,6 @@ bool IconvProcessor::init() int IconvProcessor::convert(char const * buf, size_t buflen, char * outbuf, size_t maxoutsize) { - Mutex::Locker lock(&pimpl_->mutex_); - if (buflen == 0) return 0; @@ -228,7 +225,10 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen) char const * inbuf = reinterpret_cast(buf); size_t inbytesleft = buflen * sizeof(InType); - static std::vector outbuf(32768); + static QThreadStorage *> static_outbuf; + if (!static_outbuf.hasLocalData()) + static_outbuf.setLocalData(new std::vector(32768)); + std::vector & outbuf = *static_outbuf.localData(); // The number of UCS4 code points in buf is at most inbytesleft. // The output encoding will use at most // max_encoded_bytes(pimpl_->tocode_) per UCS4 code point. @@ -249,6 +249,15 @@ iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen) } // anon namespace +IconvProcessor & utf8ToUcs4() +{ + static QThreadStorage processor; + if (!processor.hasLocalData()) + processor.setLocalData(new IconvProcessor(ucs4_codeset, "UTF-8")); + return *processor.localData(); +} + + vector utf8_to_ucs4(vector const & utf8str) { if (utf8str.empty()) @@ -261,32 +270,43 @@ vector utf8_to_ucs4(vector const & utf8str) vector utf8_to_ucs4(char const * utf8str, size_t ls) { - static IconvProcessor processor(ucs4_codeset, "UTF-8"); - return iconv_convert(processor, utf8str, ls); + return iconv_convert(utf8ToUcs4(), utf8str, ls); } vector utf16_to_ucs4(unsigned short const * s, size_t ls) { - static IconvProcessor processor(ucs4_codeset, utf16_codeset); - return iconv_convert(processor, s, ls); + static QThreadStorage processor; + if (!processor.hasLocalData()) + processor.setLocalData(new IconvProcessor(ucs4_codeset, utf16_codeset)); + return iconv_convert(*processor.localData(), s, ls); } vector ucs4_to_utf16(char_type const * s, size_t ls) { - static IconvProcessor processor(utf16_codeset, ucs4_codeset); - return iconv_convert(processor, s, ls); + static QThreadStorage processor; + if (!processor.hasLocalData()) + processor.setLocalData(new IconvProcessor(utf16_codeset, ucs4_codeset)); + return iconv_convert(*processor.localData(), s, ls); +} + + +IconvProcessor & ucs4ToUtf8() +{ + static QThreadStorage processor; + if (!processor.hasLocalData()) + processor.setLocalData(new IconvProcessor("UTF-8", ucs4_codeset)); + return *processor.localData(); } vector ucs4_to_utf8(char_type c) { - static IconvProcessor processor("UTF-8", ucs4_codeset); - return iconv_convert(processor, &c, 1); + return iconv_convert(ucs4ToUtf8(), &c, 1); } @@ -303,15 +323,17 @@ ucs4_to_utf8(vector const & ucs4str) vector ucs4_to_utf8(char_type const * ucs4str, size_t ls) { - static IconvProcessor processor("UTF-8", ucs4_codeset); - return iconv_convert(processor, ucs4str, ls); + return iconv_convert(ucs4ToUtf8(), ucs4str, ls); } vector eightbit_to_ucs4(char const * s, size_t ls, string const & encoding) { - static map processors; + static QThreadStorage *> static_processors; + if (!static_processors.hasLocalData()) + static_processors.setLocalData(new map); + map & processors = *static_processors.localData(); if (processors.find(encoding) == processors.end()) { IconvProcessor processor(ucs4_codeset, encoding.c_str()); processors.insert(make_pair(encoding, processor)); @@ -320,10 +342,23 @@ eightbit_to_ucs4(char const * s, size_t ls, string const & encoding) } +namespace { + +map & ucs4To8bitProcessors() +{ + static QThreadStorage *> processors; + if (!processors.hasLocalData()) + processors.setLocalData(new map); + return *processors.localData(); +} + +} + + vector ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding) { - static map processors; + map & processors(ucs4To8bitProcessors()); if (processors.find(encoding) == processors.end()) { IconvProcessor processor(encoding.c_str(), ucs4_codeset); processors.insert(make_pair(encoding, processor)); @@ -334,7 +369,7 @@ ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding) char ucs4_to_eightbit(char_type ucs4, string const & encoding) { - static map processors; + map & processors(ucs4To8bitProcessors()); map::iterator it = processors.find(encoding); if (it == processors.end()) { IconvProcessor processor(encoding.c_str(), ucs4_codeset); @@ -352,7 +387,10 @@ char ucs4_to_eightbit(char_type ucs4, string const & encoding) void ucs4_to_multibytes(char_type ucs4, vector & out, string const & encoding) { - static map processors; + static QThreadStorage *> static_processors; + if (!static_processors.hasLocalData()) + static_processors.setLocalData(new map); + map & processors = *static_processors.localData(); map::iterator it = processors.find(encoding); if (it == processors.end()) { IconvProcessor processor(encoding.c_str(), ucs4_codeset); diff --git a/src/support/unicode.h b/src/support/unicode.h index 6e83180c10..ddcd6c8fd0 100644 --- a/src/support/unicode.h +++ b/src/support/unicode.h @@ -21,6 +21,27 @@ namespace lyx { +/** + * Wrapper for iconv(3). + * + * According to the POSIX standard, all specified functions are thread-safe, + * with some exceptions. The iconv() function is not listed as an exception: + * http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xsh_chap02.html#tag_22_02_09_09 + * http://man7.org/linux/man-pages/man7/pthreads.7.html + * + * Therefore, you can use as many instances of this class in parallel as you + * like. However, you need to ensure that each instance is only used by one + * thread at any given time. If this condition is not met you get nasty + * mixtures of different thread data as in bug 7240. + * + * From a performance point of view it is best to use one static instance + * per thread for each in/out encoding pair. This can e.g. be achieved by + * using helpers for thread-local storage such as QThreadStorage or + * boost::thread_specific_ptr. A single static instance protected by a mutex + * would work as well, and might be preferrable for exotic encoding pairs. + * Creating local IconvProcessor instances should be avoided because of the + * overhead in iconv_open(). + */ class IconvProcessor { public: @@ -51,6 +72,10 @@ private: Impl * pimpl_; }; +/// Get the global IconvProcessor instance of the current thread for +/// utf8->ucs4 conversions +IconvProcessor & utf8ToUcs4(); + // A single codepoint conversion for utf8_to_ucs4 does not make // sense, so that function is left out. @@ -66,6 +91,10 @@ std::vector utf16_to_ucs4(unsigned short const * s, size_t ls); std::vector ucs4_to_utf16(char_type const * s, size_t ls); +/// Get the global IconvProcessor instance of the current thread for +/// ucs4->utf8 conversions +IconvProcessor & ucs4ToUtf8(); + // ucs4_to_utf8 std::vector ucs4_to_utf8(char_type c); -- 2.39.2