X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Funicode.h;h=6373e470ba0e446a192f08ea18f07f241b582437;hb=b7abd752faa1f5d4958b27045bcfb1678ec1b2ae;hp=16c4e00a03ad48da47387db36b7ea4701d3cfb92;hpb=c4320d24cd2d29c2e77958b4a8fd44f2bd587ca7;p=lyx.git diff --git a/src/support/unicode.h b/src/support/unicode.h index 16c4e00a03..6373e470ba 100644 --- a/src/support/unicode.h +++ b/src/support/unicode.h @@ -1,9 +1,10 @@ +// -*- C++ -*- /** * \file unicode.h * This file is part of LyX, the document processor. * Licence details can be found in the file COPYING. * - * \author Lars Gullik Bjønnes + * \author Lars Gullik Bjønnes * * Full author contact details are available in file CREDITS. * @@ -13,59 +14,68 @@ #ifndef LYX_SUPPORT_UNICODE_H #define LYX_SUPPORT_UNICODE_H -#include "support/types.h" - -#include +#include "support/strfwd.h" +#include "support/unique_ptr.h" +#include #include #include namespace lyx { +/** + * Wrapper for iconv(3). + * + * According to the POSIX standard, all specified functions are thread-safe, + * with some exceptions. The iconv() function is not listed as an exception: + * http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xsh_chap02.html#tag_22_02_09_09 + * http://man7.org/linux/man-pages/man7/pthreads.7.html + * + * Therefore, you can use as many instances of this class in parallel as you + * like. However, you need to ensure that each instance is only used by one + * thread at any given time. If this condition is not met you get nasty + * mixtures of different thread data as in bug 7240. + * + * From a performance point of view it is best to use one static instance + * per thread for each in/out encoding pair. This can e.g. be achieved by + * using helpers for thread-local storage such as QThreadStorage or + * boost::thread_specific_ptr. A single static instance protected by a mutex + * would work as well, and might be preferable for exotic encoding pairs. 
+ * Creating local IconvProcessor instances should be avoided because of the + * overhead in iconv_open(). + */ class IconvProcessor { -public: - IconvProcessor( - char const * tocode = "", - char const * fromcode = ""); - /// copy constructor needed because of pimpl_ - IconvProcessor(IconvProcessor const &); - /// assignment operator needed because of pimpl_ - IconvProcessor & operator=(IconvProcessor const &); - /// destructor (needs to be implemented in the .C file because the - /// boost::scoped_ptr destructor needs a fully defined type - ~IconvProcessor(); - - /// convert any data from \c fromcode to \c tocode unicode format. - /// \return the number of bytes of the converted output buffer. - int convert( - char const * in_buffer, - size_t in_size, - char * out_buffer, - size_t max_out_size); -private: /// open iconv. /// \return true if the processor is ready to use. bool init(); - - std::string tocode_; - std::string fromcode_; - - struct Private; - boost::scoped_ptr pimpl_; + std::string const tocode_; + std::string const fromcode_; + struct Handler; + unique_ptr h_; +public: + IconvProcessor(std::string tocode, std::string fromcode); + /// convert any data from \c fromcode to \c tocode unicode format. + /// \return the number of bytes of the converted output buffer. + int convert(char const * in_buffer, size_t in_size, + char * out_buffer, size_t max_out_size); + /// target encoding + std::string to() const { return tocode_; } + // required by g++ 4.7 + IconvProcessor(IconvProcessor &&) = default; }; -/// This is implemented in lyx_main.C for the LyX program -/// and in client.C for the LyX client program. -extern IconvProcessor & utf8ToUcs4(); +/// Get the global IconvProcessor instance of the current thread for +/// utf8->ucs4 conversions +IconvProcessor & utf8ToUcs4(); // A single codepoint conversion for utf8_to_ucs4 does not make // sense, so that function is left out. 
-std::vector utf8_to_ucs4(std::vector const & utf8str); +std::vector utf8_to_ucs4(std::vector const & utf8str); -std::vector utf8_to_ucs4(char const * utf8str, size_t ls); +std::vector utf8_to_ucs4(char const * utf8str, size_t ls); // utf16_to_ucs4 @@ -75,26 +85,40 @@ std::vector utf16_to_ucs4(unsigned short const * s, size_t ls); std::vector ucs4_to_utf16(char_type const * s, size_t ls); +/// Get the global IconvProcessor instance of the current thread for +/// ucs4->utf8 conversions +IconvProcessor & ucs4ToUtf8(); + // ucs4_to_utf8 -std::vector ucs4_to_utf8(lyx::char_type c); +std::vector ucs4_to_utf8(char_type c); -std::vector ucs4_to_utf8(std::vector const & ucs4str); +std::vector ucs4_to_utf8(std::vector const & ucs4str); -std::vector ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls); +std::vector ucs4_to_utf8(char_type const * ucs4str, size_t ls); /// convert \p s from encoding \p encoding to ucs4. /// \p encoding must be a valid iconv 8bit encoding -std::vector +std::vector eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding); /// convert \p s from ucs4 to encoding \p encoding. /// \p encoding must be a valid iconv 8bit encoding -std::vector -ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding); +std::vector ucs4_to_eightbit(char_type const * ucs4str, + size_t ls, std::string const & encoding); + +/// convert ucs4 character \p c to encoding \p encoding. +/// \p encoding must be a valid iconv 8bit encoding +char ucs4_to_eightbit(char_type c, std::string const & encoding); + +/// +void ucs4_to_multibytes(char_type ucs4, std::vector & out, + std::string const & encoding); extern char const * ucs4_codeset; +/// How many bytes does one UCS4 code point use at most in encoding \p encoding? +int max_encoded_bytes(std::string const & encoding); } // namespace lyx