+// -*- C++ -*-
/**
* \file unicode.h
* This file is part of LyX, the document processor.
#define LYX_SUPPORT_UNICODE_H
#include "support/strfwd.h"
+#include "support/unique_ptr.h"
+#include <cstddef>
+#include <string>
#include <vector>
namespace lyx {
+/**
+ * Wrapper for iconv(3).
+ *
+ * According to the POSIX standard, all specified functions are thread-safe,
+ * with some exceptions. The iconv() function is not listed as an exception:
+ * http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xsh_chap02.html#tag_22_02_09_09
+ * http://man7.org/linux/man-pages/man7/pthreads.7.html
+ *
+ * Therefore, you can use as many instances of this class in parallel as you
+ * like. However, you need to ensure that each instance is only used by one
+ * thread at any given time. If this condition is not met you get nasty
+ * mixtures of different thread data as in bug 7240.
+ *
+ * From a performance point of view it is best to use one static instance
+ * per thread for each in/out encoding pair. This can e.g. be achieved by
+ * using helpers for thread-local storage such as QThreadStorage or
+ * boost::thread_specific_ptr. A single static instance protected by a mutex
+ * would work as well, and might be preferable for exotic encoding pairs.
+ * Creating local IconvProcessor instances should be avoided because of the
+ * overhead in iconv_open().
+ */
class IconvProcessor
{
+ /// Open iconv.
+ /// \return true if the processor is ready to use.
+ bool init();
+ std::string const tocode_; ///< target encoding, as returned by to()
+ std::string const fromcode_; ///< source encoding
+ struct Handler; // only forward-declared in this header
+ unique_ptr<Handler> h_;
public:
- IconvProcessor(char const * tocode = "", char const * fromcode = "");
- /// copy constructor needed because of pimpl_
- IconvProcessor(IconvProcessor const &);
- /// assignment operator needed because of pimpl_
- void operator=(IconvProcessor const &);
- /// destructor
- ~IconvProcessor();
-
+ IconvProcessor(std::string tocode, std::string fromcode); // note the order: target encoding first, then source
/// convert any data from \c fromcode to \c tocode unicode format.
/// \return the number of bytes of the converted output buffer.
int convert(char const * in_buffer, size_t in_size,
char * out_buffer, size_t max_out_size);
-
- /// source encoding
- std::string from() const;
/// target encoding
- std::string to() const;
-
-private:
- /// open iconv.
- /// \return true if the processor is ready to use.
- bool init();
- /// hide internals
- struct Impl;
- Impl * pimpl_;
+ std::string to() const { return tocode_; }
+ // Move constructor; required by g++ 4.6.
+ IconvProcessor(IconvProcessor && other);
};
+/// Get the global IconvProcessor instance of the current thread for
+/// utf8->ucs4 conversions. It is meant to be used by the calling thread only.
+IconvProcessor & utf8ToUcs4();
+
// A single codepoint conversion for utf8_to_ucs4 does not make
// sense, so that function is left out.
std::vector<unsigned short> ucs4_to_utf16(char_type const * s, size_t ls);
+/// Get the global IconvProcessor instance of the current thread for
+/// ucs4->utf8 conversions. It is meant to be used by the calling thread only.
+IconvProcessor & ucs4ToUtf8();
+
// ucs4_to_utf8
std::vector<char> ucs4_to_utf8(char_type c);