#include <cerrno>
#include <iomanip>
-#include <string>
+#include <map>
using std::endl;
-using std::string;
-namespace {
+namespace lyx {
#ifdef WORDS_BIGENDIAN
char const * ucs4_codeset = "UCS-4BE";
char const * ucs2_codeset = "UCS-2LE";
#endif
-std::vector<char>
-iconv_convert(std::string const & tocode, std::string const & fromcode,
- std::vector<char> const & buf)
+/// Sentinel meaning "no open conversion descriptor"; the result of
+/// iconv_open() is compared against it to detect failure.
+static const iconv_t invalid_cd = (iconv_t)(-1);
+
+
+/// Private state of an IconvProcessor: the iconv conversion descriptor.
+struct IconvProcessor::Private {
+	/// Start without an open descriptor.
+	Private()
+		: cd(invalid_cd)
+	{}
+
+	/// Close the descriptor if one was opened. A failing iconv_close()
+	/// is only reported on lyxerr.
+	~Private()
+	{
+		if (cd == invalid_cd)
+			return;
+
+		if (iconv_close(cd) == -1)
+			lyxerr << "Error returned from iconv_close("
+			       << errno << ")" << endl;
+	}
+
+	/// The open descriptor, or invalid_cd when none is open.
+	iconv_t cd;
+};
+
+
+/// Remember the target (\p tocode) and source (\p fromcode) codeset names.
+/// No iconv descriptor is opened here: the freshly created Private starts
+/// with cd == invalid_cd, and init() opens the descriptor later.
+IconvProcessor::IconvProcessor(char const * tocode,
+ char const * fromcode): tocode_(tocode), fromcode_(fromcode),
+ pimpl_(new IconvProcessor::Private)
{
- if (buf.empty())
- return std::vector<char>();
+}
+
+
+/// Copy only the codeset names from \p other. The copy gets its own new
+/// Private (descriptor not yet opened), so the iconv_t held by \p other
+/// is not shared.
+IconvProcessor::IconvProcessor(IconvProcessor const & other)
+	: tocode_(other.tocode_),
+	  fromcode_(other.fromcode_),
+	  pimpl_(new IconvProcessor::Private)
+{}
+
+
+/// Take over the codeset names of \p other and start again with a new,
+/// unopened Private. Self-assignment leaves the object untouched.
+IconvProcessor & IconvProcessor::operator=(IconvProcessor const & other)
+{
+	if (this != &other) {
+		tocode_ = other.tocode_;
+		fromcode_ = other.fromcode_;
+		// A descriptor opened for the previous codesets must not be
+		// reused; presumably reset() destroys the old Private (and so
+		// closes its descriptor) -- depends on the type of pimpl_.
+		pimpl_.reset(new Private);
+	}
+	return *this;
+}
+
+
+/// Nothing to release by hand here: closing the descriptor is done by
+/// Private's destructor.
+IconvProcessor::~IconvProcessor()
+{
+}
+
- iconv_t cd = iconv_open(tocode.c_str(), fromcode.c_str());
- if (cd == (iconv_t)(-1)) {
- lyxerr << "Error returned from iconv_open" << endl;
- switch (errno) {
+/// Open the iconv descriptor for the fromcode_ -> tocode_ conversion unless
+/// it is already open.
+/// \return true when a descriptor is available afterwards, false when
+/// iconv_open() failed (the reason is written to lyxerr).
+bool IconvProcessor::init()
+{
+ // Already opened by an earlier call: nothing to do.
+ if (pimpl_->cd != invalid_cd)
+ return true;
+
+ pimpl_->cd = iconv_open(tocode_.c_str(), fromcode_.c_str());
+ if (pimpl_->cd != invalid_cd)
+ return true;
+
+ lyxerr << "Error returned from iconv_open" << endl;
+ // NOTE(review): errno is inspected only after the lyxerr output above;
+ // if the stream write can modify errno, save it right after iconv_open().
+ switch (errno) {
case EINVAL:
- lyxerr << "EINVAL The conversion from " << fromcode
- << " to " << tocode
- << " is not supported by the implementation."
- << endl;
+ lyxerr << "EINVAL The conversion from " << fromcode_
+ << " to " << tocode_
+ << " is not supported by the implementation."
+ << endl;
break;
default:
lyxerr << "\tSome other error: " << errno << endl;
break;
- }
}
+ return false;
+}
- char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(&buf[0]);
- size_t inbytesleft = buf.size();
- static char out[1000];
- char * outbuf = out;
- size_t outbytesleft = 1000;
- size_t res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+/// Convert \p buflen bytes at \p buf from fromcode_ to tocode_, writing at
+/// most \p maxoutsize bytes to \p outbuf.
+///
+/// The descriptor is opened on demand through init(). When iconv() fails,
+/// the error is logged and the descriptor is closed and reset to
+/// invalid_cd, so the next call opens a fresh one.
+///
+/// \return the number of bytes written to \p outbuf, 0 when \p buflen is 0,
+/// or -1 when the descriptor cannot be opened or the conversion fails.
+int IconvProcessor::convert(char const * buf, size_t buflen,
+ char * outbuf, size_t maxoutsize)
+{
+ if (buflen == 0)
+ return 0;
+
+ if (pimpl_->cd == invalid_cd) {
+ if (!init())
+ return -1;
+ }
+
+ char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
+ size_t inbytesleft = buflen;
+ size_t outbytesleft = maxoutsize;
+
+ // iconv() returns size_t and reports failure as (size_t)(-1); keep the
+ // result in that type instead of narrowing it to int.
+ size_t const res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
- if (res == (size_t)(-1)) {
- lyxerr << "Error returned from iconv" << endl;
- switch (errno) {
+ //lyxerr << std::dec;
+ //lyxerr << "Inbytesleft: " << inbytesleft << endl;
+ //lyxerr << "Outbytesleft: " << outbytesleft << endl;
+
+ if (res != (size_t)(-1))
+ // Everything went well.
+ return maxoutsize - outbytesleft;
+
+ // There are some errors in the conversion
+ lyxerr << "Error returned from iconv" << endl;
+ switch (errno) {
case E2BIG:
lyxerr << "E2BIG There is not sufficient room at *outbuf." << endl;
break;
case EILSEQ:
lyxerr << "EILSEQ An invalid multibyte sequence"
- << " has been encountered in the input.\n"
- << "When converting from " << fromcode
- << " to " << tocode << ".\n";
+ << " has been encountered in the input.\n"
+ << "When converting from " << fromcode_
+ << " to " << tocode_ << ".\n";
lyxerr << "Input: " << std::hex;
- for (size_t i = 0; i < buf.size(); ++i) {
- unsigned char const b = buf[i];
- lyxerr << "0x" << int(b) << " ";
+ for (size_t i = 0; i < buflen; ++i) {
+ // Widen via unsigned char so that bytes >= 0x80 are not
+ // sign-extended to 0xffffffXX where char is signed.
+ boost::uint32_t const b = static_cast<unsigned char>(buf[i]);
+ lyxerr << "0x" << b << " ";
}
lyxerr << endl;
break;
case EINVAL:
lyxerr << "EINVAL An incomplete multibyte sequence"
- << " has been encountered in the input.\n"
- << "When converting from " << fromcode
- << " to " << tocode << ".\n";
+ << " has been encountered in the input.\n"
+ << "When converting from " << fromcode_
+ << " to " << tocode_ << ".\n";
lyxerr << "Input: " << std::hex;
- for (size_t i = 0; i < buf.size(); ++i) {
- unsigned char const b = buf[i];
- lyxerr << "0x" << int(b) << " ";
+ for (size_t i = 0; i < buflen; ++i) {
+ // Widen via unsigned char so that bytes >= 0x80 are not
+ // sign-extended to 0xffffffXX where char is signed.
+ boost::uint32_t const b = static_cast<unsigned char>(buf[i]);
+ lyxerr << "0x" << b << " ";
}
lyxerr << endl;
break;
default:
lyxerr << "\tSome other error: " << errno << endl;
break;
- }
}
-
- if (iconv_close(cd) == -1) {
+ // We got an error so we close down the conversion engine
+ if (iconv_close(pimpl_->cd) == -1) {
lyxerr << "Error returned from iconv_close("
- << errno << ")" << endl;
+ << errno << ")" << endl;
}
+ pimpl_->cd = invalid_cd;
+ return -1;
+}
- //lyxerr << std::dec;
- //lyxerr << "Inbytesleft: " << inbytesleft << endl;
- //lyxerr << "Outbytesleft: " << outbytesleft << endl;
- int bytes = 1000 - outbytesleft;
- std::vector<char> outvec(out, out + bytes);
- return outvec;
+namespace {
+
+
+/// Run \p processor over \p buflen elements of type InType starting at
+/// \p buf and return the converted data as a vector of RetType.
+///
+/// The output passes through a fixed 32768-byte static scratch buffer, so
+/// the converted form of one call cannot be larger than that.
+///
+/// \return the converted elements; an empty vector when \p buflen is 0 or
+/// when IconvProcessor::convert() fails or produces no output.
+template<typename RetType, typename InType>
+std::vector<RetType>
+iconv_convert(IconvProcessor & processor,
+	      InType const * buf,
+	      size_t buflen)
+{
+	if (buflen == 0)
+		return std::vector<RetType>();
+
+	char const * inbuf = reinterpret_cast<char const *>(buf);
+	size_t const inbytes = buflen * sizeof(InType);
+
+	size_t const outsize = 32768;
+	// Declared with element type RetType (not char) so the storage is
+	// suitably aligned for the values that are read back from it.
+	static RetType out[outsize / sizeof(RetType)];
+	char * outbuf = reinterpret_cast<char *>(out);
+
+	int const bytes = processor.convert(inbuf, inbytes, outbuf, sizeof(out));
+	// convert() returns -1 on failure. Dividing that by sizeof(RetType)
+	// would promote it to a huge unsigned count and build the result from
+	// memory far outside the scratch buffer.
+	if (bytes <= 0)
+		return std::vector<RetType>();
+
+	return std::vector<RetType>(out, out + bytes / sizeof(RetType));
+}
+} // anon namespace
+
-std::vector<boost::uint32_t> bytes_to_ucs4(std::vector<char> const & bytes)
+/// Convert a vector of UTF-8 bytes to UCS-4 by forwarding to the
+/// pointer/length overload. An empty input returns an empty vector.
+std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
{
- boost::uint32_t const * tmp = reinterpret_cast<uint32_t const *>(&bytes[0]);
- return std::vector<boost::uint32_t>(tmp, tmp + bytes.size() / 4);
+ // Guard first: &utf8str[0] below must not be formed for an empty vector.
+ if (utf8str.empty())
+ return std::vector<lyx::char_type>();
+
+ return utf8_to_ucs4(&utf8str[0], utf8str.size());
}
-std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
+/// Convert \p ls bytes of UTF-8 starting at \p utf8str to UCS-4.
+/// The IconvProcessor is a function-local static, so it is constructed on
+/// the first call and reused by later calls.
+std::vector<lyx::char_type>
+utf8_to_ucs4(char const * utf8str, size_t ls)
{
- unsigned short const * tmp = reinterpret_cast<unsigned short const *>(&bytes[0]);
- return std::vector<unsigned short>(tmp, tmp + bytes.size() / 2);
+ static IconvProcessor processor(ucs4_codeset, "UTF-8");
+ return iconv_convert<lyx::char_type>(processor, utf8str, ls);
}
-} // anon namespace
+
+/// Convert the single UCS-2 unit \p c to UCS-4 and return the first
+/// element of the converted sequence.
+lyx::char_type
+ucs2_to_ucs4(unsigned short c)
+{
+	// NOTE(review): assumes the conversion yields at least one element.
+	std::vector<lyx::char_type> const converted = ucs2_to_ucs4(&c, 1);
+	return converted[0];
+}
-std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
+/// Convert a vector of UCS-2 units to UCS-4 by forwarding to the
+/// pointer/length overload. An empty input returns an empty vector.
+std::vector<lyx::char_type>
+ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
{
- //lyxerr << "Buff = " << string(utf8str.begin(), utf8str.end())
- // << " (" << utf8str.size() << ")" << endl;
- //lyxerr << "Res = " << string(res.begin(), res.end())
- // << " (" << res.size() << ")" << endl;
+ // Guard first: &ucs2str[0] below must not be formed for an empty vector.
+ if (ucs2str.empty())
+ return std::vector<lyx::char_type>();
- std::vector<char> res = iconv_convert(ucs4_codeset, "UTF-8", utf8str);
- return bytes_to_ucs4(res);
+ return ucs2_to_ucs4(&ucs2str[0], ucs2str.size());
}
-std::vector<boost::uint32_t>
-ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
+/// Convert \p ls UCS-2 units starting at \p ucs2str to UCS-4.
+/// \p ls counts elements, not bytes (iconv_convert scales it by the element
+/// size). The IconvProcessor is a function-local static that is constructed
+/// on the first call and reused afterwards.
+std::vector<lyx::char_type>
+ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
{
- char const * tin = reinterpret_cast<char const *>(&ucs2str[0]);
- std::vector<char> in(tin, tin + ucs2str.size() * 2);
- std::vector<char> res = iconv_convert(ucs4_codeset, ucs2_codeset, in);
- return bytes_to_ucs4(res);
+ static IconvProcessor processor(ucs4_codeset, ucs2_codeset);
+ return iconv_convert<lyx::char_type>(processor, ucs2str, ls);
+}
+
+
+/// Convert the single UCS-4 character \p c to UCS-2 and return the first
+/// unit of the converted sequence.
+unsigned short
+ucs4_to_ucs2(lyx::char_type c)
+{
+	// NOTE(review): assumes the conversion yields at least one element.
+	std::vector<unsigned short> const converted = ucs4_to_ucs2(&c, 1);
+	return converted[0];
}
+/// Convert a vector of UCS-4 characters to UCS-2 by forwarding to the
+/// pointer/length overload. An empty input returns an empty vector.
std::vector<unsigned short>
-ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
+ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
{
- char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
- std::vector<char> in(tin, tin + ucs4str.size() * 4);
- std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
- return bytes_to_ucs2(res);
+ // Guard first: &ucs4str[0] below must not be formed for an empty vector.
+ if (ucs4str.empty())
+ return std::vector<unsigned short>();
+
+ return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
}
+/// Convert \p ls UCS-4 characters starting at \p s to UCS-2.
+/// \p ls counts elements, not bytes. The IconvProcessor is a function-local
+/// static that is constructed on the first call and reused afterwards.
std::vector<unsigned short>
-ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
+ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
{
- char const * tin = reinterpret_cast<char const *>(s);
- std::vector<char> in(tin, tin + ls * 4);
- std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
- return bytes_to_ucs2(res);
+ static IconvProcessor processor(ucs2_codeset, ucs4_codeset);
+ return iconv_convert<unsigned short>(processor, s, ls);
}
-unsigned short
-ucs4_to_ucs2(boost::uint32_t c)
+/// Convert the single UCS-4 character \p c to its UTF-8 byte sequence.
+/// Uses its own function-local static IconvProcessor, constructed on the
+/// first call and reused afterwards.
+std::vector<char>
+ucs4_to_utf8(lyx::char_type c)
{
- char const * tin = reinterpret_cast<char const *>(&c);
- std::vector<char> in(tin, tin + 4);
- std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
- return bytes_to_ucs2(res)[0];
+ static IconvProcessor processor("UTF-8", ucs4_codeset);
+ return iconv_convert<char>(processor, &c, 1);
}
-std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
+/// Convert a vector of UCS-4 characters to UTF-8 bytes by forwarding to the
+/// pointer/length overload. An empty input returns an empty vector.
+std::vector<char>
+ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
{
- char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
- std::vector<char> in(tin, tin + ucs4str.size() * 4);
- std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
- return res;
+ // Guard first: &ucs4str[0] below must not be formed for an empty vector.
+ if (ucs4str.empty())
+ return std::vector<char>();
+
+ return ucs4_to_utf8(&ucs4str[0], ucs4str.size());
}
-std::vector<char> ucs4_to_utf8(boost::uint32_t c)
+/// Convert \p ls UCS-4 characters starting at \p ucs4str to UTF-8 bytes.
+/// \p ls counts elements, not bytes. The IconvProcessor is a function-local
+/// static that is constructed on the first call and reused afterwards.
+std::vector<char>
+ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
{
- char const * tin = reinterpret_cast<char const *>(&c);
- std::vector<char> in(tin, tin + 4);
- std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
- return res;
+ static IconvProcessor processor("UTF-8", ucs4_codeset);
+ return iconv_convert<char>(processor, ucs4str, ls);
}
+
+
+/// Convert \p ls bytes at \p s from the 8-bit codeset named \p encoding to
+/// UCS-4.
+///
+/// One IconvProcessor per encoding name is kept in a function-local static
+/// map: it is created the first time an encoding is requested and reused
+/// for later calls with the same name.
+std::vector<lyx::char_type>
+eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
+{
+	typedef std::map<std::string, IconvProcessor> ProcessorMap;
+	static ProcessorMap processors;
+
+	// Look the encoding up once and keep the iterator. This avoids the
+	// repeated find/insert/operator[] lookups, and operator[] would also
+	// default-construct a processor for a missing key.
+	ProcessorMap::iterator it = processors.find(encoding);
+	if (it == processors.end()) {
+		IconvProcessor processor(ucs4_codeset, encoding.c_str());
+		it = processors.insert(std::make_pair(encoding, processor)).first;
+	}
+	return iconv_convert<char_type>(it->second, s, ls);
+}
+
+
+/// Convert \p ls UCS-4 characters at \p ucs4str to the 8-bit codeset named
+/// \p encoding.
+///
+/// One IconvProcessor per encoding name is kept in a function-local static
+/// map: it is created the first time an encoding is requested and reused
+/// for later calls with the same name.
+std::vector<char>
+ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding)
+{
+	typedef std::map<std::string, IconvProcessor> ProcessorMap;
+	static ProcessorMap processors;
+
+	// Look the encoding up once and keep the iterator. This avoids the
+	// repeated find/insert/operator[] lookups, and operator[] would also
+	// default-construct a processor for a missing key.
+	ProcessorMap::iterator it = processors.find(encoding);
+	if (it == processors.end()) {
+		IconvProcessor processor(encoding.c_str(), ucs4_codeset);
+		it = processors.insert(std::make_pair(encoding, processor)).first;
+	}
+	return iconv_convert<char>(it->second, ucs4str, ls);
+}
+
+} // namespace lyx