#include <cerrno>
#include <iomanip>
-#include <string>
using std::endl;
-using std::string;
+
+// Codeset names passed (via iconv_convert) to iconv_open() for the UCS-4
+// and UCS-2 sides of a conversion. The BE or LE spelling is selected at
+// compile time depending on whether WORDS_BIGENDIAN is defined.
+// NOTE(review): presumably WORDS_BIGENDIAN comes from the configure-generated
+// config header -- confirm that header is included before this point.
+#ifdef WORDS_BIGENDIAN
+ char const * ucs4_codeset = "UCS-4BE";
+ char const * ucs2_codeset = "UCS-2BE";
+#else
+ char const * ucs4_codeset = "UCS-4LE";
+ char const * ucs2_codeset = "UCS-2LE";
+#endif
namespace {
-std::vector<char>
-iconv_convert(std::string const & tocode, std::string const & fromcode,
- std::vector<char> const & buf)
+// Convert buflen elements of buf from codeset fromcode to codeset tocode
+// with iconv and return the output reinterpreted as a vector of RetType.
+//
+// cd caches the conversion descriptor between calls: it is opened here
+// while it still holds (iconv_t)(-1), and after a failed iconv() call it
+// is closed and reset to (iconv_t)(-1) so that the next call reopens it.
+// An empty vector is returned for empty input and when the descriptor
+// cannot be opened.
+//
+// NOTE(review): the output is staged in a static buffer of outsize bytes
+// that is shared by all calls, and iconv() is invoked only once, so output
+// that does not fit is not fetched by the code visible here -- confirm
+// that callers never convert more than that in one go.
+template<typename RetType, typename InType>
+std::vector<RetType>
+iconv_convert(iconv_t * cd,
+ char const * tocode,
+ char const * fromcode,
+ InType const * buf,
+ size_t buflen)
{
- if (buf.empty())
- return std::vector<char>();
-
- iconv_t cd = iconv_open(tocode.c_str(), fromcode.c_str());
- if (cd == (iconv_t)(-1)) {
- lyxerr << "Error returned from iconv_open" << endl;
- switch (errno) {
- case EINVAL:
- lyxerr << "EINVAL The conversion from " << fromcode
- << " to " << tocode
- << " is not supported by the implementation."
- << endl;
- break;
- default:
- lyxerr << "\tSome other error: " << errno << endl;
- break;
+ if (buflen == 0)
+ return std::vector<RetType>();
+
+ if (*cd == (iconv_t)(-1)) {
+ *cd = iconv_open(tocode, fromcode);
+ if (*cd == (iconv_t)(-1)) {
+ lyxerr << "Error returned from iconv_open" << endl;
+ switch (errno) {
+ case EINVAL:
+ lyxerr << "EINVAL The conversion from " << fromcode
+ << " to " << tocode
+ << " is not supported by the implementation."
+ << endl;
+ break;
+ default:
+ lyxerr << "\tSome other error: " << errno << endl;
+ break;
+ }
+ // Without a valid descriptor neither iconv() nor iconv_close()
+ // may be called, so give up on this conversion here.
+ return std::vector<RetType>();
}
}
- char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(&buf[0]);
- size_t inbytesleft = buf.size();
- static char out[1000];
+ char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
+ size_t inbytesleft = buflen * sizeof(InType);
+ size_t const outsize = 1000;
+ static char out[outsize];
char * outbuf = out;
- size_t outbytesleft = 1000;
+ size_t outbytesleft = outsize;
- size_t res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+ size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (res == (size_t)(-1)) {
lyxerr << "Error returned from iconv" << endl;
<< "When converting from " << fromcode
<< " to " << tocode << ".\n";
lyxerr << "Input: " << std::hex;
- for (size_t i = 0; i < buf.size(); ++i) {
- unsigned char const b = buf[i];
- lyxerr << "0x" << int(b) << " ";
+ for (size_t i = 0; i < buflen; ++i) {
+ boost::uint32_t const b = buf[i];
+ lyxerr << "0x" << b << " ";
}
lyxerr << endl;
break;
<< "When converting from " << fromcode
<< " to " << tocode << ".\n";
lyxerr << "Input: " << std::hex;
- for (size_t i = 0; i < buf.size(); ++i) {
- unsigned char const b = buf[i];
- lyxerr << "0x" << int(b) << " ";
+ for (size_t i = 0; i < buflen; ++i) {
+ boost::uint32_t const b = buf[i];
+ lyxerr << "0x" << b << " ";
}
lyxerr << endl;
break;
lyxerr << "\tSome other error: " << errno << endl;
break;
}
- }
-
- if (iconv_close(cd) == -1) {
- lyxerr << "Error returned from iconv_close("
- << errno << ")" << endl;
+ // We got an error so we close down the conversion engine
+ if (iconv_close(*cd) == -1) {
+ lyxerr << "Error returned from iconv_close("
+ << errno << ")" << endl;
+ }
+ *cd = (iconv_t)(-1);
}
//lyxerr << std::dec;
//lyxerr << "Inbytesleft: " << inbytesleft << endl;
//lyxerr << "Outbytesleft: " << outbytesleft << endl;
- int bytes = 1000 - outbytesleft;
+ int bytes = outsize - outbytesleft;
- std::vector<char> outvec(out, out + bytes);
- return outvec;
+ RetType const * tmp = reinterpret_cast<RetType const *>(out);
+ return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
}
+} // anon namespace
-std::vector<boost::uint32_t> bytes_to_ucs4(std::vector<char> const & bytes)
-{
- //lyxerr << "Outbuf =" << std::hex;
-
- std::vector<boost::uint32_t> ucs4;
- for (size_t i = 0; i < bytes.size(); i += 4) {
- unsigned char const b1 = bytes[i ];
- unsigned char const b2 = bytes[i + 1];
- unsigned char const b3 = bytes[i + 2];
- unsigned char const b4 = bytes[i + 3];
-
- boost::uint32_t c;
- char * cc = reinterpret_cast<char *>(&c);
- cc[3] = b1;
- cc[2] = b2;
- cc[1] = b3;
- cc[0] = b4;
-
- if (c > 0xffff) {
- lyxerr << "Strange ucs4 value encountered\n";
- lyxerr << "0x"
- << std::setw(2) << std::setfill('0') << int(b1)
- << std::setw(2) << std::setfill('0') << int(b2)
- << std::setw(2) << std::setfill('0') << int(b3)
- << std::setw(2) << std::setfill('0') << int(b4)
- << ' '
- << "(0x"
- << c
- << ") ";
- }
- ucs4.push_back(c);
- }
- //lyxerr << endl;
- return ucs4;
+// Convert the UTF-8 bytes held in utf8str to UCS-4 by forwarding to the
+// pointer/length overload. Returns an empty vector for empty input.
+std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
+{
+ // operator[] must not be applied to an empty vector, not even to
+ // take the address of element 0.
+ if (utf8str.empty())
+ return std::vector<lyx::char_type>();
+
+ return utf8_to_ucs4(&utf8str[0], utf8str.size());
}
-std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
+// Convert ls bytes of UTF-8 starting at utf8str to the codeset named by
+// ucs4_codeset. The iconv descriptor is a function-local static that
+// starts out as (iconv_t)(-1) and is passed to iconv_convert by address,
+// so the same descriptor is used by every call of this overload.
+std::vector<lyx::char_type>
+utf8_to_ucs4(char const * utf8str, size_t ls)
{
- //lyxerr << "Outbuf =" << std::hex;
-
- std::vector<unsigned short> ucs2;
- for (size_t i = 0; i < bytes.size(); i += 2) {
- unsigned char const b1 = bytes[i ];
- unsigned char const b2 = bytes[i + 1];
-
- unsigned short c;
- char * cc = reinterpret_cast<char *>(&c);
- cc[0] = b1;
- cc[1] = b2;
-
- //lyxerr << "0x"
- // << std::setw(2) << std::setfill('0') << int(b2)
- // << std::setw(2) << std::setfill('0') << int(b1)
- // << ' '
- // << "(0x"
- // << c
- // << ") ";
-
- ucs2.push_back(c);
- }
- //lyxerr << endl;
- return ucs2;
+ static iconv_t cd = (iconv_t)(-1);
+ return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, "UTF-8",
+ utf8str, ls);
}
-} // anon namespace
-
-std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
+// Convert a single UCS-2 code unit to UCS-4.
+// Returns 0xfffd when the conversion produces no output.
+lyx::char_type
+ucs2_to_ucs4(unsigned short c)
{
- //lyxerr << "Buff = " << string(utf8str.begin(), utf8str.end())
- // << " (" << utf8str.size() << ")" << endl;
- //lyxerr << "Res = " << string(res.begin(), res.end())
- // << " (" << res.size() << ")" << endl;
-
- std::vector<char> res = iconv_convert("UCS-4", "UTF-8", utf8str);
- return bytes_to_ucs4(res);
+ std::vector<lyx::char_type> const ucs4 = ucs2_to_ucs4(&c, 1);
+ // A failed conversion yields an empty vector; indexing it would be
+ // undefined, so hand back the "unknown character" value instead.
+ if (ucs4.empty())
+ return 0xfffd;
+ return ucs4[0];
}
-std::vector<boost::uint32_t>
+// Convert the UCS-2 code units held in ucs2str to UCS-4 by forwarding to
+// the pointer/length overload. Returns an empty vector for empty input.
+std::vector<lyx::char_type>
ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
{
- // TODO: Simplify and speed up.
- std::vector<char> in;
- std::vector<unsigned short>::const_iterator cit = ucs2str.begin();
- std::vector<unsigned short>::const_iterator end = ucs2str.end();
- //lyxerr << std::hex;
- for (; cit != end; ++cit) {
- unsigned short s = *cit;
- in.push_back(static_cast<char>(s & 0x00ff));
- in.push_back(static_cast<char>((s & 0xff00) >> 8));
- lyxerr << std::setw(2) << std::setfill('0') << (s & 0x00ff) << endl;
- lyxerr << std::setw(2) << std::setfill('0') << ((s & 0xff00) >> 8) << endl;
- }
+ // operator[] must not be applied to an empty vector, not even to
+ // take the address of element 0.
+ if (ucs2str.empty())
+ return std::vector<lyx::char_type>();
+
+ return ucs2_to_ucs4(&ucs2str[0], ucs2str.size());
+}
- std::vector<char> res = iconv_convert("UCS-4", "UCS-2", in);
- return bytes_to_ucs4(res);
+
+// Convert ls UCS-2 code units starting at ucs2str from the codeset named
+// by ucs2_codeset to the one named by ucs4_codeset. The iconv descriptor
+// is a function-local static that starts out as (iconv_t)(-1) and is
+// passed to iconv_convert by address, so it is shared between calls.
+std::vector<lyx::char_type>
+ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
+{
+ static iconv_t cd = (iconv_t)(-1);
+ return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, ucs2_codeset,
+ ucs2str, ls);
+}
+
+
+// Convert a single UCS-4 character to UCS-2.
+// Returns 0xfffd (unknown character) when the conversion produces no
+// output, as the implementation this replaces did.
+unsigned short
+ucs4_to_ucs2(lyx::char_type c)
+{
+ std::vector<unsigned short> const us = ucs4_to_ucs2(&c, 1);
+ // Indexing an empty result would be undefined behaviour.
+ if (us.empty())
+ return 0xfffd; // unknown character
+ return us[0];
}
+// Convert the UCS-4 characters held in ucs4str to UCS-2 by forwarding to
+// the pointer/length overload. Returns an empty vector for empty input.
std::vector<unsigned short>
-ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
+ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
{
- std::vector<char> in;
- std::vector<boost::uint32_t>::const_iterator cit = ucs4str.begin();
- std::vector<boost::uint32_t>::const_iterator end = ucs4str.end();
- for (; cit != end; ++cit) {
- boost::uint32_t s = *cit;
- in.push_back(static_cast<char>((s & 0xff000000) >> 24));
- in.push_back(static_cast<char>((s & 0x00ff0000) >> 16));
- in.push_back(static_cast<char>((s & 0x0000ff00) >> 8));
- in.push_back(static_cast<char>(s & 0x000000ff));
- }
- std::vector<char> res = iconv_convert("UCS-2", "UCS-4", in);
- return bytes_to_ucs2(res);
+ // operator[] must not be applied to an empty vector, not even to
+ // take the address of element 0.
+ if (ucs4str.empty())
+ return std::vector<unsigned short>();
+
+ return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
}
+// Convert ls UCS-4 characters starting at s from the codeset named by
+// ucs4_codeset to the one named by ucs2_codeset. The iconv descriptor is
+// a function-local static that starts out as (iconv_t)(-1) and is passed
+// to iconv_convert by address, so it is shared between calls.
std::vector<unsigned short>
-ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
+ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
{
- std::vector<char> in;
- for (size_t i = 0; i < ls; ++i) {
- in.push_back(static_cast<char>((s[i] & 0xff000000) >> 24));
- in.push_back(static_cast<char>((s[i] & 0x00ff0000) >> 16));
- in.push_back(static_cast<char>((s[i] & 0x0000ff00) >> 8));
- in.push_back(static_cast<char>(s[i] & 0x000000ff));
- }
- std::vector<char> res = iconv_convert("UCS-2", "UCS-4", in);
- return bytes_to_ucs2(res);
+ static iconv_t cd = (iconv_t)(-1);
+ return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
+ s, ls);
}
-unsigned short
-ucs4_to_ucs2(boost::uint32_t c)
+// Convert the single UCS-4 character c to its UTF-8 byte sequence.
+// Uses its own function-local static iconv descriptor, separate from the
+// one in the pointer/length overload; it starts out as (iconv_t)(-1) and
+// is passed to iconv_convert by address.
+std::vector<char>
+ucs4_to_utf8(lyx::char_type c)
{
- std::vector<char> in;
- in.push_back(static_cast<char>((c & 0xff000000) >> 24));
- in.push_back(static_cast<char>((c & 0x00ff0000) >> 16));
- in.push_back(static_cast<char>((c & 0x0000ff00) >> 8));
- in.push_back(static_cast<char>(c & 0x000000ff));
- std::vector<char> res = iconv_convert("UCS-2", "UCS-4", in);
- std::vector<unsigned short> us = bytes_to_ucs2(res);
- if (!us.empty())
- return us[0];
- else
- return 0xfffd; // unknown character
+ static iconv_t cd = (iconv_t)(-1);
+ return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
}
-std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
+// Convert the UCS-4 characters held in ucs4str to UTF-8 by forwarding to
+// the pointer/length overload. Returns an empty vector for empty input.
+std::vector<char>
+ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
{
- std::vector<char> in;
- std::vector<boost::uint32_t>::const_iterator cit = ucs4str.begin();
- std::vector<boost::uint32_t>::const_iterator end = ucs4str.end();
- for (; cit != end; ++cit) {
- boost::uint32_t s = *cit;
- in.push_back(static_cast<char>((s & 0xff000000) >> 24));
- in.push_back(static_cast<char>((s & 0x00ff0000) >> 16));
- in.push_back(static_cast<char>((s & 0x0000ff00) >> 8));
- in.push_back(static_cast<char>(s & 0x000000ff));
- }
- std::vector<char> res = iconv_convert("UTF-8", "UCS-4", in);
- return res;
+ // operator[] must not be applied to an empty vector, not even to
+ // take the address of element 0.
+ if (ucs4str.empty())
+ return std::vector<char>();
+
+ return ucs4_to_utf8(&ucs4str[0], ucs4str.size());
}
-std::vector<char> ucs4_to_utf8(boost::uint32_t c)
+// Convert ls UCS-4 characters starting at ucs4str from the codeset named
+// by ucs4_codeset to UTF-8. The iconv descriptor is a function-local
+// static that starts out as (iconv_t)(-1) and is passed to iconv_convert
+// by address, so it is shared between calls of this overload.
+std::vector<char>
+ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
{
- std::vector<char> in;
- in.push_back(static_cast<char>((c & 0xff000000) >> 24));
- in.push_back(static_cast<char>((c & 0x00ff0000) >> 16));
- in.push_back(static_cast<char>((c & 0x0000ff00) >> 8));
- in.push_back(static_cast<char>(c & 0x000000ff));
- std::vector<char> res = iconv_convert("UTF-8", "UCS-4", in);
- return res;
+ static iconv_t cd = (iconv_t)(-1);
+ return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,
+ ucs4str, ls);
}