]> git.lyx.org Git - lyx.git/blobdiff - src/support/unicode.C
MacOSX compile fix.
[lyx.git] / src / support / unicode.C
index 3f431dbab68f691ad87586b1aa99c29cd7777e75..1545e2ddc01b19e0bf9609ec751c9d208e8b6b8c 100644 (file)
 
 #include <cerrno>
 #include <iomanip>
-#include <string>
 
 using std::endl;
-using std::string;
-
-namespace {
 
 #ifdef WORDS_BIGENDIAN
        char const * ucs4_codeset = "UCS-4BE";
@@ -35,36 +31,45 @@ namespace {
        char const * ucs2_codeset = "UCS-2LE";
 #endif
 
-std::vector<char>
-iconv_convert(std::string const & tocode, std::string const & fromcode,
-             std::vector<char> const & buf)
-{
-       if (buf.empty())
-               return std::vector<char>();
+namespace {
 
-       iconv_t cd = iconv_open(tocode.c_str(), fromcode.c_str());
-       if (cd == (iconv_t)(-1)) {
-               lyxerr << "Error returned from iconv_open" << endl;
-               switch (errno) {
-               case EINVAL:
-                       lyxerr << "EINVAL The conversion from " << fromcode
-                              << " to " << tocode
-                              << " is not supported by the implementation."
-                              << endl;
-                       break;
-               default:
-                       lyxerr << "\tSome other error: " << errno << endl;
-                       break;
+template<typename RetType, typename InType>
+std::vector<RetType>
+iconv_convert(iconv_t * cd,
+             char const * tocode,
+             char const * fromcode,
+             InType const * buf,
+             size_t buflen)
+{
+       if (buflen == 0)
+               return std::vector<RetType>();
+
+       if (*cd == (iconv_t)(-1)) {
+               *cd = iconv_open(tocode, fromcode);
+               if (*cd == (iconv_t)(-1)) {
+                       lyxerr << "Error returned from iconv_open" << endl;
+                       switch (errno) {
+                       case EINVAL:
+                               lyxerr << "EINVAL The conversion from " << fromcode
+                                      << " to " << tocode
+                                      << " is not supported by the implementation."
+                                      << endl;
+                               break;
+                       default:
+                               lyxerr << "\tSome other error: " << errno << endl;
+                               break;
+                       }
                }
        }
 
-       char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(&buf[0]);
-       size_t inbytesleft = buf.size();
-       static char out[1000];
+       char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
+       size_t inbytesleft = buflen * sizeof(InType);
+       size_t const outsize = 1000;
+       static char out[outsize];
        char * outbuf = out;
-       size_t outbytesleft = 1000;
+       size_t outbytesleft = outsize;
 
-       size_t res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+       size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
 
        if (res == (size_t)(-1)) {
                lyxerr << "Error returned from iconv" << endl;
@@ -78,9 +83,9 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
                               << "When converting from " << fromcode
                               << " to " << tocode << ".\n";
                        lyxerr << "Input: " << std::hex;
-                       for (size_t i = 0; i < buf.size(); ++i) {
-                               unsigned char const b = buf[i];
-                               lyxerr << "0x" << int(b) << " ";
+                       for (size_t i = 0; i < buflen; ++i) {
+                               boost::uint32_t const b = buf[i];
+                               lyxerr << "0x" << b << " ";
                        }
                        lyxerr << endl;
                        break;
@@ -90,9 +95,9 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
                               << "When converting from " << fromcode
                               << " to " << tocode << ".\n";
                        lyxerr << "Input: " << std::hex;
-                       for (size_t i = 0; i < buf.size(); ++i) {
-                               unsigned char const b = buf[i];
-                               lyxerr << "0x" << int(b) << " ";
+                       for (size_t i = 0; i < buflen; ++i) {
+                               boost::uint32_t const b = buf[i];
+                               lyxerr << "0x" << b << " ";
                        }
                        lyxerr << endl;
                        break;
@@ -100,104 +105,106 @@ iconv_convert(std::string const & tocode, std::string const & fromcode,
                        lyxerr << "\tSome other error: " << errno << endl;
                        break;
                }
-       }
-
-       if (iconv_close(cd) == -1) {
-               lyxerr << "Error returned from iconv_close("
-                      << errno << ")" << endl;
+               // We got an error so we close down the conversion engine
+               if (iconv_close(*cd) == -1) {
+                       lyxerr << "Error returned from iconv_close("
+                              << errno << ")" << endl;
+               }
+               *cd = (iconv_t)(-1);
        }
 
        //lyxerr << std::dec;
        //lyxerr << "Inbytesleft: " << inbytesleft << endl;
        //lyxerr << "Outbytesleft: " << outbytesleft << endl;
-       int bytes = 1000 - outbytesleft;
+       int bytes = outsize - outbytesleft;
 
-       std::vector<char> outvec(out, out + bytes);
-       return outvec;
+       RetType const * tmp = reinterpret_cast<RetType const *>(out);
+       return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
 }
 
+} // anon namespace
+
 
-std::vector<boost::uint32_t> bytes_to_ucs4(std::vector<char> const & bytes)
+std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
 {
-       boost::uint32_t const * tmp = reinterpret_cast<uint32_t const *>(&bytes[0]);
-       return std::vector<boost::uint32_t>(tmp, tmp + bytes.size() / 4);
+       return utf8_to_ucs4(&utf8str[0], utf8str.size());
 }
 
 
-std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
+std::vector<lyx::char_type>
+utf8_to_ucs4(char const * utf8str, size_t ls)
 {
-       unsigned short const * tmp = reinterpret_cast<unsigned short const *>(&bytes[0]);
-       return std::vector<unsigned short>(tmp, tmp + bytes.size() / 2);
+       static iconv_t cd = (iconv_t)(-1);
+       return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, "UTF-8",
+                                             utf8str, ls);
 }
 
-} // anon namespace
+
+lyx::char_type
+ucs2_to_ucs4(unsigned short c)
+{
+       return ucs2_to_ucs4(&c, 1)[0];
+}
 
 
-std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
+std::vector<lyx::char_type>
+ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
 {
-       //lyxerr << "Buff = " << string(utf8str.begin(), utf8str.end())
-       //       << " (" << utf8str.size() << ")" << endl;
-       //lyxerr << "Res = " << string(res.begin(), res.end())
-       //       << " (" << res.size() << ")" << endl;
+       return ucs2_to_ucs4(&ucs2str[0], ucs2str.size());
+}
 
-       std::vector<char> res = iconv_convert(ucs4_codeset, "UTF-8", utf8str);
-       return bytes_to_ucs4(res);
+
+std::vector<lyx::char_type>
+ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
+{
+       static iconv_t cd = (iconv_t)(-1);
+       return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, ucs2_codeset,
+                                             ucs2str, ls);
 }
 
 
-std::vector<boost::uint32_t>
-ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
+unsigned short
+ucs4_to_ucs2(lyx::char_type c)
 {
-       char const * tin = reinterpret_cast<char const *>(&ucs2str[0]);
-       std::vector<char> in(tin, tin + ucs2str.size() * 2);
-       std::vector<char> res = iconv_convert(ucs4_codeset, ucs2_codeset, in);
-       return bytes_to_ucs4(res);
+       return ucs4_to_ucs2(&c, 1)[0];
 }
 
 
 std::vector<unsigned short>
-ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
+ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
 {
-       char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
-       std::vector<char> in(tin, tin + ucs4str.size() * 4);
-       std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
-       return bytes_to_ucs2(res);
+       return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
 }
 
 
 std::vector<unsigned short>
-ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
+ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
 {
-       char const * tin = reinterpret_cast<char const *>(s);
-       std::vector<char> in(tin, tin + ls * 4);
-       std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
-       return bytes_to_ucs2(res);
+       static iconv_t cd = (iconv_t)(-1);
+       return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
+                                            s, ls);
 }
 
 
-unsigned short
-ucs4_to_ucs2(boost::uint32_t c)
+std::vector<char>
+ucs4_to_utf8(lyx::char_type c)
 {
-       char const * tin = reinterpret_cast<char const *>(&c);
-       std::vector<char> in(tin, tin + 4);
-       std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
-       return bytes_to_ucs2(res)[0];
+       static iconv_t cd = (iconv_t)(-1);
+       return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
 }
 
 
-std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
+std::vector<char>
+ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
 {
-       char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
-       std::vector<char> in(tin, tin + ucs4str.size() * 4);
-       std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
-       return res;
+       return ucs4_to_utf8(&ucs4str[0], ucs4str.size());
 }
 
 
-std::vector<char> ucs4_to_utf8(boost::uint32_t c)
+std::vector<char>
+ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
 {
-       char const * tin = reinterpret_cast<char const *>(&c);
-       std::vector<char> in(tin, tin + 4);
-       std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
-       return res;
+       static iconv_t cd = (iconv_t)(-1);
+       return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,
+                                  ucs4str, ls);
 }