3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
8 * Full author contact details are available in file CREDITS.
10 * A collection of unicode conversion functions, using iconv.
// Names of the UCS-4 and UCS-2 codesets passed to iconv by the conversion
// functions in this file. An explicit byte order is selected at compile time
// through WORDS_BIGENDIAN (presumably provided by the build configuration --
// confirm), so that the converted bytes can be read back as host integers.
// The pointers themselves are const: nothing is supposed to re-point them.
#ifdef WORDS_BIGENDIAN
	char const * const ucs4_codeset = "UCS-4BE";
	char const * const ucs2_codeset = "UCS-2BE";
#else
	char const * const ucs4_codeset = "UCS-4LE";
	char const * const ucs2_codeset = "UCS-2LE";
#endif
39 iconv_convert(std::string const & tocode, std::string const & fromcode,
40 std::vector<char> const & buf)
43 return std::vector<char>();
45 iconv_t cd = iconv_open(tocode.c_str(), fromcode.c_str());
46 if (cd == (iconv_t)(-1)) {
47 lyxerr << "Error returned from iconv_open" << endl;
50 lyxerr << "EINVAL The conversion from " << fromcode
52 << " is not supported by the implementation."
56 lyxerr << "\tSome other error: " << errno << endl;
61 char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(&buf[0]);
62 size_t inbytesleft = buf.size();
63 static char out[1000];
65 size_t outbytesleft = 1000;
67 size_t res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
69 if (res == (size_t)(-1)) {
70 lyxerr << "Error returned from iconv" << endl;
73 lyxerr << "E2BIG There is not sufficient room at *outbuf." << endl;
76 lyxerr << "EILSEQ An invalid multibyte sequence"
77 << " has been encountered in the input.\n"
78 << "When converting from " << fromcode
79 << " to " << tocode << ".\n";
80 lyxerr << "Input: " << std::hex;
81 for (size_t i = 0; i < buf.size(); ++i) {
82 unsigned char const b = buf[i];
83 lyxerr << "0x" << int(b) << " ";
88 lyxerr << "EINVAL An incomplete multibyte sequence"
89 << " has been encountered in the input.\n"
90 << "When converting from " << fromcode
91 << " to " << tocode << ".\n";
92 lyxerr << "Input: " << std::hex;
93 for (size_t i = 0; i < buf.size(); ++i) {
94 unsigned char const b = buf[i];
95 lyxerr << "0x" << int(b) << " ";
100 lyxerr << "\tSome other error: " << errno << endl;
105 if (iconv_close(cd) == -1) {
106 lyxerr << "Error returned from iconv_close("
107 << errno << ")" << endl;
110 //lyxerr << std::dec;
111 //lyxerr << "Inbytesleft: " << inbytesleft << endl;
112 //lyxerr << "Outbytesleft: " << outbytesleft << endl;
113 int bytes = 1000 - outbytesleft;
115 std::vector<char> outvec(out, out + bytes);
120 std::vector<boost::uint32_t> bytes_to_ucs4(std::vector<char> const & bytes)
122 boost::uint32_t const * tmp = reinterpret_cast<uint32_t const *>(&bytes[0]);
123 return std::vector<boost::uint32_t>(tmp, tmp + bytes.size() / 4);
/**
 * Reassemble a raw byte buffer into 16-bit code units in host byte order.
 *
 * \param bytes raw bytes, sizeof(unsigned short) per code unit
 * \return one code unit per complete group of bytes; trailing bytes that do
 *         not form a complete unit are ignored, and an empty input gives an
 *         empty result.
 */
std::vector<unsigned short> bytes_to_ucs2(std::vector<char> const & bytes)
{
	size_t const unit = sizeof(unsigned short);
	std::vector<unsigned short> result(bytes.size() / unit);
	// Nothing to copy; also avoids taking the address of element 0 of an
	// empty vector.
	if (result.empty())
		return result;

	// Copy byte-wise into the destination instead of reading the char
	// buffer through an unsigned short pointer, which would depend on the
	// buffer's alignment and break the aliasing rules.
	char * const dst = reinterpret_cast<char *>(&result[0]);
	size_t const nbytes = result.size() * unit;
	for (size_t i = 0; i != nbytes; ++i)
		dst[i] = bytes[i];
	return result;
}
136 std::vector<boost::uint32_t> utf8_to_ucs4(std::vector<char> const & utf8str)
138 //lyxerr << "Buff = " << string(utf8str.begin(), utf8str.end())
139 // << " (" << utf8str.size() << ")" << endl;
140 //lyxerr << "Res = " << string(res.begin(), res.end())
141 // << " (" << res.size() << ")" << endl;
143 std::vector<char> res = iconv_convert(ucs4_codeset, "UTF-8", utf8str);
144 return bytes_to_ucs4(res);
148 std::vector<boost::uint32_t>
149 ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
151 char const * tin = reinterpret_cast<char const *>(&ucs2str[0]);
152 std::vector<char> in(tin, tin + ucs2str.size() * 2);
153 std::vector<char> res = iconv_convert(ucs4_codeset, ucs2_codeset, in);
154 return bytes_to_ucs4(res);
158 std::vector<unsigned short>
159 ucs4_to_ucs2(std::vector<boost::uint32_t> const & ucs4str)
161 char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
162 std::vector<char> in(tin, tin + ucs4str.size() * 4);
163 std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
164 return bytes_to_ucs2(res);
168 std::vector<unsigned short>
169 ucs4_to_ucs2(boost::uint32_t const * s, size_t ls)
171 char const * tin = reinterpret_cast<char const *>(s);
172 std::vector<char> in(tin, tin + ls * 4);
173 std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
174 return bytes_to_ucs2(res);
179 ucs4_to_ucs2(boost::uint32_t c)
181 char const * tin = reinterpret_cast<char const *>(&c);
182 std::vector<char> in(tin, tin + 4);
183 std::vector<char> res = iconv_convert(ucs2_codeset, ucs4_codeset, in);
184 return bytes_to_ucs2(res)[0];
188 std::vector<char> ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str)
190 char const * tin = reinterpret_cast<char const *>(&ucs4str[0]);
191 std::vector<char> in(tin, tin + ucs4str.size() * 4);
192 std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);
197 std::vector<char> ucs4_to_utf8(boost::uint32_t c)
199 char const * tin = reinterpret_cast<char const *>(&c);
200 std::vector<char> in(tin, tin + 4);
201 std::vector<char> res = iconv_convert("UTF-8", ucs4_codeset, in);