3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
8 * Full author contact details are available in file CREDITS.
10 * A collection of unicode conversion functions, using iconv.
// Codeset names handed to iconv for UCS-4 and UCS-2 data. The big-endian
// names are selected when WORDS_BIGENDIAN is defined; the little-endian pair
// presumably sits in the matching #else branch (the #else/#endif lines are
// not visible in this excerpt).
30 #ifdef WORDS_BIGENDIAN
31 char const * ucs4_codeset = "UCS-4BE";
32 char const * ucs2_codeset = "UCS-2BE";
34 char const * ucs4_codeset = "UCS-4LE";
35 char const * ucs2_codeset = "UCS-2LE";
// Low-level conversion helper built directly on iconv.
//
// Visible behaviour: a conversion descriptor is obtained with
// iconv_open(tocode, fromcode) and stored in `cd`, the input is run through
// iconv(), problems are reported on lyxerr, and the return value is the
// number of bytes written to the output buffer (maxoutsize - outbytesleft).
//
// NOTE(review): several lines of this function are not visible in this
// excerpt (the remaining parameters, the conditions guarding each error
// branch, and whatever the error paths return), so the comments below only
// describe what the visible statements do.
38 int iconv_convert(int & cd,
40 char const * fromcode,
// NOTE(review): the iconv_t handle is stored in an int. Where iconv_t is a
// pointer type wider than int this cast would truncate it -- confirm.
50 cd = (int)(iconv_open(tocode, fromcode));
// Diagnostics for a failed iconv_open.
52 lyxerr << "Error returned from iconv_open" << endl;
55 lyxerr << "EINVAL The conversion from " << fromcode
57 << " is not supported by the implementation."
61 lyxerr << "\tSome other error: " << errno << endl;
// The const_cast adapts buf to the `char ICONV_CONST *` type used for the
// input argument of iconv().
67 char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
68 size_t inbytesleft = buflen;
69 size_t outbytesleft = maxoutsize;
// outbytesleft is passed by address and read again below to compute the
// return value.
// NOTE(review): iconv() returns size_t; the result is kept in an int here.
71 int res = iconv((iconv_t)(cd), &inbuf, &inbytesleft, &outbuf, &outbytesleft);
// Diagnostics for a failed iconv() call, one message per errno value named
// in the text.
74 lyxerr << "Error returned from iconv" << endl;
77 lyxerr << "E2BIG There is not sufficient room at *outbuf." << endl;
80 lyxerr << "EILSEQ An invalid multibyte sequence"
81 << " has been encountered in the input.\n"
82 << "When converting from " << fromcode
83 << " to " << tocode << ".\n";
// Dump every input byte in hex to help locate the offending sequence.
// NOTE(review): if buf is a plain char pointer and char is signed, bytes
// >= 0x80 sign-extend in this assignment and print as 0xffffffXX -- confirm.
84 lyxerr << "Input: " << std::hex;
85 for (size_t i = 0; i < buflen; ++i) {
86 boost::uint32_t const b = buf[i];
87 lyxerr << "0x" << b << " ";
92 lyxerr << "EINVAL An incomplete multibyte sequence"
93 << " has been encountered in the input.\n"
94 << "When converting from " << fromcode
95 << " to " << tocode << ".\n";
// Same hex dump as for EILSEQ (same sign-extension caveat applies).
96 lyxerr << "Input: " << std::hex;
97 for (size_t i = 0; i < buflen; ++i) {
98 boost::uint32_t const b = buf[i];
99 lyxerr << "0x" << b << " ";
104 lyxerr << "\tSome other error: " << errno << endl;
107 // We got an error so we close down the conversion engine
108 if (iconv_close((iconv_t)(cd)) == -1) {
109 lyxerr << "Error returned from iconv_close("
110 << errno << ")" << endl;
115 //lyxerr << std::dec;
116 //lyxerr << "Inbytesleft: " << inbytesleft << endl;
117 //lyxerr << "Outbytesleft: " << outbytesleft << endl;
// Bytes actually produced = capacity offered minus capacity left over.
119 return maxoutsize - outbytesleft;
// Typed wrapper around lyx::iconv_convert: takes `buflen` elements of InType
// and returns the converted data as a std::vector<RetType>.
//
// The input is viewed as raw bytes (buflen * sizeof(InType)), converted into
// a fixed 32768-byte buffer, and the produced byte count is divided by
// sizeof(RetType) to get the number of elements returned.
//
// NOTE(review): the condition for the early empty-vector return and the
// definition of `outbuf` are on lines not visible in this excerpt.
126 template<typename RetType, typename InType>
128 iconv_convert(int & cd,
130 char const * fromcode,
135 return std::vector<RetType>();
137 char const * inbuf = reinterpret_cast<char const *>(buf);
138 size_t inbytesleft = buflen * sizeof(InType);
// `out` has static storage, so it is shared by every call of a given
// instantiation, and a single conversion can produce at most `outsize` bytes.
140 size_t const outsize = 32768;
141 static char out[outsize];
144 int bytes = lyx::iconv_convert(cd, tocode, fromcode, inbuf, inbytesleft, outbuf, outsize);
// Reinterpret the byte buffer as RetType elements and copy them out.
146 RetType const * tmp = reinterpret_cast<RetType const *>(out);
147 return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
// Convert a vector of UTF-8 bytes to UCS-4 by forwarding to the
// pointer/length overload.
// The empty-vector return presumably guards an empty input so that
// &utf8str[0] is never taken on an empty vector (the guard condition itself
// is not visible in this excerpt).
153 std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
156 return std::vector<lyx::char_type>();
158 return utf8_to_ucs4(&utf8str[0], utf8str.size());
// Convert a UTF-8 buffer (pointer plus length `ls`) to UCS-4 through
// iconv_convert, using "UTF-8" as the source codeset and ucs4_codeset as the
// target. The declaration of `cd` and the tail of the call are not visible
// in this excerpt.
162 std::vector<lyx::char_type>
163 utf8_to_ucs4(char const * utf8str, size_t ls)
166 return iconv_convert<lyx::char_type>(cd, ucs4_codeset, "UTF-8",
// Convert a single UCS-2 code unit: runs the buffer overload on a
// one-element buffer and returns the first element of its result.
// NOTE(review): [0] assumes the conversion produced at least one element --
// confirm what the buffer overload returns on a conversion failure.
172 ucs2_to_ucs4(unsigned short c)
174 return ucs2_to_ucs4(&c, 1)[0];
// Convert a vector of UCS-2 code units to UCS-4 by forwarding to the
// pointer/length overload.
// The empty-vector return presumably guards an empty input so that
// &ucs2str[0] is never taken on an empty vector (the guard condition itself
// is not visible in this excerpt).
178 std::vector<lyx::char_type>
179 ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
182 return std::vector<lyx::char_type>();
184 return ucs2_to_ucs4(&ucs2str[0], ucs2str.size());
// Convert a UCS-2 buffer (pointer plus length `ls`) to UCS-4 through
// iconv_convert, from ucs2_codeset to ucs4_codeset. The declaration of `cd`
// and the tail of the call are not visible in this excerpt.
188 std::vector<lyx::char_type>
189 ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
192 return iconv_convert<lyx::char_type>(cd, ucs4_codeset, ucs2_codeset,
// Convert a single UCS-4 character: runs the buffer overload on a
// one-element buffer and returns the first element of its result.
// NOTE(review): [0] assumes the conversion produced at least one element --
// confirm what the buffer overload returns on a conversion failure.
198 ucs4_to_ucs2(lyx::char_type c)
200 return ucs4_to_ucs2(&c, 1)[0];
// Convert a vector of UCS-4 characters to UCS-2 by forwarding to the
// pointer/length overload.
// The empty-vector return presumably guards an empty input so that
// &ucs4str[0] is never taken on an empty vector (the guard condition itself
// is not visible in this excerpt).
204 std::vector<unsigned short>
205 ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
208 return std::vector<unsigned short>();
210 return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
// Convert a UCS-4 buffer (pointer plus length `ls`) to UCS-2 through
// iconv_convert, from ucs4_codeset to ucs2_codeset. The declaration of `cd`
// and the tail of the call are not visible in this excerpt.
214 std::vector<unsigned short>
215 ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
218 return iconv_convert<unsigned short>(cd, ucs2_codeset, ucs4_codeset,
// Convert a single UCS-4 character to UTF-8 by passing a one-element buffer
// (&c, 1) to iconv_convert<char>; the result is the vector of produced bytes.
// The return-type line and the declaration of `cd` are not visible in this
// excerpt.
224 ucs4_to_utf8(lyx::char_type c)
227 return iconv_convert<char>(cd, "UTF-8", ucs4_codeset, &c, 1);
// Convert a vector of UCS-4 characters to UTF-8 bytes by forwarding to the
// pointer/length overload.
// The empty-vector return presumably guards an empty input so that
// &ucs4str[0] is never taken on an empty vector (the guard condition itself
// is not visible in this excerpt).
232 ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
235 return std::vector<char>();
237 return ucs4_to_utf8(&ucs4str[0], ucs4str.size());
// Convert a UCS-4 buffer (pointer plus length `ls`) to UTF-8 through
// iconv_convert<char>, from ucs4_codeset to "UTF-8". The return-type line,
// the declaration of `cd` and the tail of the call are not visible in this
// excerpt.
242 ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
245 return iconv_convert<char>(cd, "UTF-8", ucs4_codeset,
// Convert `ls` bytes at `s`, encoded in the named `encoding`, to UCS-4.
250 std::vector<lyx::char_type>
251 eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
// One conversion descriptor slot per encoding name, kept in a function-local
// static map so it persists between calls.
253 static std::map<std::string, int> cd;
// The statement executed for a not-yet-seen encoding is not visible in this
// excerpt; presumably it seeds cd[encoding] -- TODO confirm.
254 if (cd.find(encoding) == cd.end())
256 return iconv_convert<char_type>(cd[encoding], ucs4_codeset,
257 encoding.c_str(), s, ls);
// Convert `ls` UCS-4 characters at `ucs4str` to bytes in the named
// `encoding`. The return-type line is not visible in this excerpt.
262 ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding)
// One conversion descriptor slot per encoding name, kept in a function-local
// static map so it persists between calls.
264 static std::map<std::string, int> cd;
// The statement executed for a not-yet-seen encoding is not visible in this
// excerpt; presumably it seeds cd[encoding] -- TODO confirm.
265 if (cd.find(encoding) == cd.end())
267 return iconv_convert<char>(cd[encoding], encoding.c_str(),
268 ucs4_codeset, ucs4str, ls);