3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
8 * Full author contact details are available in file CREDITS.
10 * A collection of unicode conversion functions, using iconv.
// iconv codeset names for the UCS-4 and UCS-2 encodings. They are passed as
// the tocode/fromcode arguments of iconv_convert by the conversion functions
// in this file. The big-endian names are selected when WORDS_BIGENDIAN is
// defined.
29 #ifdef WORDS_BIGENDIAN
30 char const * ucs4_codeset = "UCS-4BE";
31 char const * ucs2_codeset = "UCS-2BE";
// NOTE(review): the little-endian names below presumably belong to the #else
// branch of the conditional; the directive itself is not visible in this
// excerpt -- confirm against the full file.
33 char const * ucs4_codeset = "UCS-4LE";
34 char const * ucs2_codeset = "UCS-2LE";
// Convert a buffer of InType elements into a std::vector<RetType> using iconv.
//
// cd points at a conversion descriptor supplied by the caller. If it still
// holds (iconv_t)(-1) it is opened here with iconv_open(tocode, fromcode), so
// the descriptor is created on first use and stored back through the pointer.
// The input is read from buf: buflen elements, i.e. buflen * sizeof(InType)
// bytes. The converted bytes are written to a function-local static buffer of
// 32768 bytes and then copied into the returned vector.
// Failures of iconv_open, iconv and iconv_close are reported on lyxerr.
//
// NOTE(review): this excerpt omits several lines of the definition (the
// return type, the tocode/buf/buflen parameter declarations, the code that
// selects between the error messages, the declaration of outbuf, and the
// closing braces, among others). The comments below describe only what the
// visible lines show.
39 template<typename RetType, typename InType>
41 iconv_convert(iconv_t * cd,
43 char const * fromcode,
// Early exit with an empty result; the guarding condition is not visible in
// this excerpt.
48 return std::vector<RetType>();
// (iconv_t)(-1) marks a descriptor that has not been opened yet: open it now.
50 if (*cd == (iconv_t)(-1)) {
51 *cd = iconv_open(tocode, fromcode);
// iconv_open reports failure by returning (iconv_t)(-1); log the reason.
52 if (*cd == (iconv_t)(-1)) {
53 lyxerr << "Error returned from iconv_open" << endl;
56 lyxerr << "EINVAL The conversion from " << fromcode
58 << " is not supported by the implementation."
62 lyxerr << "\tSome other error: " << errno << endl;
// NOTE(review): no early return after a failed iconv_open is visible here;
// confirm that iconv is not called with an invalid descriptor.
// iconv consumes its input through a char pointer, so view buf as raw bytes.
// ICONV_CONST is presumably a build-configuration macro matching the constness
// of the platform's iconv prototype -- TODO confirm.
68 char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
// Input length in bytes, not in elements.
69 size_t inbytesleft = buflen * sizeof(InType);
70 // The preamble of the user guide is more than 11.500 characters, so we go for 32kb
71 size_t const outsize = 32768;
// NOTE(review): the output buffer is static, hence shared by every call of a
// given template instantiation; this looks non-reentrant -- confirm callers
// never run concurrently.
72 static char out[outsize];
74 size_t outbytesleft = outsize;
// Only one iconv call is visible, so output that does not fit into the
// 32768-byte buffer ends up on the E2BIG error path below.
76 size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
// iconv signals failure with (size_t)(-1) and sets errno.
78 if (res == (size_t)(-1)) {
79 lyxerr << "Error returned from iconv" << endl;
82 lyxerr << "E2BIG There is not sufficient room at *outbuf." << endl;
85 lyxerr << "EILSEQ An invalid multibyte sequence"
86 << " has been encountered in the input.\n"
87 << "When converting from " << fromcode
88 << " to " << tocode << ".\n";
// Dump every input element in hexadecimal to help locate the bad sequence.
89 lyxerr << "Input: " << std::hex;
90 for (size_t i = 0; i < buflen; ++i) {
// Widen to a 32-bit integer so the stream prints a number rather than a
// character when InType is a char type.
91 boost::uint32_t const b = buf[i];
92 lyxerr << "0x" << b << " ";
97 lyxerr << "EINVAL An incomplete multibyte sequence"
98 << " has been encountered in the input.\n"
99 << "When converting from " << fromcode
100 << " to " << tocode << ".\n";
// Same hexadecimal dump of the input as for the EILSEQ case.
101 lyxerr << "Input: " << std::hex;
102 for (size_t i = 0; i < buflen; ++i) {
103 boost::uint32_t const b = buf[i];
104 lyxerr << "0x" << b << " ";
109 lyxerr << "\tSome other error: " << errno << endl;
112 // We got an error so we close down the conversion engine
113 if (iconv_close(*cd) == -1) {
114 lyxerr << "Error returned from iconv_close("
115 << errno << ")" << endl;
// NOTE(review): whether *cd is reset to (iconv_t)(-1) after closing is not
// visible in this excerpt -- confirm, since callers keep the descriptor in a
// static variable and reuse it.
120 //lyxerr << std::dec;
121 //lyxerr << "Inbytesleft: " << inbytesleft << endl;
122 //lyxerr << "Outbytesleft: " << outbytesleft << endl;
// Bytes actually produced: buffer capacity minus the space iconv left unused.
123 int bytes = outsize - outbytesleft;
// Reinterpret the output bytes as RetType elements and copy
// bytes / sizeof(RetType) of them into the result vector.
125 RetType const * tmp = reinterpret_cast<RetType const *>(out);
126 return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
// Convert a UTF-8 byte vector to UCS-4 by forwarding the vector's data
// pointer and element count to the pointer/length overload of utf8_to_ucs4.
132 std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
// Early return of an empty result. The guarding condition is not visible in
// this excerpt; presumably it is an empty-input check, since &utf8str[0]
// below needs at least one element.
135 return std::vector<lyx::char_type>();
137 return utf8_to_ucs4(&utf8str[0], utf8str.size());
// Convert a UTF-8 buffer to UCS-4 through iconv_convert, with ucs4_codeset as
// the target codeset and "UTF-8" as the source codeset.
141 std::vector<lyx::char_type>
142 utf8_to_ucs4(char const * utf8str, size_t ls)
// The conversion descriptor is a function-local static, so it persists
// between calls. It starts as (iconv_t)(-1), the value iconv_convert treats
// as "not opened yet".
144 static iconv_t cd = (iconv_t)(-1);
// The remaining arguments of this call are on a line not shown in this
// excerpt (presumably utf8str and ls -- TODO confirm).
145 return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, "UTF-8",
// Convert a single UCS-2 code unit: calls the pointer/length overload with a
// length of 1 and returns element 0 of its result. The return type is on a
// line not shown in this excerpt.
151 ucs2_to_ucs4(unsigned short c)
// NOTE(review): indexing [0] assumes the conversion produced at least one
// element -- confirm what the overload returns when iconv fails.
153 return ucs2_to_ucs4(&c, 1)[0];
// Convert a vector of UCS-2 code units to UCS-4 by forwarding the vector's
// data pointer and element count to the pointer/length overload.
157 std::vector<lyx::char_type>
158 ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
// Early return of an empty result. The guarding condition is not visible in
// this excerpt; presumably it is an empty-input check, since &ucs2str[0]
// below needs at least one element.
161 return std::vector<lyx::char_type>();
163 return ucs2_to_ucs4(&ucs2str[0], ucs2str.size());
// Convert a UCS-2 buffer to UCS-4 through iconv_convert, with ucs4_codeset as
// the target codeset and ucs2_codeset as the source codeset.
167 std::vector<lyx::char_type>
168 ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
// The conversion descriptor is a function-local static, so it persists
// between calls. It starts as (iconv_t)(-1), the value iconv_convert treats
// as "not opened yet".
170 static iconv_t cd = (iconv_t)(-1);
// The remaining arguments of this call are on a line not shown in this
// excerpt (presumably ucs2str and ls -- TODO confirm).
171 return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, ucs2_codeset,
// Convert a single UCS-4 character: calls the pointer/length overload with a
// length of 1 and returns element 0 of its result. The return type is on a
// line not shown in this excerpt.
177 ucs4_to_ucs2(lyx::char_type c)
// NOTE(review): indexing [0] assumes the conversion produced at least one
// element -- confirm what the overload returns when iconv fails.
179 return ucs4_to_ucs2(&c, 1)[0];
// Convert a vector of UCS-4 characters to UCS-2 by forwarding the vector's
// data pointer and element count to the pointer/length overload.
183 std::vector<unsigned short>
184 ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
// Early return of an empty result. The guarding condition is not visible in
// this excerpt; presumably it is an empty-input check, since &ucs4str[0]
// below needs at least one element.
187 return std::vector<unsigned short>();
189 return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
// Convert a UCS-4 buffer to UCS-2 through iconv_convert, with ucs2_codeset as
// the target codeset and ucs4_codeset as the source codeset.
193 std::vector<unsigned short>
194 ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
// The conversion descriptor is a function-local static, so it persists
// between calls. It starts as (iconv_t)(-1), the value iconv_convert treats
// as "not opened yet".
196 static iconv_t cd = (iconv_t)(-1);
// The remaining arguments of this call are on a line not shown in this
// excerpt (presumably s and ls -- TODO confirm).
197 return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
// Convert a single UCS-4 character to UTF-8 and return the result of
// iconv_convert<char> for a one-element input. The return type is on a line
// not shown in this excerpt.
203 ucs4_to_utf8(lyx::char_type c)
// This overload keeps its own static descriptor, opened on first use by
// iconv_convert; (iconv_t)(-1) is the "not opened yet" value it checks for.
205 static iconv_t cd = (iconv_t)(-1);
206 return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
// Convert a vector of UCS-4 characters to UTF-8 by forwarding the vector's
// data pointer and element count to the pointer/length overload. The return
// type is on a line not shown in this excerpt.
211 ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
// Early return of an empty result. The guarding condition is not visible in
// this excerpt; presumably it is an empty-input check, since &ucs4str[0]
// below needs at least one element.
214 return std::vector<char>();
216 return ucs4_to_utf8(&ucs4str[0], ucs4str.size());
// Convert a UCS-4 buffer to UTF-8 through iconv_convert<char>, with "UTF-8"
// as the target codeset and ucs4_codeset as the source codeset. The return
// type is on a line not shown in this excerpt.
221 ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
// The conversion descriptor is a function-local static, so it persists
// between calls. It starts as (iconv_t)(-1), the value iconv_convert treats
// as "not opened yet".
223 static iconv_t cd = (iconv_t)(-1);
// The remaining arguments of this call lie beyond the end of this excerpt
// (presumably ucs4str and ls -- TODO confirm).
224 return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,