]> git.lyx.org Git - lyx.git/blob - src/support/unicode.C
hopefully fix tex2lyx linking.
[lyx.git] / src / support / unicode.C
1 /**
2  * \file unicode.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  *
8  * Full author contact details are available in file CREDITS.
9  *
10  * A collection of unicode conversion functions, using iconv.
11  */
12
13 #include <config.h>
14
15 #include "unicode.h"
16
17 #include "debug.h"
18
19 #include <iconv.h>
20
21 #include <cerrno>
22 #include <iomanip>
23 #include <map>
24
25 using std::endl;
26
27 namespace lyx {
28
// iconv codeset names for UCS-4 and UCS-2. The variant (big or little
// endian) is chosen at compile time depending on whether WORDS_BIGENDIAN
// is defined.
#if defined(WORDS_BIGENDIAN)
        char const * ucs2_codeset = "UCS-2BE";
        char const * ucs4_codeset = "UCS-4BE";
#else
        char const * ucs2_codeset = "UCS-2LE";
        char const * ucs4_codeset = "UCS-4LE";
#endif

// Sentinel descriptor value: iconv_open() results are compared against it
// to detect failure, and it marks a descriptor that is currently not open.
static iconv_t const invalid_cd = (iconv_t)(-1);
38
39
40 struct IconvProcessor::Private {
41         Private(): cd(invalid_cd) {}
42         ~Private()
43         {
44                 if (cd != invalid_cd) {
45                         if (iconv_close(cd) == -1) {
46                                 lyxerr << "Error returned from iconv_close("
47                                        << errno << ")" << endl;
48                         }
49                 }
50         }
51         iconv_t cd;
52 };
53
54
55 IconvProcessor::IconvProcessor(char const * tocode,
56                 char const * fromcode): tocode_(tocode), fromcode_(fromcode),
57                 pimpl_(new IconvProcessor::Private)
58 {
59 }
60
61
62 IconvProcessor::IconvProcessor(IconvProcessor const & other)
63         : tocode_(other.tocode_), fromcode_(other.fromcode_),
64           pimpl_(new IconvProcessor::Private)
65 {
66 }
67
68
69 IconvProcessor & IconvProcessor::operator=(IconvProcessor const & other)
70 {
71         if (&other == this)
72                 return *this;
73         tocode_ = other.tocode_;
74         fromcode_ = other.fromcode_;
75         pimpl_.reset(new Private);
76         return *this;
77 }
78
79
80 IconvProcessor::~IconvProcessor() {}
81
82
83 bool IconvProcessor::init()
84 {
85         if (pimpl_->cd != invalid_cd)
86                 return true;
87
88         pimpl_->cd = iconv_open(tocode_.c_str(), fromcode_.c_str());
89         if (pimpl_->cd != invalid_cd)
90                 return true;
91
92         lyxerr << "Error returned from iconv_open" << endl;
93         switch (errno) {
94                 case EINVAL:
95                         lyxerr << "EINVAL The conversion from " << fromcode_
96                                 << " to " << tocode_
97                                 << " is not supported by the implementation."
98                                 << endl;
99                         break;
100                 default:
101                         lyxerr << "\tSome other error: " << errno << endl;
102                         break;
103         }
104         return false;
105 }
106
107
108 int IconvProcessor::convert(char const * buf, size_t buflen,
109                 char * outbuf, size_t maxoutsize)
110 {
111         if (buflen == 0)
112                 return 0;
113
114         if (pimpl_->cd == invalid_cd) {
115                 if (!init())
116                         return -1;
117         }
118
119         char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
120         size_t inbytesleft = buflen;
121         size_t outbytesleft = maxoutsize;
122
123         int res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
124
125         //lyxerr << std::dec;
126         //lyxerr << "Inbytesleft: " << inbytesleft << endl;
127         //lyxerr << "Outbytesleft: " << outbytesleft << endl;
128
129         if (res != -1)
130                 // Everything went well.
131                 return maxoutsize - outbytesleft;
132
133         // There are some errors in the conversion
134         lyxerr << "Error returned from iconv" << endl;
135         switch (errno) {
136                 case E2BIG:
137                         lyxerr << "E2BIG  There is not sufficient room at *outbuf." << endl;
138                         break;
139                 case EILSEQ:
140                         lyxerr << "EILSEQ An invalid multibyte sequence"
141                                 << " has been encountered in the input.\n"
142                                 << "When converting from " << fromcode_
143                                 << " to " << tocode_ << ".\n";
144                         lyxerr << "Input: " << std::hex;
145                         for (size_t i = 0; i < buflen; ++i) {
146                                 boost::uint32_t const b = buf[i];
147                                 lyxerr << "0x" << b << " ";
148                         }
149                         lyxerr << endl;
150                         break;
151                 case EINVAL:
152                         lyxerr << "EINVAL An incomplete multibyte sequence"
153                                 << " has been encountered in the input.\n"
154                                 << "When converting from " << fromcode_
155                                 << " to " << tocode_ << ".\n";
156                         lyxerr << "Input: " << std::hex;
157                         for (size_t i = 0; i < buflen; ++i) {
158                                 boost::uint32_t const b = buf[i];
159                                 lyxerr << "0x" << b << " ";
160                         }
161                         lyxerr << endl;
162                         break;
163                 default:
164                         lyxerr << "\tSome other error: " << errno << endl;
165                         break;
166         }
167         // We got an error so we close down the conversion engine
168         if (iconv_close(pimpl_->cd) == -1) {
169                 lyxerr << "Error returned from iconv_close("
170                         << errno << ")" << endl;
171         }
172         pimpl_->cd = invalid_cd;
173         return -1;
174 }
175
176
177 namespace {
178
179
180 template<typename RetType, typename InType>
181 std::vector<RetType>
182 iconv_convert(IconvProcessor & processor,
183               InType const * buf,
184               size_t buflen)
185 {
186         if (buflen == 0)
187                 return std::vector<RetType>();
188
189         char const * inbuf = reinterpret_cast<char const *>(buf);
190         size_t inbytesleft = buflen * sizeof(InType);
191
192         size_t const outsize = 32768;
193         static char out[outsize];
194         char * outbuf = out;
195
196         int bytes = processor.convert(inbuf, inbytesleft, outbuf, outsize);
197
198         RetType const * tmp = reinterpret_cast<RetType const *>(out);
199         return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
200 }
201
202 } // anon namespace
203
204
205 std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
206 {
207         if (utf8str.empty())
208                 return std::vector<lyx::char_type>();
209
210         return utf8_to_ucs4(&utf8str[0], utf8str.size());
211 }
212
213
214 std::vector<lyx::char_type>
215 utf8_to_ucs4(char const * utf8str, size_t ls)
216 {
217         static IconvProcessor processor(ucs4_codeset, "UTF-8");
218         return iconv_convert<lyx::char_type>(processor, utf8str, ls);
219 }
220
221
222 lyx::char_type
223 ucs2_to_ucs4(unsigned short c)
224 {
225         return ucs2_to_ucs4(&c, 1)[0];
226 }
227
228
229 std::vector<lyx::char_type>
230 ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
231 {
232         if (ucs2str.empty())
233                 return std::vector<lyx::char_type>();
234
235         return ucs2_to_ucs4(&ucs2str[0], ucs2str.size());
236 }
237
238
239 std::vector<lyx::char_type>
240 ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
241 {
242         static IconvProcessor processor(ucs4_codeset, ucs2_codeset);
243         return iconv_convert<lyx::char_type>(processor, ucs2str, ls);
244 }
245
246
247 unsigned short
248 ucs4_to_ucs2(lyx::char_type c)
249 {
250         return ucs4_to_ucs2(&c, 1)[0];
251 }
252
253
254 std::vector<unsigned short>
255 ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
256 {
257         if (ucs4str.empty())
258                 return std::vector<unsigned short>();
259
260         return ucs4_to_ucs2(&ucs4str[0], ucs4str.size());
261 }
262
263
264 std::vector<unsigned short>
265 ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
266 {
267         static IconvProcessor processor(ucs2_codeset, ucs4_codeset);
268         return iconv_convert<unsigned short>(processor, s, ls);
269 }
270
271
272 std::vector<char>
273 ucs4_to_utf8(lyx::char_type c)
274 {
275         static IconvProcessor processor("UTF-8", ucs4_codeset);
276         return iconv_convert<char>(processor, &c, 1);
277 }
278
279
280 std::vector<char>
281 ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
282 {
283         if (ucs4str.empty())
284                 return std::vector<char>();
285
286         return ucs4_to_utf8(&ucs4str[0], ucs4str.size());
287 }
288
289
290 std::vector<char>
291 ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
292 {
293         static IconvProcessor processor("UTF-8", ucs4_codeset);
294         return iconv_convert<char>(processor, ucs4str, ls);
295 }
296
297
298 std::vector<lyx::char_type>
299 eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
300 {
301         static std::map<std::string, IconvProcessor> processors;
302         if (processors.find(encoding) == processors.end()) {
303                 IconvProcessor processor(ucs4_codeset, encoding.c_str());
304                 processors.insert(std::make_pair(encoding, processor));
305         }
306         return iconv_convert<char_type>(processors[encoding], s, ls);
307 }
308
309
310 std::vector<char>
311 ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding)
312 {
313         static std::map<std::string, IconvProcessor> processors;
314         if (processors.find(encoding) == processors.end()) {
315                 IconvProcessor processor(encoding.c_str(), ucs4_codeset);
316                 processors.insert(std::make_pair(encoding, processor));
317         }
318         return iconv_convert<char>(processors[encoding], ucs4str, ls);
319 }
320
321 } // namespace lyx