]> git.lyx.org Git - lyx.git/blob - src/support/unicode.C
Provide do_put methods for inserting all remaining basic type values
[lyx.git] / src / support / unicode.C
1 /**
2  * \file unicode.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  *
8  * Full author contact details are available in file CREDITS.
9  *
10  * A collection of unicode conversion functions, using iconv.
11  */
12
13 #include <config.h>
14
15 #include "unicode.h"
16
17 #include "debug.h"
18
19 #include <iconv.h>
20
21 #include <cerrno>
22 #include <iomanip>
23 #include <map>
24
25 using std::endl;
26
namespace {

// Codeset name used for UTF-16 data when talking to iconv. The variant
// is picked by the WORDS_BIGENDIAN macro so that it matches the byte
// order the build was configured for.
#ifndef WORDS_BIGENDIAN
	const char * utf16_codeset = "UTF16-LE";
#else
	const char * utf16_codeset = "UTF16-BE";
#endif

} // anon namespace
36
37
38 namespace lyx {
39
// Codeset name used for UCS-4 data when talking to iconv. The variant
// is picked by the WORDS_BIGENDIAN macro so that it matches the byte
// order the build was configured for.
#ifndef WORDS_BIGENDIAN
	const char * ucs4_codeset = "UCS-4LE";
#else
	const char * ucs4_codeset = "UCS-4BE";
#endif

// Sentinel for "no open conversion descriptor". It is compared against
// the result of iconv_open() and stored whenever a descriptor is closed.
static iconv_t const invalid_cd = (iconv_t)(-1);
47
48
49 struct IconvProcessor::Private {
50         Private(): cd(invalid_cd) {}
51         ~Private()
52         {
53                 if (cd != invalid_cd) {
54                         if (iconv_close(cd) == -1) {
55                                 lyxerr << "Error returned from iconv_close("
56                                        << errno << ")" << endl;
57                         }
58                 }
59         }
60         iconv_t cd;
61 };
62
63
64 IconvProcessor::IconvProcessor(char const * tocode,
65                 char const * fromcode): tocode_(tocode), fromcode_(fromcode),
66                 pimpl_(new IconvProcessor::Private)
67 {
68 }
69
70
71 IconvProcessor::IconvProcessor(IconvProcessor const & other)
72         : tocode_(other.tocode_), fromcode_(other.fromcode_),
73           pimpl_(new IconvProcessor::Private)
74 {
75 }
76
77
78 IconvProcessor & IconvProcessor::operator=(IconvProcessor const & other)
79 {
80         if (&other == this)
81                 return *this;
82         tocode_ = other.tocode_;
83         fromcode_ = other.fromcode_;
84         pimpl_.reset(new Private);
85         return *this;
86 }
87
88
89 IconvProcessor::~IconvProcessor() {}
90
91
92 bool IconvProcessor::init()
93 {
94         if (pimpl_->cd != invalid_cd)
95                 return true;
96
97         pimpl_->cd = iconv_open(tocode_.c_str(), fromcode_.c_str());
98         if (pimpl_->cd != invalid_cd)
99                 return true;
100
101         lyxerr << "Error returned from iconv_open" << endl;
102         switch (errno) {
103                 case EINVAL:
104                         lyxerr << "EINVAL The conversion from " << fromcode_
105                                 << " to " << tocode_
106                                 << " is not supported by the implementation."
107                                 << endl;
108                         break;
109                 default:
110                         lyxerr << "\tSome other error: " << errno << endl;
111                         break;
112         }
113         return false;
114 }
115
116
117 int IconvProcessor::convert(char const * buf, size_t buflen,
118                 char * outbuf, size_t maxoutsize)
119 {
120         if (buflen == 0)
121                 return 0;
122
123         if (pimpl_->cd == invalid_cd) {
124                 if (!init())
125                         return -1;
126         }
127
128         char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
129         size_t inbytesleft = buflen;
130         size_t outbytesleft = maxoutsize;
131
132         int res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
133
134         //lyxerr << std::dec;
135         //lyxerr << "Inbytesleft: " << inbytesleft << endl;
136         //lyxerr << "Outbytesleft: " << outbytesleft << endl;
137
138         if (res != -1)
139                 // Everything went well.
140                 return maxoutsize - outbytesleft;
141
142         // There are some errors in the conversion
143         lyxerr << "Error returned from iconv" << endl;
144         switch (errno) {
145                 case E2BIG:
146                         lyxerr << "E2BIG  There is not sufficient room at *outbuf." << endl;
147                         break;
148                 case EILSEQ:
149                         lyxerr << "EILSEQ An invalid multibyte sequence"
150                                 << " has been encountered in the input.\n"
151                                 << "When converting from " << fromcode_
152                                 << " to " << tocode_ << ".\n";
153                         lyxerr << "Input: " << std::hex;
154                         for (size_t i = 0; i < buflen; ++i) {
155                                 boost::uint32_t const b = buf[i];
156                                 lyxerr << "0x" << b << " ";
157                         }
158                         lyxerr << endl;
159                         break;
160                 case EINVAL:
161                         lyxerr << "EINVAL An incomplete multibyte sequence"
162                                 << " has been encountered in the input.\n"
163                                 << "When converting from " << fromcode_
164                                 << " to " << tocode_ << ".\n";
165                         lyxerr << "Input: " << std::hex;
166                         for (size_t i = 0; i < buflen; ++i) {
167                                 boost::uint32_t const b = buf[i];
168                                 lyxerr << "0x" << b << " ";
169                         }
170                         lyxerr << endl;
171                         break;
172                 default:
173                         lyxerr << "\tSome other error: " << errno << endl;
174                         break;
175         }
176         // We got an error so we close down the conversion engine
177         if (iconv_close(pimpl_->cd) == -1) {
178                 lyxerr << "Error returned from iconv_close("
179                         << errno << ")" << endl;
180         }
181         pimpl_->cd = invalid_cd;
182         return -1;
183 }
184
185
186 namespace {
187
188
189 template<typename RetType, typename InType>
190 std::vector<RetType>
191 iconv_convert(IconvProcessor & processor,
192               InType const * buf,
193               size_t buflen)
194 {
195         if (buflen == 0)
196                 return std::vector<RetType>();
197
198         char const * inbuf = reinterpret_cast<char const *>(buf);
199         size_t inbytesleft = buflen * sizeof(InType);
200
201         size_t const outsize = 32768;
202         static char out[outsize];
203         char * outbuf = out;
204
205         int bytes = processor.convert(inbuf, inbytesleft, outbuf, outsize);
206
207         RetType const * tmp = reinterpret_cast<RetType const *>(out);
208         return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
209 }
210
211 } // anon namespace
212
213
214 std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
215 {
216         if (utf8str.empty())
217                 return std::vector<lyx::char_type>();
218
219         return utf8_to_ucs4(&utf8str[0], utf8str.size());
220 }
221
222
223 std::vector<lyx::char_type>
224 utf8_to_ucs4(char const * utf8str, size_t ls)
225 {
226         static IconvProcessor processor(ucs4_codeset, "UTF-8");
227         return iconv_convert<lyx::char_type>(processor, utf8str, ls);
228 }
229
230
231 std::vector<char_type>
232 utf16_to_ucs4(unsigned short const * s, size_t ls)
233 {
234         static IconvProcessor processor(ucs4_codeset, utf16_codeset);
235         return iconv_convert<char_type>(processor, s, ls);
236 }
237
238
239 std::vector<unsigned short>
240 ucs4_to_utf16(char_type const * s, size_t ls)
241 {
242         static IconvProcessor processor(utf16_codeset, ucs4_codeset);
243         return iconv_convert<unsigned short>(processor, s, ls);
244 }
245
246
247 std::vector<char>
248 ucs4_to_utf8(lyx::char_type c)
249 {
250         static IconvProcessor processor("UTF-8", ucs4_codeset);
251         return iconv_convert<char>(processor, &c, 1);
252 }
253
254
255 std::vector<char>
256 ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
257 {
258         if (ucs4str.empty())
259                 return std::vector<char>();
260
261         return ucs4_to_utf8(&ucs4str[0], ucs4str.size());
262 }
263
264
265 std::vector<char>
266 ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
267 {
268         static IconvProcessor processor("UTF-8", ucs4_codeset);
269         return iconv_convert<char>(processor, ucs4str, ls);
270 }
271
272
273 std::vector<lyx::char_type>
274 eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
275 {
276         static std::map<std::string, IconvProcessor> processors;
277         if (processors.find(encoding) == processors.end()) {
278                 IconvProcessor processor(ucs4_codeset, encoding.c_str());
279                 processors.insert(std::make_pair(encoding, processor));
280         }
281         return iconv_convert<char_type>(processors[encoding], s, ls);
282 }
283
284
285 std::vector<char>
286 ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding)
287 {
288         static std::map<std::string, IconvProcessor> processors;
289         if (processors.find(encoding) == processors.end()) {
290                 IconvProcessor processor(encoding.c_str(), ucs4_codeset);
291                 processors.insert(std::make_pair(encoding, processor));
292         }
293         return iconv_convert<char>(processors[encoding], ucs4str, ls);
294 }
295
296 } // namespace lyx