2 * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <http://www.gnu.org/licenses/>.
20 /* This file defines the conversion loop via Unicode as a pivot encoding. */
/* unicode_transliterate: fallback used when wc cannot be encoded directly.
   The strategies visible in this function are: decomposing Hangul into
   Jamo (only when cd->oflags has HAVE_HANGUL_JAMO), emitting a CJK variant
   followed by U+303E, substituting the quotation marks U+2018..U+201A, and
   finally consulting the generic transliteration table.  The
   multi-character strategies snapshot cd->ostate, outptr and outleft before
   writing and restore them when any element fails; on success they return
   outptr-backup_outptr, i.e. the number of bytes written. */
22 /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
23 static int unicode_transliterate (conv_t cd, ucs4_t wc,
24 unsigned char* outptr, size_t outleft)
26 if (cd->oflags & HAVE_HANGUL_JAMO) {
27 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
28 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
29 (contained in Unicode only). */
/* johab_hangul_decompose fills buf and returns the element count, or
   RET_ILUNI when wc cannot be decomposed. */
31 int ret = johab_hangul_decompose(cd,buf,wc);
32 if (ret != RET_ILUNI) {
33 /* we know 1 <= ret <= 3 */
/* Snapshot the output state and position so that a partially written
   sequence can be rolled back. */
34 state_t backup_state = cd->ostate;
35 unsigned char* backup_outptr = outptr;
36 size_t backup_outleft = outleft;
38 for (i = 0; i < ret; i++) {
40 sub_outcount = RET_TOOSMALL;
41 goto johab_hangul_failed;
/* Encode one decomposed element with the target encoding. */
43 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
/* Any result at or below RET_ILUNI counts as failure. */
44 if (sub_outcount <= RET_ILUNI)
45 goto johab_hangul_failed;
/* Sanity check: the encoder must not claim more bytes than were free. */
46 if (!(sub_outcount <= outleft)) abort();
47 outptr += sub_outcount; outleft -= sub_outcount;
/* Every element was written: report the total number of bytes produced. */
49 return outptr-backup_outptr;
/* Failure path: undo any partial output before trying something else. */
51 cd->ostate = backup_state;
52 outptr = backup_outptr;
53 outleft = backup_outleft;
/* NOTE(review): the statement governed by this test is not visible in this
   listing; presumably failures other than RET_ILUNI are returned to the
   caller -- confirm. */
54 if (sub_outcount != RET_ILUNI)
59 /* Try to use a variant, but postfix it with
60 U+303E IDEOGRAPHIC VARIATION INDICATOR
61 (cf. Ken Lunde's "CJKV information processing", p. 188). */
65 else if (wc == 0x30f6)
/* Code points in [0x4e00, 0xa000) get their index from cjk_variants_indx. */
67 else if (wc >= 0x4e00 && wc < 0xa000)
68 indx = cjk_variants_indx[wc-0x4e00];
/* Bit 0x8000 of the table entry is split off into 'last'.
   NOTE(review): presumably it flags the final variant of a list -- confirm. */
72 unsigned short variant = cjk_variants[indx];
73 unsigned short last = variant & 0x8000;
/* Candidate output: the variant followed by U+303E. */
76 buf[0] = variant; buf[1] = 0x303e;
/* Snapshot for rollback, as in the Hangul case. */
78 state_t backup_state = cd->ostate;
79 unsigned char* backup_outptr = outptr;
80 size_t backup_outleft = outleft;
82 for (i = 0; i < 2; i++) {
84 sub_outcount = RET_TOOSMALL;
87 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
88 if (sub_outcount <= RET_ILUNI)
90 if (!(sub_outcount <= outleft)) abort();
91 outptr += sub_outcount; outleft -= sub_outcount;
/* Both characters were written: report the number of bytes produced. */
93 return outptr-backup_outptr;
/* Failure path: undo any partial output of this variant. */
95 cd->ostate = backup_state;
96 outptr = backup_outptr;
97 outleft = backup_outleft;
/* NOTE(review): the governed statement is not visible here; presumably the
   non-RET_ILUNI failure is propagated -- confirm. */
98 if (sub_outcount != RET_ILUNI)
106 if (wc >= 0x2018 && wc <= 0x201a) {
107 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
/* Choice of substitute: with HAVE_QUOTATION_MARKS, U+201A becomes U+2018
   and the other two are kept; otherwise with HAVE_ACCENTS, U+2019 becomes
   U+00B4 and the rest U+0060; otherwise U+0027 is used. */
109 (cd->oflags & HAVE_QUOTATION_MARKS
110 ? (wc == 0x201a ? 0x2018 : wc)
111 : (cd->oflags & HAVE_ACCENTS
112 ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
113 : 0x0027 /* use apostrophe */
115 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
116 if (outcount != RET_ILUNI)
120 /* Use the transliteration table. */
121 int indx = translit_index(wc);
/* The entry at translit_data[indx] starts with a count (num), followed by
   that many replacement code points, read below as cp[0..num-1]. */
123 const unsigned int * cp = &translit_data[indx];
124 unsigned int num = *cp++;
/* Snapshot for rollback, as in the cases above. */
125 state_t backup_state = cd->ostate;
126 unsigned char* backup_outptr = outptr;
127 size_t backup_outleft = outleft;
130 for (i = 0; i < num; i++) {
132 sub_outcount = RET_TOOSMALL;
133 goto translit_failed;
/* Try the replacement character directly first. */
135 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
136 if (sub_outcount == RET_ILUNI)
137 /* Recursive transliteration. */
138 sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
139 if (sub_outcount <= RET_ILUNI)
140 goto translit_failed;
141 if (!(sub_outcount <= outleft)) abort();
142 outptr += sub_outcount; outleft -= sub_outcount;
/* The whole replacement sequence was written. */
144 return outptr-backup_outptr;
/* Failure path: undo any partial output of the replacement sequence. */
146 cd->ostate = backup_state;
147 outptr = backup_outptr;
148 outleft = backup_outleft;
/* NOTE(review): the governed statement is not visible here; presumably the
   non-RET_ILUNI failure is propagated -- confirm. */
149 if (sub_outcount != RET_ILUNI)
156 #ifndef LIBICONV_PLUG
/* State handed to uc_to_mb_write_replacement through its callback argument:
   where the next replacement byte goes and how much room is left. */
158 struct uc_to_mb_fallback_locals {
/* Next write position in the output buffer. */
159 unsigned char* l_outbuf;
/* Number of bytes still free at l_outbuf. */
160 size_t l_outbytesleft;
/* Callback passed to the uc_to_mb fallback: appends the buflen bytes at buf
   to the output buffer described by callback_arg (a struct
   uc_to_mb_fallback_locals).  If the bytes do not fit, l_errno is set to
   E2BIG; once l_errno is non-zero, later calls do nothing. */
164 static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
167 struct uc_to_mb_fallback_locals * plocals =
168 (struct uc_to_mb_fallback_locals *) callback_arg;
169 /* Do nothing if already encountered an error in a previous call. */
170 if (plocals->l_errno == 0) {
171 /* Attempt to copy the passed buffer to the output buffer. */
/* Not enough room for the whole replacement: record E2BIG. */
172 if (plocals->l_outbytesleft < buflen)
173 plocals->l_errno = E2BIG;
/* Copy the bytes, then advance the write position and shrink the free count. */
175 memcpy(plocals->l_outbuf, buf, buflen);
176 plocals->l_outbuf += buflen;
177 plocals->l_outbytesleft -= buflen;
/* State handed to mb_to_uc_write_replacement through its callback argument. */
182 struct mb_to_uc_fallback_locals {
/* Next write position in the output buffer. */
184 unsigned char* l_outbuf;
/* Number of bytes still free at l_outbuf. */
185 size_t l_outbytesleft;
/* Callback passed to the mb_to_uc fallback: converts the buflen Unicode
   code points at buf to the target encoding of plocals->l_cd and appends
   the result at plocals->l_outbuf.  For each code point it tries the direct
   encoder, then transliteration (if enabled), then the uc_to_mb fallback or
   U+FFFD.  Failures are recorded in plocals->l_errno (E2BIG or EILSEQ);
   once l_errno is non-zero, later calls do nothing. */
189 static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
192 struct mb_to_uc_fallback_locals * plocals =
193 (struct mb_to_uc_fallback_locals *) callback_arg;
194 /* Do nothing if already encountered an error in a previous call. */
195 if (plocals->l_errno == 0) {
196 /* Attempt to convert the passed buffer to the target encoding. */
/* Work on local copies; they are written back to plocals further down. */
197 conv_t cd = plocals->l_cd;
198 unsigned char* outptr = plocals->l_outbuf;
199 size_t outleft = plocals->l_outbytesleft;
/* One iteration per code point of the replacement. */
200 for (; buflen > 0; buf++, buflen--) {
204 plocals->l_errno = E2BIG;
/* First try to encode the code point directly. */
207 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
208 if (outcount != RET_ILUNI)
210 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
/* NOTE(review): the action taken for tag characters is not visible in this
   listing -- confirm against the full source. */
211 if ((wc >> 7) == (0xe0000 >> 7))
213 /* Try transliteration. */
214 if (cd->transliterate) {
215 outcount = unicode_transliterate(cd,wc,outptr,outleft);
216 if (outcount != RET_ILUNI)
219 if (cd->discard_ilseq) {
223 #ifndef LIBICONV_PLUG
224 else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
/* Let the user's uc_to_mb fallback write at the current output position. */
225 struct uc_to_mb_fallback_locals locals;
226 locals.l_outbuf = outptr;
227 locals.l_outbytesleft = outleft;
229 cd->fallbacks.uc_to_mb_fallback(wc,
230 uc_to_mb_write_replacement,
/* Propagate an error recorded by the nested callback. */
233 if (locals.l_errno != 0) {
234 plocals->l_errno = locals.l_errno;
/* Otherwise adopt the position the nested callback advanced to. */
237 outptr = locals.l_outbuf;
238 outleft = locals.l_outbytesleft;
/* Try U+FFFD as a substitute. */
243 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
244 if (outcount != RET_ILUNI)
246 plocals->l_errno = EILSEQ;
250 plocals->l_errno = E2BIG;
253 #ifndef LIBICONV_PLUG
/* Notify the optional per-character hook. */
254 if (cd->hooks.uc_hook)
255 (*cd->hooks.uc_hook)(wc, cd->hooks.data);
/* Sanity check: the encoder must not claim more bytes than were free. */
257 if (!(outcount <= outleft)) abort();
258 outptr += outcount; outleft -= outcount;
/* Write the advanced output position back for the caller. */
261 plocals->l_outbuf = outptr;
262 plocals->l_outbytesleft = outleft;
266 #endif /* !LIBICONV_PLUG */
/* unicode_loop_convert: converts input to output through Unicode.  Each
   character is decoded with cd->ifuncs.xxx_mbtowc and encoded with
   cd->ofuncs.xxx_wctomb.  Undecodable input is handled by discard_ilseq or
   the mb_to_uc fallback; unencodable characters by transliteration,
   discard_ilseq, the uc_to_mb fallback or U+FFFD.  The final buffer
   positions and remaining counts are stored through
   inbuf/inbytesleft/outbuf/outbytesleft. */
268 static size_t unicode_loop_convert (iconv_t icd,
269 const char* * inbuf, size_t *inbytesleft,
270 char* * outbuf, size_t *outbytesleft)
272 conv_t cd = (conv_t) icd;
/* Work on local copies of the caller's positions and counts. */
274 const unsigned char* inptr = (const unsigned char*) *inbuf;
275 size_t inleft = *inbytesleft;
276 unsigned char* outptr = (unsigned char*) *outbuf;
277 size_t outleft = *outbytesleft;
/* Remember the input state so the failure paths below can restore it. */
279 state_t last_istate = cd->istate;
/* Decode the next character from the input into wc. */
283 incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
/* The parity of (-1-incount) is compared with that of (-1-RET_ILSEQ) to
   recognise the RET_ILSEQ family of return codes. */
285 if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
286 /* Case 1: invalid input, possibly after a shift sequence */
287 incount = DECODE_SHIFT_ILSEQ(incount);
288 if (cd->discard_ilseq) {
/* The UCS-4/UTF-32 and UCS-2/UTF-16 input families are singled out.
   NOTE(review): the case bodies are not visible in this listing; presumably
   they decide how many bytes to skip -- confirm. */
289 switch (cd->iindex) {
290 case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
291 case ei_utf32: case ei_utf32be: case ei_utf32le:
292 case ei_ucs4internal: case ei_ucs4swapped:
294 case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
295 case ei_utf16: case ei_utf16be: case ei_utf16le:
296 case ei_ucs2internal: case ei_ucs2swapped:
303 #ifndef LIBICONV_PLUG
304 else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
/* incount2 is the length passed to the fallback.  NOTE(review): its
   assignment is not visible here; presumably it is set per encoding family
   in the switch below -- confirm. */
305 unsigned int incount2;
306 struct mb_to_uc_fallback_locals locals;
307 switch (cd->iindex) {
308 case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
309 case ei_utf32: case ei_utf32be: case ei_utf32le:
310 case ei_ucs4internal: case ei_ucs4swapped:
312 case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
313 case ei_utf16: case ei_utf16be: case ei_utf16le:
314 case ei_ucs2internal: case ei_ucs2swapped:
/* Let the fallback write its replacement at the current output position. */
320 locals.l_outbuf = outptr;
321 locals.l_outbytesleft = outleft;
/* Hand the incount2 bytes starting at inptr+incount to the user's fallback. */
323 cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
324 mb_to_uc_write_replacement,
/* The callback recorded an error: still consume incount bytes, then
   surface its error code through errno. */
327 if (locals.l_errno != 0) {
328 inptr += incount; inleft -= incount;
329 errno = locals.l_errno;
/* Otherwise adopt the output position the callback advanced to. */
334 outptr = locals.l_outbuf;
335 outleft = locals.l_outbytesleft;
340 inptr += incount; inleft -= incount;
345 if (incount == RET_TOOFEW(0)) {
346 /* Case 2: not enough bytes available to detect anything */
351 /* Case 3: k bytes read, but only a shift sequence */
352 incount = DECODE_TOOFEW(incount);
354 /* Case 4: k bytes read, making up a wide character */
/* Roll the input state back to the snapshot taken before decoding. */
356 cd->istate = last_istate;
/* Encode wc with the target encoding. */
361 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
362 if (outcount != RET_ILUNI)
364 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
/* NOTE(review): the action taken for tag characters is not visible in this
   listing -- confirm against the full source. */
365 if ((wc >> 7) == (0xe0000 >> 7))
367 /* Try transliteration. */
369 if (cd->transliterate) {
370 outcount = unicode_transliterate(cd,wc,outptr,outleft);
371 if (outcount != RET_ILUNI)
374 if (cd->discard_ilseq) {
378 #ifndef LIBICONV_PLUG
379 else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
/* Let the user's uc_to_mb fallback write at the current output position. */
380 struct uc_to_mb_fallback_locals locals;
381 locals.l_outbuf = outptr;
382 locals.l_outbytesleft = outleft;
384 cd->fallbacks.uc_to_mb_fallback(wc,
385 uc_to_mb_write_replacement,
/* The fallback failed: restore the input state and report its error code. */
388 if (locals.l_errno != 0) {
389 cd->istate = last_istate;
390 errno = locals.l_errno;
/* Otherwise adopt the position the callback advanced to. */
393 outptr = locals.l_outbuf;
394 outleft = locals.l_outbytesleft;
/* Try U+FFFD as a substitute. */
399 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
400 if (outcount != RET_ILUNI)
402 cd->istate = last_istate;
408 cd->istate = last_istate;
413 #ifndef LIBICONV_PLUG
/* Notify the optional per-character hook. */
414 if (cd->hooks.uc_hook)
415 (*cd->hooks.uc_hook)(wc, cd->hooks.data);
/* Sanity checks: neither converter may claim more bytes than were available. */
417 if (!(outcount <= outleft)) abort();
418 outptr += outcount; outleft -= outcount;
421 if (!(incount <= inleft)) abort();
422 inptr += incount; inleft -= incount;
/* Publish the final positions and remaining counts to the caller. */
424 *inbuf = (const char*) inptr;
425 *inbytesleft = inleft;
426 *outbuf = (char*) outptr;
427 *outbytesleft = outleft;
/* unicode_loop_reset: when outbuf or *outbuf is NULL, both conversion
   states are zeroed.  Otherwise a character still pending in the input
   converter is flushed (if xxx_flushwc exists) and encoded using the same
   fallback chain as the main loop, then the output converter's xxx_reset
   (if any) is called, and finally both states are zeroed. */
431 static size_t unicode_loop_reset (iconv_t icd,
432 char* * outbuf, size_t *outbytesleft)
434 conv_t cd = (conv_t) icd;
435 if (outbuf == NULL || *outbuf == NULL) {
436 /* Reset the states. */
437 memset(&cd->istate,'\0',sizeof(state_t));
438 memset(&cd->ostate,'\0',sizeof(state_t));
/* The input converter may hold a pending character; xxx_flushwc is optional. */
442 if (cd->ifuncs.xxx_flushwc) {
/* Remember the input state so the failure paths below can restore it. */
443 state_t last_istate = cd->istate;
/* A non-zero result means a character was flushed into wc. */
445 if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
446 unsigned char* outptr = (unsigned char*) *outbuf;
447 size_t outleft = *outbytesleft;
/* Encode the flushed character with the target encoding. */
448 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
449 if (outcount != RET_ILUNI)
451 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
/* NOTE(review): the action taken for tag characters is not visible in this
   listing -- confirm against the full source. */
452 if ((wc >> 7) == (0xe0000 >> 7))
454 /* Try transliteration. */
456 if (cd->transliterate) {
457 outcount = unicode_transliterate(cd,wc,outptr,outleft);
458 if (outcount != RET_ILUNI)
461 if (cd->discard_ilseq) {
465 #ifndef LIBICONV_PLUG
466 else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
/* Let the user's uc_to_mb fallback write at the current output position. */
467 struct uc_to_mb_fallback_locals locals;
468 locals.l_outbuf = outptr;
469 locals.l_outbytesleft = outleft;
471 cd->fallbacks.uc_to_mb_fallback(wc,
472 uc_to_mb_write_replacement,
/* The fallback failed: restore the input state and report its error code. */
475 if (locals.l_errno != 0) {
476 cd->istate = last_istate;
477 errno = locals.l_errno;
/* Otherwise adopt the position the callback advanced to. */
480 outptr = locals.l_outbuf;
481 outleft = locals.l_outbytesleft;
/* Try U+FFFD as a substitute. */
486 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
487 if (outcount != RET_ILUNI)
489 cd->istate = last_istate;
494 cd->istate = last_istate;
498 #ifndef LIBICONV_PLUG
/* Notify the optional per-character hook. */
499 if (cd->hooks.uc_hook)
500 (*cd->hooks.uc_hook)(wc, cd->hooks.data);
/* Sanity check: the encoder must not claim more bytes than were free. */
502 if (!(outcount <= outleft)) abort();
/* Publish the output position after the flushed character. */
506 *outbuf = (char*) outptr;
507 *outbytesleft = outleft;
/* The output converter may need to emit a closing sequence; xxx_reset is optional. */
510 if (cd->ofuncs.xxx_reset) {
511 unsigned char* outptr = (unsigned char*) *outbuf;
512 size_t outleft = *outbytesleft;
513 int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
/* Sanity check, then advance the caller's output position by the bytes written. */
518 if (!(outcount <= outleft)) abort();
519 *outbuf = (char*) (outptr + outcount);
520 *outbytesleft = outleft - outcount;
/* Finally return both converters to their initial (all-zero) state. */
522 memset(&cd->istate,'\0',sizeof(state_t));
523 memset(&cd->ostate,'\0',sizeof(state_t));