]> git.lyx.org Git - lyx.git/blob - 3rdparty/libiconv/1.15/lib/loop_unicode.h
Implement auto-nesting.
[lyx.git] / 3rdparty / libiconv / 1.15 / lib / loop_unicode.h
1 /*
2  * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
3  * This file is part of the GNU LIBICONV Library.
4  *
5  * The GNU LIBICONV Library is free software; you can redistribute it
6  * and/or modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * The GNU LIBICONV Library is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17  * If not, see <http://www.gnu.org/licenses/>.
18  */
19
20 /* This file defines the conversion loop via Unicode as a pivot encoding. */
21
22 /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
23 static int unicode_transliterate (conv_t cd, ucs4_t wc,
24                                   unsigned char* outptr, size_t outleft)
25 {
26   if (cd->oflags & HAVE_HANGUL_JAMO) {
27     /* Decompose Hangul into Jamo. Use double-width Jamo (contained
28        in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
29        (contained in Unicode only). */
30     ucs4_t buf[3];
31     int ret = johab_hangul_decompose(cd,buf,wc);
32     if (ret != RET_ILUNI) {
33       /* we know 1 <= ret <= 3 */
34       state_t backup_state = cd->ostate;
35       unsigned char* backup_outptr = outptr;
36       size_t backup_outleft = outleft;
37       int i, sub_outcount;
38       for (i = 0; i < ret; i++) {
39         if (outleft == 0) {
40           sub_outcount = RET_TOOSMALL;
41           goto johab_hangul_failed;
42         }
43         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
44         if (sub_outcount <= RET_ILUNI)
45           goto johab_hangul_failed;
46         if (!(sub_outcount <= outleft)) abort();
47         outptr += sub_outcount; outleft -= sub_outcount;
48       }
49       return outptr-backup_outptr;
50     johab_hangul_failed:
51       cd->ostate = backup_state;
52       outptr = backup_outptr;
53       outleft = backup_outleft;
54       if (sub_outcount != RET_ILUNI)
55         return RET_TOOSMALL;
56     }
57   }
58   {
59     /* Try to use a variant, but postfix it with
60        U+303E IDEOGRAPHIC VARIATION INDICATOR
61        (cf. Ken Lunde's "CJKV information processing", p. 188). */
62     int indx = -1;
63     if (wc == 0x3006)
64       indx = 0;
65     else if (wc == 0x30f6)
66       indx = 1;
67     else if (wc >= 0x4e00 && wc < 0xa000)
68       indx = cjk_variants_indx[wc-0x4e00];
69     if (indx >= 0) {
70       for (;; indx++) {
71         ucs4_t buf[2];
72         unsigned short variant = cjk_variants[indx];
73         unsigned short last = variant & 0x8000;
74         variant &= 0x7fff;
75         variant += 0x3000;
76         buf[0] = variant; buf[1] = 0x303e;
77         {
78           state_t backup_state = cd->ostate;
79           unsigned char* backup_outptr = outptr;
80           size_t backup_outleft = outleft;
81           int i, sub_outcount;
82           for (i = 0; i < 2; i++) {
83             if (outleft == 0) {
84               sub_outcount = RET_TOOSMALL;
85               goto variant_failed;
86             }
87             sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
88             if (sub_outcount <= RET_ILUNI)
89               goto variant_failed;
90             if (!(sub_outcount <= outleft)) abort();
91             outptr += sub_outcount; outleft -= sub_outcount;
92           }
93           return outptr-backup_outptr;
94         variant_failed:
95           cd->ostate = backup_state;
96           outptr = backup_outptr;
97           outleft = backup_outleft;
98           if (sub_outcount != RET_ILUNI)
99             return RET_TOOSMALL;
100         }
101         if (last)
102           break;
103       }
104     }
105   }
106   if (wc >= 0x2018 && wc <= 0x201a) {
107     /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
108     ucs4_t substitute =
109       (cd->oflags & HAVE_QUOTATION_MARKS
110        ? (wc == 0x201a ? 0x2018 : wc)
111        : (cd->oflags & HAVE_ACCENTS
112           ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
113           : 0x0027 /* use apostrophe */
114       )  );
115     int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
116     if (outcount != RET_ILUNI)
117       return outcount;
118   }
119   {
120     /* Use the transliteration table. */
121     int indx = translit_index(wc);
122     if (indx >= 0) {
123       const unsigned int * cp = &translit_data[indx];
124       unsigned int num = *cp++;
125       state_t backup_state = cd->ostate;
126       unsigned char* backup_outptr = outptr;
127       size_t backup_outleft = outleft;
128       unsigned int i;
129       int sub_outcount;
130       for (i = 0; i < num; i++) {
131         if (outleft == 0) {
132           sub_outcount = RET_TOOSMALL;
133           goto translit_failed;
134         }
135         sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
136         if (sub_outcount == RET_ILUNI)
137           /* Recursive transliteration. */
138           sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
139         if (sub_outcount <= RET_ILUNI)
140           goto translit_failed;
141         if (!(sub_outcount <= outleft)) abort();
142         outptr += sub_outcount; outleft -= sub_outcount;
143       }
144       return outptr-backup_outptr;
145     translit_failed:
146       cd->ostate = backup_state;
147       outptr = backup_outptr;
148       outleft = backup_outleft;
149       if (sub_outcount != RET_ILUNI)
150         return RET_TOOSMALL;
151     }
152   }
153   return RET_ILUNI;
154 }
155
156 #ifndef LIBICONV_PLUG
157
/* Bookkeeping shared between the code that invokes a uc_to_mb fallback
   and the uc_to_mb_write_replacement callback it passes along. */
struct uc_to_mb_fallback_locals {
  unsigned char* l_outbuf;      /* position at which the next bytes are stored */
  size_t l_outbytesleft;        /* number of bytes still free at l_outbuf */
  int l_errno;                  /* 0, or the error recorded by the callback */
};

/* Callback that appends the buflen bytes at buf to the output described by
   callback_arg (a struct uc_to_mb_fallback_locals *).
   If the bytes do not fit, nothing is copied and l_errno becomes E2BIG.
   Once l_errno is nonzero, all further calls are ignored. */
static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct uc_to_mb_fallback_locals * const st =
    (struct uc_to_mb_fallback_locals *) callback_arg;

  /* A previous call already failed: stay in the failed state. */
  if (st->l_errno != 0)
    return;

  /* Not enough room for the whole replacement: report it, copy nothing. */
  if (buflen > st->l_outbytesleft) {
    st->l_errno = E2BIG;
    return;
  }

  memcpy(st->l_outbuf, buf, buflen);
  st->l_outbuf += buflen;
  st->l_outbytesleft -= buflen;
}
181
182 struct mb_to_uc_fallback_locals {
183   conv_t l_cd;
184   unsigned char* l_outbuf;
185   size_t l_outbytesleft;
186   int l_errno;
187 };
188
189 static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
190                                         void* callback_arg)
191 {
192   struct mb_to_uc_fallback_locals * plocals =
193     (struct mb_to_uc_fallback_locals *) callback_arg;
194   /* Do nothing if already encountered an error in a previous call. */
195   if (plocals->l_errno == 0) {
196     /* Attempt to convert the passed buffer to the target encoding. */
197     conv_t cd = plocals->l_cd;
198     unsigned char* outptr = plocals->l_outbuf;
199     size_t outleft = plocals->l_outbytesleft;
200     for (; buflen > 0; buf++, buflen--) {
201       ucs4_t wc = *buf;
202       int outcount;
203       if (outleft == 0) {
204         plocals->l_errno = E2BIG;
205         break;
206       }
207       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
208       if (outcount != RET_ILUNI)
209         goto outcount_ok;
210       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
211       if ((wc >> 7) == (0xe0000 >> 7))
212         goto outcount_zero;
213       /* Try transliteration. */
214       if (cd->transliterate) {
215         outcount = unicode_transliterate(cd,wc,outptr,outleft);
216         if (outcount != RET_ILUNI)
217           goto outcount_ok;
218       }
219       if (cd->discard_ilseq) {
220         outcount = 0;
221         goto outcount_ok;
222       }
223       #ifndef LIBICONV_PLUG
224       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
225         struct uc_to_mb_fallback_locals locals;
226         locals.l_outbuf = outptr;
227         locals.l_outbytesleft = outleft;
228         locals.l_errno = 0;
229         cd->fallbacks.uc_to_mb_fallback(wc,
230                                         uc_to_mb_write_replacement,
231                                         &locals,
232                                         cd->fallbacks.data);
233         if (locals.l_errno != 0) {
234           plocals->l_errno = locals.l_errno;
235           break;
236         }
237         outptr = locals.l_outbuf;
238         outleft = locals.l_outbytesleft;
239         outcount = 0;
240         goto outcount_ok;
241       }
242       #endif
243       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
244       if (outcount != RET_ILUNI)
245         goto outcount_ok;
246       plocals->l_errno = EILSEQ;
247       break;
248     outcount_ok:
249       if (outcount < 0) {
250         plocals->l_errno = E2BIG;
251         break;
252       }
253       #ifndef LIBICONV_PLUG
254       if (cd->hooks.uc_hook)
255         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
256       #endif
257       if (!(outcount <= outleft)) abort();
258       outptr += outcount; outleft -= outcount;
259     outcount_zero: ;
260     }
261     plocals->l_outbuf = outptr;
262     plocals->l_outbytesleft = outleft;
263   }
264 }
265
266 #endif /* !LIBICONV_PLUG */
267
268 static size_t unicode_loop_convert (iconv_t icd,
269                                     const char* * inbuf, size_t *inbytesleft,
270                                     char* * outbuf, size_t *outbytesleft)
271 {
272   conv_t cd = (conv_t) icd;
273   size_t result = 0;
274   const unsigned char* inptr = (const unsigned char*) *inbuf;
275   size_t inleft = *inbytesleft;
276   unsigned char* outptr = (unsigned char*) *outbuf;
277   size_t outleft = *outbytesleft;
278   while (inleft > 0) {
279     state_t last_istate = cd->istate;
280     ucs4_t wc;
281     int incount;
282     int outcount;
283     incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
284     if (incount < 0) {
285       if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
286         /* Case 1: invalid input, possibly after a shift sequence */
287         incount = DECODE_SHIFT_ILSEQ(incount);
288         if (cd->discard_ilseq) {
289           switch (cd->iindex) {
290             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
291             case ei_utf32: case ei_utf32be: case ei_utf32le:
292             case ei_ucs4internal: case ei_ucs4swapped:
293               incount += 4; break;
294             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
295             case ei_utf16: case ei_utf16be: case ei_utf16le:
296             case ei_ucs2internal: case ei_ucs2swapped:
297               incount += 2; break;
298             default:
299               incount += 1; break;
300           }
301           goto outcount_zero;
302         }
303         #ifndef LIBICONV_PLUG
304         else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
305           unsigned int incount2;
306           struct mb_to_uc_fallback_locals locals;
307           switch (cd->iindex) {
308             case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
309             case ei_utf32: case ei_utf32be: case ei_utf32le:
310             case ei_ucs4internal: case ei_ucs4swapped:
311               incount2 = 4; break;
312             case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
313             case ei_utf16: case ei_utf16be: case ei_utf16le:
314             case ei_ucs2internal: case ei_ucs2swapped:
315               incount2 = 2; break;
316             default:
317               incount2 = 1; break;
318           }
319           locals.l_cd = cd;
320           locals.l_outbuf = outptr;
321           locals.l_outbytesleft = outleft;
322           locals.l_errno = 0;
323           cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
324                                           mb_to_uc_write_replacement,
325                                           &locals,
326                                           cd->fallbacks.data);
327           if (locals.l_errno != 0) {
328             inptr += incount; inleft -= incount;
329             errno = locals.l_errno;
330             result = -1;
331             break;
332           }
333           incount += incount2;
334           outptr = locals.l_outbuf;
335           outleft = locals.l_outbytesleft;
336           result += 1;
337           goto outcount_zero;
338         }
339         #endif
340         inptr += incount; inleft -= incount;
341         errno = EILSEQ;
342         result = -1;
343         break;
344       }
345       if (incount == RET_TOOFEW(0)) {
346         /* Case 2: not enough bytes available to detect anything */
347         errno = EINVAL;
348         result = -1;
349         break;
350       }
351       /* Case 3: k bytes read, but only a shift sequence */
352       incount = DECODE_TOOFEW(incount);
353     } else {
354       /* Case 4: k bytes read, making up a wide character */
355       if (outleft == 0) {
356         cd->istate = last_istate;
357         errno = E2BIG;
358         result = -1;
359         break;
360       }
361       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
362       if (outcount != RET_ILUNI)
363         goto outcount_ok;
364       /* Handle Unicode tag characters (range U+E0000..U+E007F). */
365       if ((wc >> 7) == (0xe0000 >> 7))
366         goto outcount_zero;
367       /* Try transliteration. */
368       result++;
369       if (cd->transliterate) {
370         outcount = unicode_transliterate(cd,wc,outptr,outleft);
371         if (outcount != RET_ILUNI)
372           goto outcount_ok;
373       }
374       if (cd->discard_ilseq) {
375         outcount = 0;
376         goto outcount_ok;
377       }
378       #ifndef LIBICONV_PLUG
379       else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
380         struct uc_to_mb_fallback_locals locals;
381         locals.l_outbuf = outptr;
382         locals.l_outbytesleft = outleft;
383         locals.l_errno = 0;
384         cd->fallbacks.uc_to_mb_fallback(wc,
385                                         uc_to_mb_write_replacement,
386                                         &locals,
387                                         cd->fallbacks.data);
388         if (locals.l_errno != 0) {
389           cd->istate = last_istate;
390           errno = locals.l_errno;
391           return -1;
392         }
393         outptr = locals.l_outbuf;
394         outleft = locals.l_outbytesleft;
395         outcount = 0;
396         goto outcount_ok;
397       }
398       #endif
399       outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
400       if (outcount != RET_ILUNI)
401         goto outcount_ok;
402       cd->istate = last_istate;
403       errno = EILSEQ;
404       result = -1;
405       break;
406     outcount_ok:
407       if (outcount < 0) {
408         cd->istate = last_istate;
409         errno = E2BIG;
410         result = -1;
411         break;
412       }
413       #ifndef LIBICONV_PLUG
414       if (cd->hooks.uc_hook)
415         (*cd->hooks.uc_hook)(wc, cd->hooks.data);
416       #endif
417       if (!(outcount <= outleft)) abort();
418       outptr += outcount; outleft -= outcount;
419     }
420   outcount_zero:
421     if (!(incount <= inleft)) abort();
422     inptr += incount; inleft -= incount;
423   }
424   *inbuf = (const char*) inptr;
425   *inbytesleft = inleft;
426   *outbuf = (char*) outptr;
427   *outbytesleft = outleft;
428   return result;
429 }
430
431 static size_t unicode_loop_reset (iconv_t icd,
432                                   char* * outbuf, size_t *outbytesleft)
433 {
434   conv_t cd = (conv_t) icd;
435   if (outbuf == NULL || *outbuf == NULL) {
436     /* Reset the states. */
437     memset(&cd->istate,'\0',sizeof(state_t));
438     memset(&cd->ostate,'\0',sizeof(state_t));
439     return 0;
440   } else {
441     size_t result = 0;
442     if (cd->ifuncs.xxx_flushwc) {
443       state_t last_istate = cd->istate;
444       ucs4_t wc;
445       if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
446         unsigned char* outptr = (unsigned char*) *outbuf;
447         size_t outleft = *outbytesleft;
448         int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
449         if (outcount != RET_ILUNI)
450           goto outcount_ok;
451         /* Handle Unicode tag characters (range U+E0000..U+E007F). */
452         if ((wc >> 7) == (0xe0000 >> 7))
453           goto outcount_zero;
454         /* Try transliteration. */
455         result++;
456         if (cd->transliterate) {
457           outcount = unicode_transliterate(cd,wc,outptr,outleft);
458           if (outcount != RET_ILUNI)
459             goto outcount_ok;
460         }
461         if (cd->discard_ilseq) {
462           outcount = 0;
463           goto outcount_ok;
464         }
465         #ifndef LIBICONV_PLUG
466         else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
467           struct uc_to_mb_fallback_locals locals;
468           locals.l_outbuf = outptr;
469           locals.l_outbytesleft = outleft;
470           locals.l_errno = 0;
471           cd->fallbacks.uc_to_mb_fallback(wc,
472                                           uc_to_mb_write_replacement,
473                                           &locals,
474                                           cd->fallbacks.data);
475           if (locals.l_errno != 0) {
476             cd->istate = last_istate;
477             errno = locals.l_errno;
478             return -1;
479           }
480           outptr = locals.l_outbuf;
481           outleft = locals.l_outbytesleft;
482           outcount = 0;
483           goto outcount_ok;
484         }
485         #endif
486         outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
487         if (outcount != RET_ILUNI)
488           goto outcount_ok;
489         cd->istate = last_istate;
490         errno = EILSEQ;
491         return -1;
492       outcount_ok:
493         if (outcount < 0) {
494           cd->istate = last_istate;
495           errno = E2BIG;
496           return -1;
497         }
498         #ifndef LIBICONV_PLUG
499         if (cd->hooks.uc_hook)
500           (*cd->hooks.uc_hook)(wc, cd->hooks.data);
501         #endif
502         if (!(outcount <= outleft)) abort();
503         outptr += outcount;
504         outleft -= outcount;
505       outcount_zero:
506         *outbuf = (char*) outptr;
507         *outbytesleft = outleft;
508       }
509     }
510     if (cd->ofuncs.xxx_reset) {
511       unsigned char* outptr = (unsigned char*) *outbuf;
512       size_t outleft = *outbytesleft;
513       int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
514       if (outcount < 0) {
515         errno = E2BIG;
516         return -1;
517       }
518       if (!(outcount <= outleft)) abort();
519       *outbuf = (char*) (outptr + outcount);
520       *outbytesleft = outleft - outcount;
521     }
522     memset(&cd->istate,'\0',sizeof(state_t));
523     memset(&cd->ostate,'\0',sizeof(state_t));
524     return result;
525   }
526 }