2 * Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <http://www.gnu.org/licenses/>.
24 /* Specification: RFC 1922 */
31 * The state is composed of one of the following values
34 #define STATE_TWOBYTE 1
36 * and one of the following values, << 8
39 #define STATE2_DESIGNATED_GB2312 1
40 #define STATE2_DESIGNATED_CNS11643_1 2
41 #define STATE2_DESIGNATED_ISO_IR_165 3
43 * and one of the following values, << 16
46 #define STATE3_DESIGNATED_CNS11643_2 1
48 * and one of the following values, << 24
51 #define STATE4_DESIGNATED_CNS11643_3 1
52 #define STATE4_DESIGNATED_CNS11643_4 2
53 #define STATE4_DESIGNATED_CNS11643_5 3
54 #define STATE4_DESIGNATED_CNS11643_6 4
55 #define STATE4_DESIGNATED_CNS11643_7 5
58 unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
/* COMBINE_STATE is the inverse of the split on the preceding line: it packs
   the four components back into the single variable named state, with
   state1 in bits 0-7, state2 in bits 8-15, state3 in bits 16-23 and
   state4 in bits 24 and up.  It expands to an assignment without a
   trailing semicolon and expects state, state1, state2, state3 and state4
   to be in scope at the point of use. */
59 #define COMBINE_STATE \
60 state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
63 iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
/* Decoder: converts bytes from s into one character stored through pwc.
   It works on a local copy of the input shift state taken from
   conv->istate and writes that copy back to conv->istate on the
   success and exit paths below. */
65 state_t state = conv->istate;
/* state2 slot: records which of GB2312, CNS 11643 plane 1 or ISO-IR-165
   is currently designated. */
77 state2 = STATE2_DESIGNATED_GB2312;
84 state2 = STATE2_DESIGNATED_CNS11643_1;
91 state2 = STATE2_DESIGNATED_ISO_IR_165;
/* state3 slot: the only designation value is CNS 11643 plane 2. */
100 state3 = STATE3_DESIGNATED_CNS11643_2;
/* state4 slot: one of CNS 11643 planes 3 to 7. */
109 state4 = STATE4_DESIGNATED_CNS11643_3;
116 state4 = STATE4_DESIGNATED_CNS11643_4;
123 state4 = STATE4_DESIGNATED_CNS11643_5;
130 state4 = STATE4_DESIGNATED_CNS11643_6;
137 state4 = STATE4_DESIGNATED_CNS11643_7;
/* Plane 2 character: the two character bytes are read from s[2] and s[3]
   and must both be below 0x80 before the plane 2 table is consulted.
   NOTE(review): the two bytes skipped at s[0], s[1] are presumably a
   single-shift prefix as described in RFC 1922 - confirm. */
149 case STATE3_DESIGNATED_CNS11643_2:
150 if (s[2] < 0x80 && s[3] < 0x80) {
151 int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
152 if (ret == RET_ILSEQ)
/* Any result other than 2 that reaches this point aborts the process. */
154 if (ret != 2) abort();
156 conv->istate = state;
/* Planes 3 to 7 follow the same pattern as plane 2, selected by the value
   held in state4: 7-bit check on s[2] and s[3], table lookup on s+2,
   abort on an unexpected length, then the state is saved. */
167 case STATE4_DESIGNATED_CNS11643_3:
168 if (s[2] < 0x80 && s[3] < 0x80) {
169 int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
170 if (ret == RET_ILSEQ)
172 if (ret != 2) abort();
174 conv->istate = state;
178 case STATE4_DESIGNATED_CNS11643_4:
179 if (s[2] < 0x80 && s[3] < 0x80) {
180 int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
181 if (ret == RET_ILSEQ)
183 if (ret != 2) abort();
185 conv->istate = state;
189 case STATE4_DESIGNATED_CNS11643_5:
190 if (s[2] < 0x80 && s[3] < 0x80) {
191 int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
192 if (ret == RET_ILSEQ)
194 if (ret != 2) abort();
196 conv->istate = state;
200 case STATE4_DESIGNATED_CNS11643_6:
201 if (s[2] < 0x80 && s[3] < 0x80) {
202 int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
203 if (ret == RET_ILSEQ)
205 if (ret != 2) abort();
207 conv->istate = state;
211 case STATE4_DESIGNATED_CNS11643_7:
212 if (s[2] < 0x80 && s[3] < 0x80) {
213 int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
214 if (ret == RET_ILSEQ)
216 if (ret != 2) abort();
218 conv->istate = state;
/* Guard evaluated before state1 is switched to STATE_TWOBYTE: it is true
   when state2 holds none of the three designation values. */
228 if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
230 state1 = STATE_TWOBYTE;
237 state1 = STATE_ASCII;
/* Single-byte path: one byte is handed to the ASCII decoder; a result
   other than 1 that reaches the check below aborts the process. */
248 int ret = ascii_mbtowc(conv,pwc,s,1);
249 if (ret == RET_ILSEQ)
251 if (ret != 1) abort();
/* A decoded LF (0x000a) or CR (0x000d) clears all three designation slots.
   NOTE(review): state4 is assigned STATE3_NONE here; STATE4_NONE is
   presumably what was intended.  This is harmless only if both names
   expand to the same value - confirm against their definitions. */
252 if (*pwc == 0x000a || *pwc == 0x000d) {
253 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
256 conv->istate = state;
/* Two-byte path: both bytes must be below 0x80; the table matching the
   designation held in state2 then converts s[0], s[1]. */
263 if (s[0] < 0x80 && s[1] < 0x80) {
268 case STATE2_DESIGNATED_GB2312:
269 ret = gb2312_mbtowc(conv,pwc,s,2); break;
270 case STATE2_DESIGNATED_CNS11643_1:
271 ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
272 case STATE2_DESIGNATED_ISO_IR_165:
273 ret = isoir165_mbtowc(conv,pwc,s,2); break;
276 if (ret == RET_ILSEQ)
278 if (ret != 2) abort();
280 conv->istate = state;
/* Exit paths: the state is saved before returning RET_TOOFEW(count) or
   RET_SHIFT_ILSEQ(count). */
289 conv->istate = state;
290 return RET_TOOFEW(count);
294 conv->istate = state;
295 return RET_SHIFT_ILSEQ(count);
299 iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
/* Encoder: converts the character wc.  It works on a local copy of the
   output shift state taken from conv->ostate and writes that copy back
   to conv->ostate after each successful encoding below. */
301 state_t state = conv->ostate;
/* Scratch buffer for the per-charset encoders.  Three bytes are needed
   because cns11643_wctomb is asked for 3 bytes; its first byte is compared
   with the plane numbers 1 to 7 further down. */
303 unsigned char buf[3];
306 /* There is no need to handle Unicode 3.1 tag characters and to look for
307 "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
310 ret = ascii_wctomb(conv,buf,wc,1);
311 if (ret != RET_ILUNI) {
312 if (ret != 1) abort();
/* Output size: 1 byte when state1 is already STATE_ASCII, otherwise 2.
   NOTE(review): the extra byte is presumably a shift back to ASCII -
   confirm. */
314 int count = (state1 == STATE_ASCII ? 1 : 2);
317 if (state1 != STATE_ASCII) {
320 state1 = STATE_ASCII;
/* Writing LF (0x000a) or CR (0x000d) clears all three designation slots.
   NOTE(review): state4 is assigned STATE3_NONE here; STATE4_NONE is
   presumably what was intended.  This is harmless only if both names
   expand to the same value - confirm against their definitions. */
323 if (wc == 0x000a || wc == 0x000d) {
324 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
327 conv->ostate = state;
332 /* Try GB 2312-1980. */
333 ret = gb2312_wctomb(conv,buf,wc,2);
334 if (ret != RET_ILUNI) {
335 if (ret != 2) abort();
336 if (buf[0] < 0x80 && buf[1] < 0x80) {
/* Output size: 4 bytes if GB2312 is not yet designated in state2, 1 byte
   if state1 is not yet STATE_TWOBYTE, plus 2 for the character. */
337 int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
340 if (state2 != STATE2_DESIGNATED_GB2312) {
346 state2 = STATE2_DESIGNATED_GB2312;
348 if (state1 != STATE_TWOBYTE) {
351 state1 = STATE_TWOBYTE;
356 conv->ostate = state;
/* One CNS 11643 lookup serves all planes: a successful call must report 3
   bytes, and buf[0] is then matched against the plane numbers 1 to 7. */
361 ret = cns11643_wctomb(conv,buf,wc,3);
362 if (ret != RET_ILUNI) {
363 if (ret != 3) abort();
365 /* Try CNS 11643-1992 Plane 1. */
366 if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
/* Same size rule as for GB2312: 4 for a missing designation, 1 if state1
   is not yet STATE_TWOBYTE, plus 2 for the character. */
367 int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
370 if (state2 != STATE2_DESIGNATED_CNS11643_1) {
376 state2 = STATE2_DESIGNATED_CNS11643_1;
378 if (state1 != STATE_TWOBYTE) {
381 state1 = STATE_TWOBYTE;
386 conv->ostate = state;
390 /* Try CNS 11643-1992 Plane 2. */
391 if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
/* Planes 2 to 7 share this size rule: 4 bytes when the plane is not yet
   designated in its slot, plus a fixed 4 bytes.
   NOTE(review): the fixed 4 is presumably a 2-byte single-shift prefix
   followed by the 2 character bytes - confirm against RFC 1922. */
392 int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
395 if (state3 != STATE3_DESIGNATED_CNS11643_2) {
401 state3 = STATE3_DESIGNATED_CNS11643_2;
408 conv->ostate = state;
412 /* Try CNS 11643-1992 Plane 3. */
413 if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
414 int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
417 if (state4 != STATE4_DESIGNATED_CNS11643_3) {
423 state4 = STATE4_DESIGNATED_CNS11643_3;
430 conv->ostate = state;
434 /* Try CNS 11643-1992 Plane 4. */
435 if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
436 int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
439 if (state4 != STATE4_DESIGNATED_CNS11643_4) {
445 state4 = STATE4_DESIGNATED_CNS11643_4;
452 conv->ostate = state;
456 /* Try CNS 11643-1992 Plane 5. */
457 if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
458 int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
461 if (state4 != STATE4_DESIGNATED_CNS11643_5) {
467 state4 = STATE4_DESIGNATED_CNS11643_5;
474 conv->ostate = state;
478 /* Try CNS 11643-1992 Plane 6. */
479 if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
480 int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
483 if (state4 != STATE4_DESIGNATED_CNS11643_6) {
489 state4 = STATE4_DESIGNATED_CNS11643_6;
496 conv->ostate = state;
500 /* Try CNS 11643-1992 Plane 7. */
501 if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
502 int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
505 if (state4 != STATE4_DESIGNATED_CNS11643_7) {
511 state4 = STATE4_DESIGNATED_CNS11643_7;
518 conv->ostate = state;
524 /* Try ISO-IR-165. */
525 ret = isoir165_wctomb(conv,buf,wc,2);
526 if (ret != RET_ILUNI) {
527 if (ret != 2) abort();
528 if (buf[0] < 0x80 && buf[1] < 0x80) {
/* Same size rule as for GB2312, applied to the ISO-IR-165 designation. */
529 int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
532 if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
538 state2 = STATE2_DESIGNATED_ISO_IR_165;
540 if (state1 != STATE_TWOBYTE) {
543 state1 = STATE_TWOBYTE;
548 conv->ostate = state;
557 iso2022_cn_ext_reset (conv_t conv, unsigned char *r, size_t n)
/* Reads the current output shift state; the check below acts only when
   state1 is not STATE_ASCII. */
559 state_t state = conv->ostate;
564 if (state1 != STATE_ASCII) {
568 /* conv->ostate = 0; will be done by the caller */
/* Undefine the designation constants defined earlier in this file, in
   reverse order of their definition. */
576 #undef STATE4_DESIGNATED_CNS11643_7
577 #undef STATE4_DESIGNATED_CNS11643_6
578 #undef STATE4_DESIGNATED_CNS11643_5
579 #undef STATE4_DESIGNATED_CNS11643_4
580 #undef STATE4_DESIGNATED_CNS11643_3
582 #undef STATE3_DESIGNATED_CNS11643_2
584 #undef STATE2_DESIGNATED_ISO_IR_165
585 #undef STATE2_DESIGNATED_CNS11643_1
586 #undef STATE2_DESIGNATED_GB2312