2 * Copyright (C) 1999-2001, 2004, 2016 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <http://www.gnu.org/licenses/>.
/* CP1258 byte codes of the five combining marks.  Per cp1258_2uni the
   entries 0xcc, 0xec, 0xde, 0xd2, 0xf2 are U+0300, U+0301, U+0303, U+0309,
   U+0323, i.e. the same order as the k = 0..4 cases in cp1258_mbtowc.
   cp1258_wctomb indexes this table with viet_decomp_table[i].comb1 to
   produce the second output byte. */
27 static const unsigned char cp1258_comb_table[] = {
28 0xcc, 0xec, 0xde, 0xd2, 0xf2,
31 /* The possible bases in viet_comp_table_data:
32 0x0041..0x0045, 0x0047..0x0049, 0x004B..0x0050, 0x0052..0x0057,
33 0x0059..0x005A, 0x0061..0x0065, 0x0067..0x0069, 0x006B..0x0070,
34 0x0072..0x0077, 0x0079..0x007A, 0x00A5, 0x00A8, 0x00C2, 0x00C5..0x00C7,
35 0x00CA, 0x00CF, 0x00D3..0x00D4, 0x00D6, 0x00D8, 0x00DA, 0x00DC, 0x00E2,
36 0x00E5..0x00E7, 0x00EA, 0x00EF, 0x00F3..0x00F4, 0x00F6, 0x00F8, 0x00FA,
37 0x00FC, 0x0102..0x0103, 0x01A0..0x01A1, 0x01AF..0x01B0. */
/* Bitmap of possible composition bases, 32 code points per element,
   starting at U+0040: bit (wc & 0x1f) of element (wc - 0x0040) >> 5 is set
   when wc is a possible base.  cp1258_mbtowc tests it to decide whether a
   character must be buffered. */
38 static const unsigned int cp1258_comp_bases[] = {
39 0x06fdfbbe, 0x06fdfbbe, 0x00000000, 0x00000120, 0x155884e4, 0x155884e4,
40 0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00018003
/* Unicode value for each CP1258 byte 0x80..0xff; cp1258_mbtowc indexes it
   with c-0x80.  Each group of two rows below covers 16 consecutive bytes.
   NOTE(review): 0xfffd entries presumably mark bytes without a mapping;
   the check is not visible here - confirm in cp1258_mbtowc. */
43 static const unsigned short cp1258_2uni[128] = {
/* bytes 0x80..0x8f */
45 0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
46 0x02c6, 0x2030, 0xfffd, 0x2039, 0x0152, 0xfffd, 0xfffd, 0xfffd,
/* bytes 0x90..0x9f */
48 0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
49 0x02dc, 0x2122, 0xfffd, 0x203a, 0x0153, 0xfffd, 0xfffd, 0x0178,
/* bytes 0xa0..0xaf */
51 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
52 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
/* bytes 0xb0..0xbf */
54 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
55 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
/* bytes 0xc0..0xcf (0xcc is the combining mark U+0300) */
57 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
58 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x0300, 0x00cd, 0x00ce, 0x00cf,
/* bytes 0xd0..0xdf (0xd2 is U+0309, 0xde is U+0303) */
60 0x0110, 0x00d1, 0x0309, 0x00d3, 0x00d4, 0x01a0, 0x00d6, 0x00d7,
61 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x01af, 0x0303, 0x00df,
/* bytes 0xe0..0xef (0xec is the combining mark U+0301) */
63 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
64 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x0301, 0x00ed, 0x00ee, 0x00ef,
/* bytes 0xf0..0xff (0xf2 is the combining mark U+0323) */
66 0x0111, 0x00f1, 0x0323, 0x00f3, 0x00f4, 0x01a1, 0x00f6, 0x00f7,
67 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x01b0, 0x20ab, 0x00ff,
70 /* In the CP1258 to Unicode direction, the state contains a buffered
71 character, or 0 if none. */
/* Converts the CP1258 byte at s to Unicode.  A character that may be the
   base of a composition is buffered in conv->istate; when the next
   character is a combining mark and viet_comp_table_data has an entry for
   the pair, the composed character is output instead.
   NOTE(review): several lines of this function (declarations, braces,
   state updates, some returns) are absent from this listing; the comments
   below describe only the statements that are visible. */
74 cp1258_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
78 unsigned short last_wc;
/* Table lookup for byte c; element 0 of cp1258_2uni belongs to byte 0x80. */
82 wc = cp1258_2uni[c-0x80];
/* Character buffered in the conversion state, or 0 if none. */
86 last_wc = conv->istate;
/* U+0300..U+033F: wc may be a combining mark. */
88 if (wc >= 0x0300 && wc < 0x0340) {
89 /* See whether last_wc and wc can be combined. */
/* k selects the viet_comp_table entry for this combining mark. */
93 case 0x0300: k = 0; break;
94 case 0x0301: k = 1; break;
95 case 0x0303: k = 2; break;
96 case 0x0309: k = 3; break;
97 case 0x0323: k = 4; break;
/* i1..i2 is the inclusive index range in viet_comp_table_data for mark k. */
100 i1 = viet_comp_table[k].idx;
101 i2 = i1 + viet_comp_table[k].len-1;
/* Search only if last_wc lies between the first and last base of the range. */
102 if (last_wc >= viet_comp_table_data[i1].base
103 && last_wc <= viet_comp_table_data[i2].base) {
/* Look for the entry whose base equals last_wc, narrowing by comparison. */
107 if (last_wc == viet_comp_table_data[i].base)
109 if (last_wc < viet_comp_table_data[i].base) {
118 if (last_wc == viet_comp_table_data[i].base)
/* Entry found: replace the buffered base by the composed character. */
124 last_wc = viet_comp_table_data[i].composed;
125 /* Output the combined character. */
127 *pwc = (ucs4_t) last_wc;
132 /* Output the buffered character. */
134 *pwc = (ucs4_t) last_wc;
135 return 0; /* Don't advance the input pointer. */
/* Test the bit for wc in the cp1258_comp_bases bitmap (32 code points per
   element, starting at U+0040). */
137 if (wc >= 0x0041 && wc <= 0x01b0
138 && ((cp1258_comp_bases[(wc - 0x0040) >> 5] >> (wc & 0x1f)) & 1)) {
139 /* wc is a possible match in viet_comp_table_data. Buffer it. */
/* NOTE(review): RET_TOOFEW(1) presumably reports one input byte consumed
   with no character produced yet - confirm against its definition. */
141 return RET_TOOFEW(1);
143 /* Output wc immediately. */
/* cp1258_flushwc is an alias for normal_flushwc, which is defined outside
   this listing.  NOTE(review): presumably it emits the character still
   buffered in the state at end of input - confirm in its definition. */
149 #define cp1258_flushwc normal_flushwc
/* CP1258 byte for U+00C0..U+0117; cp1258_wctomb indexes it with wc-0x00c0.
   The first 64 entries cover U+00C0..U+00FF, the last 24 cover
   U+0100..U+0117.
   NOTE(review): 0x00 presumably means "no single-byte encoding"; the check
   is not visible in this listing - confirm in cp1258_wctomb. */
151 static const unsigned char cp1258_page00[88] = {
152 0xc0, 0xc1, 0xc2, 0x00, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
153 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
154 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, /* 0xd0-0xd7 */
155 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */
156 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
157 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, /* 0xe8-0xef */
158 0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, /* 0xf0-0xf7 */
159 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, /* 0xf8-0xff */
/* U+0100..U+0117: the range comments below give the offset from U+0100. */
161 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
162 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
163 0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
/* CP1258 byte for U+0150..U+01B7; cp1258_wctomb indexes it with wc-0x0150.
   The range comments give the low byte of the code point (U+01xx).
   Non-zero entries: U+0152, U+0153, U+0178, U+0192, U+01A0, U+01A1,
   U+01AF, U+01B0.
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   in cp1258_wctomb. */
165 static const unsigned char cp1258_page01[104] = {
166 0x00, 0x00, 0x8c, 0x9c, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */
167 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */
168 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */
169 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */
170 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */
171 0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */
172 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */
173 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
174 0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
175 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
176 0xd5, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
177 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd, /* 0xa8-0xaf */
178 0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
/* CP1258 byte for U+02C0..U+02DF; cp1258_wctomb indexes it with wc-0x02c0.
   Only U+02C6 (0x88) and U+02DC (0x98) have non-zero entries.
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   in cp1258_wctomb. */
180 static const unsigned char cp1258_page02[32] = {
181 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* 0xc0-0xc7 */
182 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
183 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
184 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
/* CP1258 byte for the combining marks U+0300..U+0327; cp1258_wctomb indexes
   it with wc-0x0300.  cp1258_wctomb also maps U+0340 and U+0341 (deprecated
   Vietnamese tone marks) onto entries 0 and 1 via wc-0x0340.
   Non-zero entries: U+0300, U+0301, U+0303, U+0309, U+0323.
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   in cp1258_wctomb. */
186 static const unsigned char cp1258_page03[40] = {
187 0xcc, 0xec, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
188 0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
189 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
190 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */
191 0x00, 0x00, 0x00, 0xf2, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */
/* CP1258 byte for U+2010..U+203F; cp1258_wctomb indexes it with wc-0x2010.
   The range comments give the low byte of the code point (U+20xx).
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   in cp1258_wctomb. */
193 static const unsigned char cp1258_page20[48] = {
194 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
195 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */
196 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
197 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
198 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
199 0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
203 cp1258_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
210 else if (wc >= 0x00a0 && wc < 0x00c0)
212 else if (wc >= 0x00c0 && wc < 0x0118)
213 c = cp1258_page00[wc-0x00c0];
214 else if (wc >= 0x0150 && wc < 0x01b8)
215 c = cp1258_page01[wc-0x0150];
216 else if (wc >= 0x02c0 && wc < 0x02e0)
217 c = cp1258_page02[wc-0x02c0];
218 else if (wc >= 0x0300 && wc < 0x0328)
219 c = cp1258_page03[wc-0x0300];
220 else if (wc >= 0x0340 && wc < 0x0342) /* deprecated Vietnamese tone marks */
221 c = cp1258_page03[wc-0x0340];
222 else if (wc >= 0x2010 && wc < 0x2040)
223 c = cp1258_page20[wc-0x2010];
224 else if (wc == 0x20ab)
226 else if (wc == 0x20ac)
228 else if (wc == 0x2122)
234 /* Try canonical decomposition. */
236 /* Binary search through viet_decomp_table. */
238 unsigned int i2 = sizeof(viet_decomp_table)/sizeof(viet_decomp_table[0])-1;
239 if (wc >= viet_decomp_table[i1].composed
240 && wc <= viet_decomp_table[i2].composed) {
243 /* Here i2 - i1 > 0. */
245 if (wc == viet_decomp_table[i].composed)
247 if (wc < viet_decomp_table[i].composed) {
250 /* Here i1 < i < i2. */
253 /* Here i1 <= i < i2. */
257 /* Here i2 - i1 = 1. */
259 if (wc == viet_decomp_table[i].composed)
266 /* Found a canonical decomposition. */
267 wc = viet_decomp_table[i].base;
268 /* wc is one of 0x0020, 0x0041..0x005a, 0x0061..0x007a, 0x00a5, 0x00a8,
269 0x00c2, 0x00c5..0x00c7, 0x00ca, 0x00cf, 0x00d3, 0x00d4, 0x00d6,
270 0x00d8, 0x00da, 0x00dc, 0x00e2, 0x00e5..0x00e7, 0x00ea, 0x00ef,
271 0x00f3, 0x00f4, 0x00f6, 0x00f8, 0x00fc, 0x0102, 0x0103, 0x01a0,
272 0x01a1, 0x01af, 0x01b0. */
275 else if (wc < 0x0118)
276 c = cp1258_page00[wc-0x00c0];
278 c = cp1258_page01[wc-0x0150];
282 r[1] = cp1258_comb_table[viet_decomp_table[i].comb1];