2 * Copyright (C) 1999-2008, 2011, 2016 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <http://www.gnu.org/licenses/>.
26 #include "localcharset.h"
29 #include <cygwin/version.h>
34 * Consider all system dependent encodings, for any system,
35 * and the extra encodings.
43 * Consider those system dependent encodings that are needed for the
49 #if defined(__osf__) || defined(VMS)
52 #if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
58 * Data type for general conversion loop.
/* Conversion callback. It takes the conversion descriptor plus the input and
   output cursors with their remaining byte counts; iconv() forwards its own
   arguments to it whenever it is called with a non-null input buffer. */
61 size_t (*loop_convert) (iconv_t icd,
62 const char* * inbuf, size_t *inbytesleft,
63 char* * outbuf, size_t *outbytesleft);
/* Callback that iconv() invokes instead of loop_convert when inbuf or *inbuf
   is NULL. Only the output cursor and its remaining byte count are passed. */
64 size_t (*loop_reset) (iconv_t icd,
65 char* * outbuf, size_t *outbytesleft);
71 #include "converters.h"
74 * Transliteration tables.
76 #include "cjk_variants.h"
80 * Table of all supported encodings.
/* Members of one encoding record. The all_encodings initializers supply them
   in this same order: two values for ifuncs, two values for ofuncs, then the
   oflags value. */
83 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
84 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
85 int oflags; /* flags for unicode -> multibyte conversion */
/* First expansion of the encodings .def lists. DEFALIAS is defined to expand
   to nothing, so only DEFENCODING entries contribute to this expansion.
   NOTE(review): the replacement text of DEFENCODING, the conditionals that
   guard the platform specific includes and the enclosing declaration are not
   visible here; presumably this yields the ei_ index constants used further
   down (ei_local_char, ei_local_wchar_t, ei_ucs4internal, ...) - confirm. */
87 #define DEFALIAS(xxx_alias,xxx) /* nothing */
89 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
91 #include "encodings.def"
93 # include "encodings_aix.def"
96 # include "encodings_osf1.def"
99 # include "encodings_dos.def"
102 # include "encodings_extra.def"
104 #include "encodings_local.def"
/* Closing entry placed after all generated ones; as its name says, it exists
   for compilers that reject a trailing comma after the last generated entry. */
106 ei_for_broken_compilers_that_dont_like_trailing_commas
/* Table with one struct encoding per DEFENCODING entry, produced by expanding
   the .def lists again with a DEFENCODING that emits an initializer row. Each
   row holds the two input function values, the two output function values and
   the ei_<name>_oflags constant belonging to that entry. */
109 static struct encoding const all_encodings[] = {
110 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
111 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
112 #include "encodings.def"
114 # include "encodings_aix.def"
117 # include "encodings_osf1.def"
120 # include "encodings_dos.def"
123 # include "encodings_extra.def"
/* Rows generated from encodings_local.def use a fixed oflags value of 0
   instead of an ei_<name>_oflags constant. */
126 #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
127 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
128 #include "encodings_local.def"
139 * Alias lookup function.
141 * struct alias { int name; unsigned int encoding_index; };
142 * const struct alias * aliases_lookup (const char *str, unsigned int len);
143 * #define MAX_WORD_LENGTH ...
146 # include "aliases_sysaix.h"
147 #elif defined hpux || defined __hpux
148 # include "aliases_syshpux.h"
149 #elif defined __osf__
150 # include "aliases_sysosf1.h"
152 # include "aliases_syssolaris.h"
154 # include "aliases.h"
158 * System dependent alias lookup function.
160 * const struct alias * aliases2_lookup (const char *str);
/* Alias data for the optional encoding sets. It is compiled only when at
   least one of USE_AIX, USE_OSF1, USE_DOS or USE_EXTRA is defined. The list
   in aliases2.h is expanded three times, each time with a different S(). */
162 #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
/* Expansion 1: one char array member per alias, sized with sizeof(name) so it
   holds the name including its terminating NUL. */
163 struct stringpool2_t {
164 #define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
165 #include "aliases2.h"
/* Expansion 2: initialize every member with the alias name itself. */
168 static const struct stringpool2_t stringpool2_contents = {
169 #define S(tag,name,encoding_index) name,
170 #include "aliases2.h"
/* The pool viewed as plain bytes, so a name can be reached as
   stringpool2 + offset. */
173 #define stringpool2 ((const char *) &stringpool2_contents)
/* Expansion 3: each alias stores the byte offset of its name member inside
   struct stringpool2_t, obtained by taking the member address through a null
   struct pointer, together with its encoding index. */
174 static const struct alias sysdep_aliases[] = {
175 #define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
176 #include "aliases2.h"
/* Look up STR among the system dependent aliases with a linear scan over
   sysdep_aliases. The name of each entry, located at offset ptr->name inside
   stringpool2, is compared to STR with strcmp, so the match is exact and
   case sensitive.
   NOTE(review): the declaration of count and the statements that return the
   result are not visible here; presumably the matching entry is returned, or
   NULL when nothing matches - confirm. */
186 static const struct alias *
187 aliases2_lookup (register const char *str)
189 const struct alias * ptr;
191 for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
192 if (!strcmp(str, stringpool2 + ptr->name))
/* Macro forms of the same two names that evaluate to NULL.
   NOTE(review): presumably these belong to the branch taken when none of the
   USE_ macros is defined; iconvlist() uses #ifndef aliases2_lookup to tell
   the two configurations apart. */
197 #define aliases2_lookup(str) NULL
198 #define stringpool2 NULL
202 /* Like !strcasecmp, except that both strings can be assumed to be ASCII
203 and the first string can be assumed to be in uppercase. */
204 static int strequal (const char* str1, const char* str2)
/* Fetch the next byte of each string and advance both cursors. Reading
   through unsigned char keeps byte values of 0x80 and above non-negative. */
209 c1 = * (unsigned char *) str1++;
210 c2 = * (unsigned char *) str2++;
/* Only the second string is tested for lowercase letters, since the first
   one is assumed to be uppercase already.
   NOTE(review): the statement controlled by this test is not visible here;
   presumably it maps c2 to uppercase before the comparison - confirm. */
213 if (c2 >= 'a' && c2 <= 'z')
/* Create a heap allocated conversion descriptor for the encoding names
   TOCODE and FROMCODE. Most of the body comes from iconv_open1.h and
   iconv_open2.h, which are textually included into this function; their
   contents are not visible here. Both visible failure exits return
   (iconv_t)(-1). */
222 iconv_t iconv_open (const char* tocode, const char* fromcode)
224 struct conv_struct * cd;
225 unsigned int from_index;
227 unsigned int to_index;
232 #include "iconv_open1.h"
/* When from_wchar and to_wchar differ, the descriptor is allocated as a
   struct wchar_conv_struct; otherwise as a plain struct conv_struct. */
234 cd = (struct conv_struct *) malloc(from_wchar != to_wchar
235 ? sizeof(struct wchar_conv_struct)
236 : sizeof(struct conv_struct));
/* NOTE(review): the condition guarding this return is not visible here;
   presumably it is the check for a failed malloc - confirm. */
239 return (iconv_t)(-1);
242 #include "iconv_open2.h"
/* NOTE(review): the label or condition leading to this second failure exit
   is not visible here. */
247 return (iconv_t)(-1);
/* Conversion entry point. It only dispatches to the callbacks stored in the
   descriptor and returns their result unchanged:
   - INBUF null or *INBUF null: loop_reset is called with the output buffer
     arguments only;
   - otherwise: loop_convert is called with all four buffer arguments. */
250 size_t iconv (iconv_t icd,
251 ICONV_CONST char* * inbuf, size_t *inbytesleft,
252 char* * outbuf, size_t *outbytesleft)
/* The opaque handle is the descriptor pointer itself. */
254 conv_t cd = (conv_t) icd;
255 if (inbuf == NULL || *inbuf == NULL)
256 return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
/* INBUF is cast to the const qualified pointer type that the callback
   declares, whatever ICONV_CONST expands to. */
258 return cd->lfuncs.loop_convert(icd,
259 (const char* *)inbuf,inbytesleft,
260 outbuf,outbytesleft);
/* Close the conversion descriptor ICD.
   NOTE(review): only the cast of the opaque handle to conv_t is visible here;
   how the descriptor is released and which value is returned are on lines
   that are not shown - confirm against the full file. */
263 int iconv_close (iconv_t icd)
265 conv_t cd = (conv_t) icd;
270 #ifndef LIBICONV_PLUG
273 * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
274 * fit in an iconv_allocation_t.
275 * If this verification fails, iconv_allocation_t must be made larger and
276 * the major version in LIBICONV_VERSION_INFO must be bumped.
277 * Currently 'struct conv_struct' has 21 integer/pointer fields, and
278 * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
/* Compile time size checks. Each array length evaluates to 1 when the struct
   fits into iconv_allocation_t and to -1 when it does not; an array type with
   a negative length is rejected by the compiler, so a size regression stops
   the build. */
280 typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
281 typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
/* Variant of iconv_open that builds the descriptor inside caller provided
   storage: cd is simply RESULTP reinterpreted as a struct conv_struct, and no
   allocation call appears in the visible lines. The same two included
   fragments, iconv_open1.h and iconv_open2.h, supply the rest of the body.
   NOTE(review): the return statements are not visible here; the int return
   type suggests a status code - confirm the exact values. */
283 int iconv_open_into (const char* tocode, const char* fromcode,
284 iconv_allocation_t* resultp)
286 struct conv_struct * cd;
287 unsigned int from_index;
289 unsigned int to_index;
294 #include "iconv_open1.h"
/* The verify_size_1 and verify_size_2 typedefs guarantee that either
   descriptor struct fits into an iconv_allocation_t. */
296 cd = (struct conv_struct *) resultp;
298 #include "iconv_open2.h"
/* Query or change settings of the conversion descriptor ICD. REQUEST selects
   the operation and ARGUMENT points to the value that is read or written.
   NOTE(review): the switch header, the break or return statements and the
   default branch are not visible here. */
306 int iconvctl (iconv_t icd, int request, void* argument)
308 conv_t cd = (conv_t) icd;
/* Condition fragment: true when the descriptor uses unicode_loop_convert with
   identical input and output index, or uses wchar_id_loop_convert.
   NOTE(review): the case label and the statement that consumes this
   expression are not visible here. */
312 ((cd->lfuncs.loop_convert == unicode_loop_convert
313 && cd->iindex == cd->oindex)
314 || cd->lfuncs.loop_convert == wchar_id_loop_convert
/* Getter: ARGUMENT is treated as int pointer and receives the current flag. */
317 case ICONV_GET_TRANSLITERATE:
318 *(int *)argument = cd->transliterate;
/* Setter: any nonzero int read through ARGUMENT is stored as 1, zero as 0. */
320 case ICONV_SET_TRANSLITERATE:
321 cd->transliterate = (*(const int *)argument ? 1 : 0);
/* Getter for the discard_ilseq flag, same convention as above. */
323 case ICONV_GET_DISCARD_ILSEQ:
324 *(int *)argument = cd->discard_ilseq;
/* Setter for the discard_ilseq flag, normalized to 0 or 1. */
326 case ICONV_SET_DISCARD_ILSEQ:
327 cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
/* A non-null ARGUMENT is copied by value as a struct iconv_hooks. */
329 case ICONV_SET_HOOKS:
330 if (argument != NULL) {
331 cd->hooks = *(const struct iconv_hooks *)argument;
/* NOTE(review): the line separating the two branches is not visible here;
   presumably the resets below run when ARGUMENT is NULL and clear all
   hooks - confirm. */
333 cd->hooks.uc_hook = NULL;
334 cd->hooks.wc_hook = NULL;
335 cd->hooks.data = NULL;
/* A non-null ARGUMENT is copied by value as a struct iconv_fallbacks. */
338 case ICONV_SET_FALLBACKS:
339 if (argument != NULL) {
340 cd->fallbacks = *(const struct iconv_fallbacks *)argument;
/* NOTE(review): as for the hooks, presumably these resets form the branch
   taken for a NULL ARGUMENT and clear every fallback - confirm. */
342 cd->fallbacks.mb_to_uc_fallback = NULL;
343 cd->fallbacks.uc_to_mb_fallback = NULL;
344 cd->fallbacks.mb_to_wc_fallback = NULL;
345 cd->fallbacks.wc_to_mb_fallback = NULL;
346 cd->fallbacks.data = NULL;
355 /* An alias after its name has been converted from 'int' to 'const char*'. */
/* iconvlist() performs that conversion by adding the int offset stored in a
   struct alias to the matching string pool, so that name can be used as an
   ordinary C string. */
356 struct nalias { const char* name; unsigned int encoding_index; };
/* qsort comparator for struct nalias elements: orders them by ascending
   encoding_index. Both indices are cast to int and subtracted, so the result
   is negative, zero or positive as qsort expects.
   NOTE(review): the subtraction is only safe while the indices are small
   enough that neither the casts nor the difference overflow int. */
358 static int compare_by_index (const void * arg1, const void * arg2)
360 const struct nalias * alias1 = (const struct nalias *) arg1;
361 const struct nalias * alias2 = (const struct nalias *) arg2;
362 return (int)alias1->encoding_index - (int)alias2->encoding_index;
/* qsort comparator for an array of C string pointers. Each argument points
   to an array element, hence the extra dereference when fetching the names. */
365 static int compare_by_name (const void * arg1, const void * arg2)
367 const char * name1 = *(const char **)arg1;
368 const char * name2 = *(const char **)arg2;
369 /* Compare alphabetically, but put "CS" names at the end. */
370 int sign = strcmp(name1,name2);
/* Combine both criteria into one value: the difference of the two CS prefix
   tests is -1, 0 or 1 and is scaled by 4, so it always outweighs the plus or
   minus 1 derived from the strcmp result.
   NOTE(review): the line guarding this assignment and the return statement
   are not visible here; presumably the adjustment applies only when the
   names differ - confirm. */
372 sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S'))
373 * 4 + (sign >= 0 ? 1 : -1);
/* Enumerate the known encoding names. All aliases are collected, grouped by
   encoding index, and DO_ONE is called once per group with the number of
   names, the sorted name array and the caller supplied data value.
   NOTE(review): the end of the parameter list, the local declarations and
   several closing lines are not visible here. */
378 void iconvlist (int (*do_one) (unsigned int namescount,
379 const char * const * names,
/* Compile time alias counts. aliascount2 is the size of sysdep_aliases when
   aliases2_lookup is a real function, and 0 when it is the NULL macro.
   NOTE(review): aliascount1 and the first aliascount2 expand to an
   unparenthesized division; every use visible here is still parsed as
   intended, but parentheses would make the macros safer. */
383 #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
384 #ifndef aliases2_lookup
385 #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
387 #define aliascount2 0
389 #define aliascount (aliascount1+aliascount2)
/* Work buffers on the stack, sized for the worst case of every alias. */
390 struct nalias aliasbuf[aliascount];
391 const char * namesbuf[aliascount];
394 /* Put all existing aliases into a buffer. */
398 for (i = 0; i < aliascount1; i++) {
399 const struct alias * p = &aliases[i];
/* Entries for the two locale dependent pseudo encodings are left out.
   NOTE(review): the first operand of this condition is not visible here. */
401 && p->encoding_index != ei_local_char
402 && p->encoding_index != ei_local_wchar_t) {
/* Resolve the stored offset to a real string pointer. */
403 aliasbuf[j].name = stringpool + p->name;
404 aliasbuf[j].encoding_index = p->encoding_index;
/* The system dependent aliases are appended unfiltered, resolved against
   their own string pool. */
408 #ifndef aliases2_lookup
409 for (i = 0; i < aliascount2; i++) {
410 aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
411 aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
417 /* Sort by encoding_index. */
419 qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
421 /* Process all aliases with the same encoding_index together. */
424 while (j < num_aliases) {
425 unsigned int ei = aliasbuf[j].encoding_index;
/* Copy the names of one run of equal indices into namesbuf; i counts them. */
428 namesbuf[i++] = aliasbuf[j++].name;
429 while (j < num_aliases && aliasbuf[j].encoding_index == ei);
/* Within a group the names are ordered by compare_by_name. */
431 qsort(namesbuf, i, sizeof(const char *), compare_by_name);
432 /* Call the callback. */
/* NOTE(review): the statement run for a nonzero callback result is not
   visible here; presumably the enumeration stops early - confirm. */
433 if (do_one(i,namesbuf,data))
443 * Table of canonical names of encodings.
444 * Instead of strings, it contains offsets into stringpool and stringpool2.
/* Offsets of the canonical encoding names, stored as unsigned short.
   iconv_canonicalize() reads all_canonical[index] and adds the value to a
   string pool pointer to obtain the name. The contents come from generated
   headers, and the variant that is included depends on the platform.
   NOTE(review): several of the conditionals selecting between the headers
   are not visible here; presumably the groups follow the order of the
   encodings .def lists so that positions line up with the encoding
   indices - confirm. */
446 static const unsigned short all_canonical[] = {
448 # include "canonical_sysaix.h"
449 #elif defined hpux || defined __hpux
450 # include "canonical_syshpux.h"
451 #elif defined __osf__
452 # include "canonical_sysosf1.h"
454 # include "canonical_syssolaris.h"
456 # include "canonical.h"
/* Offsets for the optional encoding sets (AIX, OSF/1, DOS, extra). */
460 # include "canonical_aix_sysaix.h"
462 # include "canonical_aix.h"
467 # include "canonical_osf1_sysosf1.h"
469 # include "canonical_osf1.h"
473 # include "canonical_dos.h"
476 # include "canonical_extra.h"
/* Offsets for the locale dependent entries, again one variant per platform. */
479 # include "canonical_local_sysaix.h"
480 #elif defined hpux || defined __hpux
481 # include "canonical_local_syshpux.h"
482 #elif defined __osf__
483 # include "canonical_local_sysosf1.h"
485 # include "canonical_local_syssolaris.h"
487 # include "canonical_local.h"
/* Return the canonical name of the encoding called NAME. The name is copied
   in normalized form into a local buffer, looked up in the alias tables, and
   the resulting encoding index selects an entry of all_canonical. Empty names
   and the locale dependent pseudo encodings are resolved first.
   NOTE(review): many lines of this function are not visible here, including
   the failure path; the notes below cover only what can be seen. */
491 const char * iconv_canonicalize (const char * name)
/* NOTE(review): the extra 10 bytes match the length of the longest suffix
   tested below (TRANSLIT with its two slashes) and the extra 1 presumably
   holds the terminating NUL - confirm. */
494 char buf[MAX_WORD_LENGTH+10+1];
497 const struct alias * ap;
502 /* Before calling aliases_lookup, convert the input string to upper case,
503 * and check whether it's entirely ASCII (we call gperf with option "-7"
504 * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
505 * or if it's too long, it is not a valid encoding name.
507 for (code = name;;) {
508 /* Search code in the table. */
509 for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
510 unsigned char c = * (unsigned char *) cp;
513 if (c >= 'a' && c <= 'z')
/* Suffix tests on the copied name. The length checks make sure that memcmp
   never reads in front of buf.
   NOTE(review): what happens when a suffix matches is not visible here. */
522 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
527 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
/* An empty name is replaced by the name reported by locale_charset(). */
534 if (buf[0] == '\0') {
535 code = locale_charset();
536 /* Avoid an endless loop that could occur when using an older version
537 of localcharset.c. */
/* Primary lookup takes the length of the copied name; the system dependent
   lookup takes the NUL terminated string.
   NOTE(review): the condition under which the second lookup runs is not
   visible here; presumably it is tried only when the first one fails. */
543 ap = aliases_lookup(buf,bp-buf);
546 ap = aliases2_lookup(buf);
/* The pseudo encoding for the locale charset is resolved by name, through
   locale_charset(), in the same way as an empty name. */
550 if (ap->encoding_index == ei_local_char) {
551 code = locale_charset();
552 /* Avoid an endless loop that could occur when using an older version
553 of localcharset.c. */
/* The pseudo encoding for wchar_t is mapped to a concrete index chosen by
   platform and by sizeof(wchar_t). */
558 if (ap->encoding_index == ei_local_wchar_t) {
559 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
560 This is also the case on native Woe32 systems and Cygwin >= 1.7, where
561 we know that it is UTF-16. */
562 #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
563 if (sizeof(wchar_t) == 4) {
564 index = ei_ucs4internal;
/* NOTE(review): the indices chosen for a 2 byte wchar_t in this branch depend
   on WORDS_LITTLEENDIAN and are not visible here. */
567 if (sizeof(wchar_t) == 2) {
568 # if WORDS_LITTLEENDIAN
/* Generic ISO 10646 branch: 4 bytes selects ei_ucs4internal, 2 bytes selects
   ei_ucs2internal and 1 byte selects ei_iso8859_1. */
575 #elif __STDC_ISO_10646__
576 if (sizeof(wchar_t) == 4) {
577 index = ei_ucs4internal;
580 if (sizeof(wchar_t) == 2) {
581 index = ei_ucs2internal;
584 if (sizeof(wchar_t) == 1) {
585 index = ei_iso8859_1;
/* Ordinary aliases use their own encoding index. */
590 index = ap->encoding_index;
/* The table stores an offset, so the name is that offset added to pool.
   NOTE(review): the assignment of pool is not visible here; presumably it is
   stringpool or stringpool2 depending on which lookup matched. */
593 return all_canonical[index] + pool;
/* Global variable with external linkage, initialized with the value of the
   _LIBICONV_VERSION macro that was in effect when this file was compiled. */
598 int _libiconv_version = _LIBICONV_VERSION;
600 #if defined __FreeBSD__ && !defined __gnu_freebsd__
601 /* GNU libiconv is the native FreeBSD iconv implementation since 2002.
602 It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */
/* Two level macro: strong_alias lets its arguments be macro expanded before
   _strong_alias stringifies NAME for the alias attribute. The generated
   declaration gives ALIASNAME the same type as NAME and makes it a second
   symbol for the same definition. */
603 #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
604 #define _strong_alias(name, aliasname) \
605 extern __typeof (name) aliasname __attribute__ ((alias (#name)));
/* NOTE(review): the functions in this file are written as iconv_open, iconv
   and iconv_close, so those names are presumably macros for the libiconv_
   symbols that get undefined on lines not visible here - confirm against
   iconv.h. */
609 strong_alias (libiconv_open, iconv_open)
610 strong_alias (libiconv, iconv)
611 strong_alias (libiconv_close, iconv_close)