1 /* Determine the currently selected locale.
2 Copyright (C) 1995-1999, 2000-2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU Library General Public License as published
6 by the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
19 /* Written by Ulrich Drepper <drepper@gnu.org>, 1995. */
20 /* Win32 code written by Tor Lillqvist <tml@iki.fi>. */
21 /* MacOS X code written by Bruno Haible <bruno@clisp.org>. */
30 #if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE
32 # include <CoreFoundation/CFString.h>
33 # if HAVE_CFLOCALECOPYCURRENT
34 # include <CoreFoundation/CFLocale.h>
35 # elif HAVE_CFPREFERENCESCOPYAPPVALUE
36 # include <CoreFoundation/CFPreferences.h>
40 #if defined _WIN32 || defined __WIN32__
45 # define WIN32_LEAN_AND_MEAN
47 /* List of language codes, sorted by value:
70 0x17 LANG_RHAETO_ROMANCE
73 0x1a LANG_CROATIAN == LANG_SERBIAN
165 /* Mingw headers don't have the latest language and sublanguage codes. */
166 # ifndef LANG_AFRIKAANS
167 # define LANG_AFRIKAANS 0x36
169 # ifndef LANG_ALBANIAN
170 # define LANG_ALBANIAN 0x1c
172 # ifndef LANG_AMHARIC
173 # define LANG_AMHARIC 0x5e
176 # define LANG_ARABIC 0x01
178 # ifndef LANG_ARMENIAN
179 # define LANG_ARMENIAN 0x2b
181 # ifndef LANG_ASSAMESE
182 # define LANG_ASSAMESE 0x4d
185 # define LANG_AZERI 0x2c
188 # define LANG_BASQUE 0x2d
190 # ifndef LANG_BELARUSIAN
191 # define LANG_BELARUSIAN 0x23
193 # ifndef LANG_BENGALI
194 # define LANG_BENGALI 0x45
196 # ifndef LANG_BURMESE
197 # define LANG_BURMESE 0x55
199 # ifndef LANG_CAMBODIAN
200 # define LANG_CAMBODIAN 0x53
202 # ifndef LANG_CATALAN
203 # define LANG_CATALAN 0x03
205 # ifndef LANG_CHEROKEE
206 # define LANG_CHEROKEE 0x5c
209 # define LANG_DIVEHI 0x65
212 # define LANG_EDO 0x66
214 # ifndef LANG_ESTONIAN
215 # define LANG_ESTONIAN 0x25
217 # ifndef LANG_FAEROESE
218 # define LANG_FAEROESE 0x38
221 # define LANG_FARSI 0x29
223 # ifndef LANG_FRISIAN
224 # define LANG_FRISIAN 0x62
226 # ifndef LANG_FULFULDE
227 # define LANG_FULFULDE 0x67
230 # define LANG_GAELIC 0x3c
232 # ifndef LANG_GALICIAN
233 # define LANG_GALICIAN 0x56
235 # ifndef LANG_GEORGIAN
236 # define LANG_GEORGIAN 0x37
238 # ifndef LANG_GUARANI
239 # define LANG_GUARANI 0x74
241 # ifndef LANG_GUJARATI
242 # define LANG_GUJARATI 0x47
245 # define LANG_HAUSA 0x68
247 # ifndef LANG_HAWAIIAN
248 # define LANG_HAWAIIAN 0x75
251 # define LANG_HEBREW 0x0d
254 # define LANG_HINDI 0x39
257 # define LANG_IBIBIO 0x69
260 # define LANG_IGBO 0x70
262 # ifndef LANG_INDONESIAN
263 # define LANG_INDONESIAN 0x21
265 # ifndef LANG_INUKTITUT
266 # define LANG_INUKTITUT 0x5d
268 # ifndef LANG_KANNADA
269 # define LANG_KANNADA 0x4b
272 # define LANG_KANURI 0x71
274 # ifndef LANG_KASHMIRI
275 # define LANG_KASHMIRI 0x60
278 # define LANG_KAZAK 0x3f
280 # ifndef LANG_KONKANI
281 # define LANG_KONKANI 0x57
284 # define LANG_KYRGYZ 0x40
287 # define LANG_LAO 0x54
290 # define LANG_LATIN 0x76
292 # ifndef LANG_LATVIAN
293 # define LANG_LATVIAN 0x26
295 # ifndef LANG_LITHUANIAN
296 # define LANG_LITHUANIAN 0x27
298 # ifndef LANG_MACEDONIAN
299 # define LANG_MACEDONIAN 0x2f
302 # define LANG_MALAY 0x3e
304 # ifndef LANG_MALAYALAM
305 # define LANG_MALAYALAM 0x4c
307 # ifndef LANG_MALTESE
308 # define LANG_MALTESE 0x3a
310 # ifndef LANG_MANIPURI
311 # define LANG_MANIPURI 0x58
313 # ifndef LANG_MARATHI
314 # define LANG_MARATHI 0x4e
316 # ifndef LANG_MONGOLIAN
317 # define LANG_MONGOLIAN 0x50
320 # define LANG_NEPALI 0x61
323 # define LANG_ORIYA 0x48
326 # define LANG_OROMO 0x72
328 # ifndef LANG_PAPIAMENTU
329 # define LANG_PAPIAMENTU 0x79
332 # define LANG_PASHTO 0x63
334 # ifndef LANG_PUNJABI
335 # define LANG_PUNJABI 0x46
337 # ifndef LANG_RHAETO_ROMANCE
338 # define LANG_RHAETO_ROMANCE 0x17
341 # define LANG_SAAMI 0x3b
343 # ifndef LANG_SANSKRIT
344 # define LANG_SANSKRIT 0x4f
346 # ifndef LANG_SERBIAN
347 # define LANG_SERBIAN 0x1a
350 # define LANG_SINDHI 0x59
352 # ifndef LANG_SINHALESE
353 # define LANG_SINHALESE 0x5b
356 # define LANG_SLOVAK 0x1b
359 # define LANG_SOMALI 0x77
361 # ifndef LANG_SORBIAN
362 # define LANG_SORBIAN 0x2e
365 # define LANG_SUTU 0x30
367 # ifndef LANG_SWAHILI
368 # define LANG_SWAHILI 0x41
371 # define LANG_SYRIAC 0x5a
373 # ifndef LANG_TAGALOG
374 # define LANG_TAGALOG 0x64
377 # define LANG_TAJIK 0x28
379 # ifndef LANG_TAMAZIGHT
380 # define LANG_TAMAZIGHT 0x5f
383 # define LANG_TAMIL 0x49
386 # define LANG_TATAR 0x44
389 # define LANG_TELUGU 0x4a
392 # define LANG_THAI 0x1e
394 # ifndef LANG_TIBETAN
395 # define LANG_TIBETAN 0x51
397 # ifndef LANG_TIGRINYA
398 # define LANG_TIGRINYA 0x73
401 # define LANG_TSONGA 0x31
404 # define LANG_TSWANA 0x32
406 # ifndef LANG_TURKMEN
407 # define LANG_TURKMEN 0x42
409 # ifndef LANG_UKRAINIAN
410 # define LANG_UKRAINIAN 0x22
413 # define LANG_URDU 0x20
416 # define LANG_UZBEK 0x43
419 # define LANG_VENDA 0x33
421 # ifndef LANG_VIETNAMESE
422 # define LANG_VIETNAMESE 0x2a
425 # define LANG_WELSH 0x52
428 # define LANG_XHOSA 0x34
431 # define LANG_YI 0x78
433 # ifndef LANG_YIDDISH
434 # define LANG_YIDDISH 0x3d
437 # define LANG_YORUBA 0x6a
440 # define LANG_ZULU 0x35
442 # ifndef SUBLANG_ARABIC_SAUDI_ARABIA
443 # define SUBLANG_ARABIC_SAUDI_ARABIA 0x01
445 # ifndef SUBLANG_ARABIC_IRAQ
446 # define SUBLANG_ARABIC_IRAQ 0x02
448 # ifndef SUBLANG_ARABIC_EGYPT
449 # define SUBLANG_ARABIC_EGYPT 0x03
451 # ifndef SUBLANG_ARABIC_LIBYA
452 # define SUBLANG_ARABIC_LIBYA 0x04
454 # ifndef SUBLANG_ARABIC_ALGERIA
455 # define SUBLANG_ARABIC_ALGERIA 0x05
457 # ifndef SUBLANG_ARABIC_MOROCCO
458 # define SUBLANG_ARABIC_MOROCCO 0x06
460 # ifndef SUBLANG_ARABIC_TUNISIA
461 # define SUBLANG_ARABIC_TUNISIA 0x07
463 # ifndef SUBLANG_ARABIC_OMAN
464 # define SUBLANG_ARABIC_OMAN 0x08
466 # ifndef SUBLANG_ARABIC_YEMEN
467 # define SUBLANG_ARABIC_YEMEN 0x09
469 # ifndef SUBLANG_ARABIC_SYRIA
470 # define SUBLANG_ARABIC_SYRIA 0x0a
472 # ifndef SUBLANG_ARABIC_JORDAN
473 # define SUBLANG_ARABIC_JORDAN 0x0b
475 # ifndef SUBLANG_ARABIC_LEBANON
476 # define SUBLANG_ARABIC_LEBANON 0x0c
478 # ifndef SUBLANG_ARABIC_KUWAIT
479 # define SUBLANG_ARABIC_KUWAIT 0x0d
481 # ifndef SUBLANG_ARABIC_UAE
482 # define SUBLANG_ARABIC_UAE 0x0e
484 # ifndef SUBLANG_ARABIC_BAHRAIN
485 # define SUBLANG_ARABIC_BAHRAIN 0x0f
487 # ifndef SUBLANG_ARABIC_QATAR
488 # define SUBLANG_ARABIC_QATAR 0x10
490 # ifndef SUBLANG_AZERI_LATIN
491 # define SUBLANG_AZERI_LATIN 0x01
493 # ifndef SUBLANG_AZERI_CYRILLIC
494 # define SUBLANG_AZERI_CYRILLIC 0x02
496 # ifndef SUBLANG_BENGALI_INDIA
497 # define SUBLANG_BENGALI_INDIA 0x00
499 # ifndef SUBLANG_BENGALI_BANGLADESH
500 # define SUBLANG_BENGALI_BANGLADESH 0x01
502 # ifndef SUBLANG_CHINESE_MACAU
503 # define SUBLANG_CHINESE_MACAU 0x05
505 # ifndef SUBLANG_ENGLISH_SOUTH_AFRICA
506 # define SUBLANG_ENGLISH_SOUTH_AFRICA 0x07
508 # ifndef SUBLANG_ENGLISH_JAMAICA
509 # define SUBLANG_ENGLISH_JAMAICA 0x08
511 # ifndef SUBLANG_ENGLISH_CARIBBEAN
512 # define SUBLANG_ENGLISH_CARIBBEAN 0x09
514 # ifndef SUBLANG_ENGLISH_BELIZE
515 # define SUBLANG_ENGLISH_BELIZE 0x0a
517 # ifndef SUBLANG_ENGLISH_TRINIDAD
518 # define SUBLANG_ENGLISH_TRINIDAD 0x0b
520 # ifndef SUBLANG_ENGLISH_ZIMBABWE
521 # define SUBLANG_ENGLISH_ZIMBABWE 0x0c
523 # ifndef SUBLANG_ENGLISH_PHILIPPINES
524 # define SUBLANG_ENGLISH_PHILIPPINES 0x0d
526 # ifndef SUBLANG_ENGLISH_INDONESIA
527 # define SUBLANG_ENGLISH_INDONESIA 0x0e
529 # ifndef SUBLANG_ENGLISH_HONGKONG
530 # define SUBLANG_ENGLISH_HONGKONG 0x0f
532 # ifndef SUBLANG_ENGLISH_INDIA
533 # define SUBLANG_ENGLISH_INDIA 0x10
535 # ifndef SUBLANG_ENGLISH_MALAYSIA
536 # define SUBLANG_ENGLISH_MALAYSIA 0x11
538 # ifndef SUBLANG_ENGLISH_SINGAPORE
539 # define SUBLANG_ENGLISH_SINGAPORE 0x12
541 # ifndef SUBLANG_FRENCH_LUXEMBOURG
542 # define SUBLANG_FRENCH_LUXEMBOURG 0x05
544 # ifndef SUBLANG_FRENCH_MONACO
545 # define SUBLANG_FRENCH_MONACO 0x06
547 # ifndef SUBLANG_FRENCH_WESTINDIES
548 # define SUBLANG_FRENCH_WESTINDIES 0x07
550 # ifndef SUBLANG_FRENCH_REUNION
551 # define SUBLANG_FRENCH_REUNION 0x08
553 # ifndef SUBLANG_FRENCH_CONGO
554 # define SUBLANG_FRENCH_CONGO 0x09
556 # ifndef SUBLANG_FRENCH_SENEGAL
557 # define SUBLANG_FRENCH_SENEGAL 0x0a
559 # ifndef SUBLANG_FRENCH_CAMEROON
560 # define SUBLANG_FRENCH_CAMEROON 0x0b
562 # ifndef SUBLANG_FRENCH_COTEDIVOIRE
563 # define SUBLANG_FRENCH_COTEDIVOIRE 0x0c
565 # ifndef SUBLANG_FRENCH_MALI
566 # define SUBLANG_FRENCH_MALI 0x0d
568 # ifndef SUBLANG_FRENCH_MOROCCO
569 # define SUBLANG_FRENCH_MOROCCO 0x0e
571 # ifndef SUBLANG_FRENCH_HAITI
572 # define SUBLANG_FRENCH_HAITI 0x0f
574 # ifndef SUBLANG_GERMAN_LUXEMBOURG
575 # define SUBLANG_GERMAN_LUXEMBOURG 0x04
577 # ifndef SUBLANG_GERMAN_LIECHTENSTEIN
578 # define SUBLANG_GERMAN_LIECHTENSTEIN 0x05
580 # ifndef SUBLANG_KASHMIRI_INDIA
581 # define SUBLANG_KASHMIRI_INDIA 0x02
583 # ifndef SUBLANG_MALAY_MALAYSIA
584 # define SUBLANG_MALAY_MALAYSIA 0x01
586 # ifndef SUBLANG_MALAY_BRUNEI_DARUSSALAM
587 # define SUBLANG_MALAY_BRUNEI_DARUSSALAM 0x02
589 # ifndef SUBLANG_NEPALI_INDIA
590 # define SUBLANG_NEPALI_INDIA 0x02
592 # ifndef SUBLANG_PUNJABI_INDIA
593 # define SUBLANG_PUNJABI_INDIA 0x00
595 # ifndef SUBLANG_PUNJABI_PAKISTAN
596 # define SUBLANG_PUNJABI_PAKISTAN 0x01
598 # ifndef SUBLANG_ROMANIAN_ROMANIA
599 # define SUBLANG_ROMANIAN_ROMANIA 0x00
601 # ifndef SUBLANG_ROMANIAN_MOLDOVA
602 # define SUBLANG_ROMANIAN_MOLDOVA 0x01
604 # ifndef SUBLANG_SERBIAN_LATIN
605 # define SUBLANG_SERBIAN_LATIN 0x02
607 # ifndef SUBLANG_SERBIAN_CYRILLIC
608 # define SUBLANG_SERBIAN_CYRILLIC 0x03
610 # ifndef SUBLANG_SINDHI_INDIA
611 # define SUBLANG_SINDHI_INDIA 0x00
613 # ifndef SUBLANG_SINDHI_PAKISTAN
614 # define SUBLANG_SINDHI_PAKISTAN 0x01
616 # ifndef SUBLANG_SPANISH_GUATEMALA
617 # define SUBLANG_SPANISH_GUATEMALA 0x04
619 # ifndef SUBLANG_SPANISH_COSTA_RICA
620 # define SUBLANG_SPANISH_COSTA_RICA 0x05
622 # ifndef SUBLANG_SPANISH_PANAMA
623 # define SUBLANG_SPANISH_PANAMA 0x06
625 # ifndef SUBLANG_SPANISH_DOMINICAN_REPUBLIC
626 # define SUBLANG_SPANISH_DOMINICAN_REPUBLIC 0x07
628 # ifndef SUBLANG_SPANISH_VENEZUELA
629 # define SUBLANG_SPANISH_VENEZUELA 0x08
631 # ifndef SUBLANG_SPANISH_COLOMBIA
632 # define SUBLANG_SPANISH_COLOMBIA 0x09
634 # ifndef SUBLANG_SPANISH_PERU
635 # define SUBLANG_SPANISH_PERU 0x0a
637 # ifndef SUBLANG_SPANISH_ARGENTINA
638 # define SUBLANG_SPANISH_ARGENTINA 0x0b
640 # ifndef SUBLANG_SPANISH_ECUADOR
641 # define SUBLANG_SPANISH_ECUADOR 0x0c
643 # ifndef SUBLANG_SPANISH_CHILE
644 # define SUBLANG_SPANISH_CHILE 0x0d
646 # ifndef SUBLANG_SPANISH_URUGUAY
647 # define SUBLANG_SPANISH_URUGUAY 0x0e
649 # ifndef SUBLANG_SPANISH_PARAGUAY
650 # define SUBLANG_SPANISH_PARAGUAY 0x0f
652 # ifndef SUBLANG_SPANISH_BOLIVIA
653 # define SUBLANG_SPANISH_BOLIVIA 0x10
655 # ifndef SUBLANG_SPANISH_EL_SALVADOR
656 # define SUBLANG_SPANISH_EL_SALVADOR 0x11
658 # ifndef SUBLANG_SPANISH_HONDURAS
659 # define SUBLANG_SPANISH_HONDURAS 0x12
661 # ifndef SUBLANG_SPANISH_NICARAGUA
662 # define SUBLANG_SPANISH_NICARAGUA 0x13
664 # ifndef SUBLANG_SPANISH_PUERTO_RICO
665 # define SUBLANG_SPANISH_PUERTO_RICO 0x14
667 # ifndef SUBLANG_SWEDISH_FINLAND
668 # define SUBLANG_SWEDISH_FINLAND 0x02
670 # ifndef SUBLANG_TAMAZIGHT_ARABIC
671 # define SUBLANG_TAMAZIGHT_ARABIC 0x01
673 # ifndef SUBLANG_TAMAZIGHT_LATIN
674 # define SUBLANG_TAMAZIGHT_LATIN 0x02
676 # ifndef SUBLANG_TIGRINYA_ETHIOPIA
677 # define SUBLANG_TIGRINYA_ETHIOPIA 0x00
679 # ifndef SUBLANG_TIGRINYA_ERITREA
680 # define SUBLANG_TIGRINYA_ERITREA 0x01
682 # ifndef SUBLANG_URDU_PAKISTAN
683 # define SUBLANG_URDU_PAKISTAN 0x01
685 # ifndef SUBLANG_URDU_INDIA
686 # define SUBLANG_URDU_INDIA 0x02
688 # ifndef SUBLANG_UZBEK_LATIN
689 # define SUBLANG_UZBEK_LATIN 0x01
691 # ifndef SUBLANG_UZBEK_CYRILLIC
692 # define SUBLANG_UZBEK_CYRILLIC 0x02
696 # if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE
697 /* MacOS X 10.2 or newer */
699 /* Canonicalize a MacOS X locale name to a Unix locale name.
700 NAME is a sufficiently large buffer.
701 On input, it contains the MacOS X locale name.
702 On output, it contains the Unix locale name.
The rewrite happens in place with strcpy, and a replacement can be longer
than the text it replaces (a 4-letter script tag may turn into a script name
of up to 9 characters), which is why NAME needs spare room. */
704 _nl_locale_name_canonicalize (char *name)
706 /* This conversion is based on a posting by
707 Deborah GoldSmith <goldsmit@apple.com> on 2005-03-08,
708 http://lists.apple.com/archives/carbon-dev/2005/Mar/msg00293.html */
710 /* Convert legacy (NeXTstep inherited) English names to Unix (ISO 639 and
711 ISO 3166) names. Prior to MacOS X 10.3, there is no API for doing this.
712 Therefore we do it ourselves, using a table based on the results of the
713 MacOS X 10.3.8 function
714 CFLocaleCreateCanonicalLocaleIdentifierFromString(). */
715 typedef struct { const char legacy[21+1]; const char unixy[5+1]; }
717 static const legacy_entry legacy_table[] = {
718 { "Afrikaans", "af" },
719 { "Albanian", "sq" },
722 { "Armenian", "hy" },
723 { "Assamese", "as" },
725 { "Azerbaijani", "az" },
727 { "Belarusian", "be" },
728 { "Belorussian", "be" },
730 { "Brazilian Portugese", "pt_BR" },
731 { "Brazilian Portuguese", "pt_BR" },
733 { "Bulgarian", "bg" },
735 { "Byelorussian", "be" },
738 { "Chichewa", "ny" },
740 { "Chinese, Simplified", "zh_CN" },
741 { "Chinese, Traditional", "zh_TW" },
742 { "Chinese, Tradtional", "zh_TW" },
743 { "Croatian", "hr" },
747 { "Dzongkha", "dz" },
749 { "Esperanto", "eo" },
750 { "Estonian", "et" },
754 { "Flemish", "nl_BE" },
756 { "Galician", "gl" },
757 { "Gallegan", "gl" },
758 { "Georgian", "ka" },
761 { "Greenlandic", "kl" },
763 { "Gujarati", "gu" },
764 { "Hawaiian", "haw" }, /* Yes, "haw", not "cpe". */
767 { "Hungarian", "hu" },
768 { "Icelandic", "is" },
769 { "Indonesian", "id" },
770 { "Inuktitut", "iu" },
773 { "Japanese", "ja" },
774 { "Javanese", "jv" },
775 { "Kalaallisut", "kl" },
777 { "Kashmiri", "ks" },
780 { "Kinyarwanda", "rw" },
786 { "Lithuanian", "lt" },
787 { "Macedonian", "mk" },
788 { "Malagasy", "mg" },
790 { "Malayalam", "ml" },
794 { "Moldavian", "mo" },
795 { "Mongolian", "mn" },
797 { "Norwegian", "nb" }, /* Yes, "nb", not the obsolete "no". */
806 { "Portuguese", "pt" },
807 { "Portuguese, Brazilian", "pt_BR" },
811 { "Romanian", "ro" },
815 { "Sami", "se_NO" }, /* Not just "se". */
816 { "Sanskrit", "sa" },
817 { "Scottish", "gd" },
819 { "Simplified Chinese", "zh_CN" },
821 { "Sinhalese", "si" },
823 { "Slovenian", "sl" },
826 { "Sundanese", "su" },
837 { "Tigrinya", "ti" },
839 { "Traditional Chinese", "zh_TW" },
843 { "Ukrainian", "uk" },
846 { "Vietnamese", "vi" },
851 /* Convert new-style locale names with language tags (ISO 639 and ISO 15924)
852 to Unix (ISO 639 and ISO 3166) names. */
853 typedef struct { const char langtag[7+1]; const char unixy[12+1]; }
855 static const langtag_entry langtag_table[] = {
856 /* MacOS X has "az-Arab", "az-Cyrl", "az-Latn".
857 The default script for az on Unix is Latin. */
859 /* MacOS X has "ga-dots". Does not yet exist on Unix. */
861 /* MacOS X has "kk-Cyrl". Does not yet exist on Unix. */
862 /* MacOS X has "mn-Cyrl", "mn-Mong".
863 The default script for mn on Unix is Cyrillic. */
865 /* MacOS X has "ms-Arab", "ms-Latn".
866 The default script for ms on Unix is Latin. */
868 /* MacOS X has "tg-Cyrl".
869 The default script for tg on Unix is Cyrillic. */
871 /* MacOS X has "tk-Cyrl". Does not yet exist on Unix. */
872 /* MacOS X has "tt-Cyrl".
873 The default script for tt on Unix is Cyrillic. */
875 /* MacOS X has "zh-Hans", "zh-Hant".
876 Country codes are used to distinguish these on Unix. */
877 { "zh-Hans", "zh_CN" },
878 { "zh-Hant", "zh_TW" }
881 /* Convert script names (ISO 15924) to Unix conventions.
882 See http://www.unicode.org/iso15924/iso15924-codes.html */
883 typedef struct { const char script[4+1]; const char unixy[9+1]; }
885 static const script_entry script_table[] = {
886 { "Arab", "arabic" },
887 { "Cyrl", "cyrillic" },
888 { "Mong", "mongolian" }
891 /* Step 1: Convert using legacy_table. */
/* Names that do not start with an uppercase ASCII letter skip this lookup. */
892 if (name[0] >= 'A' && name[0] <= 'Z')
896 i2 = sizeof (legacy_table) / sizeof (legacy_entry);
/* Bisection on a half-open index range; it relies on legacy_table being
   kept sorted in strcmp order. */
899 /* At this point we know that if name occurs in legacy_table,
900 its index must be >= i1 and < i2. */
901 unsigned int i = (i1 + i2) >> 1;
902 const legacy_entry *p = &legacy_table[i];
903 if (strcmp (name, p->legacy) < 0)
908 if (strcmp (name, legacy_table[i1].legacy) == 0)
910 strcpy (name, legacy_table[i1].unixy);
915 /* Step 2: Convert using langtag_table and script_table. */
/* Candidates are exactly 7 characters long with a dash at index 2, i.e. a
   2-letter language, a dash and a 4-letter script tag. */
916 if (strlen (name) == 7 && name[2] == '-')
920 i2 = sizeof (langtag_table) / sizeof (langtag_entry);
923 /* At this point we know that if name occurs in langtag_table,
924 its index must be >= i1 and < i2. */
925 unsigned int i = (i1 + i2) >> 1;
926 const langtag_entry *p = &langtag_table[i];
927 if (strcmp (name, p->langtag) < 0)
932 if (strcmp (name, langtag_table[i1].langtag) == 0)
934 strcpy (name, langtag_table[i1].unixy);
939 i2 = sizeof (script_table) / sizeof (script_entry);
942 /* At this point we know that if (name + 3) occurs in script_table,
943 its index must be >= i1 and < i2. */
944 unsigned int i = (i1 + i2) >> 1;
945 const script_entry *p = &script_table[i];
946 if (strcmp (name + 3, p->script) < 0)
951 if (strcmp (name + 3, script_table[i1].script) == 0)
/* Overwrite the script tag, starting at offset 3, with the Unix script name. */
954 strcpy (name + 3, script_table[i1].unixy);
959 /* Step 3: Convert new-style dash to Unix underscore. */
962 for (p = name; *p != '\0'; p++)
970 /* XPG3 defines the result of 'setlocale (category, NULL)' as:
971 "Directs 'setlocale()' to query 'category' and return the current
973 However it does not specify the exact format. Neither do SUSV2 and
974 ISO C 99. So we can use this feature only on selected systems (e.g.
975 those using GNU C Library). */
976 #if defined _LIBC || (defined __GLIBC__ && __GLIBC__ >= 2)
977 # define HAVE_LOCALE_NULL
980 /* Determine the current locale's name, and canonicalize it into XPG syntax
981 language[_territory][.codeset][@modifier]
982 The codeset part in the result is not reliable; the locale_charset()
983 function should be used for codeset information instead.
984 The result must not be freed; it is statically allocated.
CATEGORY is the LC_xxx constant handed to setlocale(); CATEGORYNAME is the
name of the matching environment variable and, in the visible code, is only
consulted by the getenv() fallback.
NOTE(review): no canonicalization step is visible in the lines shown here;
the string obtained from setlocale() or getenv() appears to be used as is,
and the result when all three variables are unset or empty is not visible
either. */
987 _nl_locale_name_posix (int category, const char *categoryname)
989 /* Use the POSIX methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'.
990 On some systems this can be done by the 'setlocale' function itself. */
991 #if defined HAVE_SETLOCALE && defined HAVE_LC_MESSAGES && defined HAVE_LOCALE_NULL
/* A NULL locale argument makes setlocale() report the current setting. */
992 return setlocale (category, NULL);
996 /* A setting of LC_ALL overrides all others. */
997 retval = getenv ("LC_ALL");
/* A variable that is unset or set to the empty string does not count. */
998 if (retval != NULL && retval[0] != '\0')
1000 /* Next comes the name of the desired category. */
1001 retval = getenv (categoryname);
1002 if (retval != NULL && retval[0] != '\0')
1004 /* Last possibility is the LANG environment variable. */
1005 retval = getenv ("LANG");
1006 if (retval != NULL && retval[0] != '\0')
/* Return the name of the default locale, i.e. the locale to use when the
   POSIX environment variables do not select one.
   - Without CoreFoundation and without native Win32 support, the C locale
     is the default (see the comment inside).
   - With CoreFoundation, the identifier comes from CFLocaleCopyCurrent /
     CFLocaleGetIdentifier or from the AppleLocale preference, is passed
     through _nl_locale_name_canonicalize, and a strdup copy is cached in a
     function-static pointer; C is used when no name could be obtained.
   - On native Win32, the language ID of GetThreadLocale is split into its
     primary and sub-language parts, which are mapped to names such as
     af_ZA or az_AZ@latin; unknown languages yield C.
   The returned pointer refers to a string literal or to the cached copy.
   NOTE(review): several lines of this function are not visible in this
   excerpt, so this summary covers only the statements shown.  */
1014 _nl_locale_name_default (void)
1017 "All implementations shall define a locale as the default locale, to be
1018 invoked when no environment variables are set, or set to the empty
1019 string. This default locale can be the POSIX locale or any other
1020 implementation-defined locale. Some implementations may provide
1021 facilities for local installation administrators to set the default
1022 locale, customizing it for each location. POSIX:2001 does not require
1025 #if !(HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE || defined(WIN32_NATIVE))
1027 /* The system does not have a way of setting the locale, other than the
1028 POSIX specified environment variables. We use C as default locale. */
1033 /* Return an XPG style locale name language[_territory][@modifier].
1034 Don't even bother determining the codeset; it's not useful in this
1035 context, because message catalogs are not specific to a single
1038 # if HAVE_CFLOCALECOPYCURRENT || HAVE_CFPREFERENCESCOPYAPPVALUE
1039 /* MacOS X 10.2 or newer */
1041 /* Cache the locale name, since CoreFoundation calls are expensive. */
1042 static const char *cached_localename;
1044 if (cached_localename == NULL)
1047 # if HAVE_CFLOCALECOPYCURRENT /* MacOS X 10.3 or newer */
1048 CFLocaleRef locale = CFLocaleCopyCurrent ();
1049 CFStringRef name = CFLocaleGetIdentifier (locale);
1051 if (CFStringGetCString (name, namebuf, sizeof(namebuf),
1052 kCFStringEncodingASCII))
1054 _nl_locale_name_canonicalize (namebuf);
1055 cached_localename = strdup (namebuf);
1058 # elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.2 or newer */
1060 CFPreferencesCopyAppValue (CFSTR ("AppleLocale"),
1061 kCFPreferencesCurrentApplication);
1063 && CFGetTypeID (value) == CFStringGetTypeID ()
1064 && CFStringGetCString ((CFStringRef)value, namebuf, sizeof(namebuf),
1065 kCFStringEncodingASCII))
1067 _nl_locale_name_canonicalize (namebuf);
1068 cached_localename = strdup (namebuf);
1071 if (cached_localename == NULL)
1072 cached_localename = "C";
1074 return cached_localename;
1079 # if defined(WIN32_NATIVE) /* WIN32, not Cygwin */
1085 /* Use native Win32 API locale ID. */
1086 lcid = GetThreadLocale ();
1088 /* Strip off the sorting rules, keep only the language part. */
1089 langid = LANGIDFROMLCID (lcid);
1091 /* Split into language and territory part. */
1092 primary = PRIMARYLANGID (langid);
1093 sub = SUBLANGID (langid);
1095 /* Dispatch on language.
1096 See also http://www.unicode.org/unicode/onlinedat/languages.html .
1097 For details about languages, see http://www.ethnologue.com/ . */
1100 case LANG_AFRIKAANS: return "af_ZA";
1101 case LANG_ALBANIAN: return "sq_AL";
1102 case LANG_AMHARIC: return "am_ET";
1106 case SUBLANG_ARABIC_SAUDI_ARABIA: return "ar_SA";
1107 case SUBLANG_ARABIC_IRAQ: return "ar_IQ";
1108 case SUBLANG_ARABIC_EGYPT: return "ar_EG";
1109 case SUBLANG_ARABIC_LIBYA: return "ar_LY";
1110 case SUBLANG_ARABIC_ALGERIA: return "ar_DZ";
1111 case SUBLANG_ARABIC_MOROCCO: return "ar_MA";
1112 case SUBLANG_ARABIC_TUNISIA: return "ar_TN";
1113 case SUBLANG_ARABIC_OMAN: return "ar_OM";
1114 case SUBLANG_ARABIC_YEMEN: return "ar_YE";
1115 case SUBLANG_ARABIC_SYRIA: return "ar_SY";
1116 case SUBLANG_ARABIC_JORDAN: return "ar_JO";
1117 case SUBLANG_ARABIC_LEBANON: return "ar_LB";
1118 case SUBLANG_ARABIC_KUWAIT: return "ar_KW";
1119 case SUBLANG_ARABIC_UAE: return "ar_AE";
1120 case SUBLANG_ARABIC_BAHRAIN: return "ar_BH";
1121 case SUBLANG_ARABIC_QATAR: return "ar_QA";
1124 case LANG_ARMENIAN: return "hy_AM";
1125 case LANG_ASSAMESE: return "as_IN";
1129 /* FIXME: Adjust this when Azerbaijani locales appear on Unix. */
1130 case SUBLANG_AZERI_LATIN: return "az_AZ@latin";
1131 case SUBLANG_AZERI_CYRILLIC: return "az_AZ@cyrillic";
1137 case SUBLANG_DEFAULT: return "eu_ES";
1139 return "eu"; /* Ambiguous: could be "eu_ES" or "eu_FR". */
1140 case LANG_BELARUSIAN: return "be_BY";
1144 case SUBLANG_BENGALI_INDIA: return "bn_IN";
1145 case SUBLANG_BENGALI_BANGLADESH: return "bn_BD";
1148 case LANG_BULGARIAN: return "bg_BG";
1149 case LANG_BURMESE: return "my_MM";
1150 case LANG_CAMBODIAN: return "km_KH";
1151 case LANG_CATALAN: return "ca_ES";
1152 case LANG_CHEROKEE: return "chr_US";
1156 case SUBLANG_CHINESE_TRADITIONAL: return "zh_TW";
1157 case SUBLANG_CHINESE_SIMPLIFIED: return "zh_CN";
1158 case SUBLANG_CHINESE_HONGKONG: return "zh_HK";
1159 case SUBLANG_CHINESE_SINGAPORE: return "zh_SG";
1160 case SUBLANG_CHINESE_MACAU: return "zh_MO";
1163 case LANG_CROATIAN: /* LANG_CROATIAN == LANG_SERBIAN
1164 * What used to be called Serbo-Croatian
1165 * should really now be two separate
1166 * languages because of political reasons.
1167 * (Says tml, who knows nothing about Serbian
1169 * (I can feel those flames coming already.)
1173 case SUBLANG_DEFAULT: return "hr_HR";
1174 case SUBLANG_SERBIAN_LATIN: return "sr_CS";
1175 case SUBLANG_SERBIAN_CYRILLIC: return "sr_CS@cyrillic";
1178 case LANG_CZECH: return "cs_CZ";
1179 case LANG_DANISH: return "da_DK";
1180 case LANG_DIVEHI: return "dv_MV";
1184 case SUBLANG_DUTCH: return "nl_NL";
1185 case SUBLANG_DUTCH_BELGIAN: /* FLEMISH, VLAAMS */ return "nl_BE";
1188 case LANG_EDO: return "bin_NG";
1192 /* SUBLANG_ENGLISH_US == SUBLANG_DEFAULT. Heh. I thought
1193 * English was the language spoken in England.
1196 case SUBLANG_ENGLISH_US: return "en_US";
1197 case SUBLANG_ENGLISH_UK: return "en_GB";
1198 case SUBLANG_ENGLISH_AUS: return "en_AU";
1199 case SUBLANG_ENGLISH_CAN: return "en_CA";
1200 case SUBLANG_ENGLISH_NZ: return "en_NZ";
1201 case SUBLANG_ENGLISH_EIRE: return "en_IE";
1202 case SUBLANG_ENGLISH_SOUTH_AFRICA: return "en_ZA";
1203 case SUBLANG_ENGLISH_JAMAICA: return "en_JM";
1204 case SUBLANG_ENGLISH_CARIBBEAN: return "en_GD"; /* Grenada? */
1205 case SUBLANG_ENGLISH_BELIZE: return "en_BZ";
1206 case SUBLANG_ENGLISH_TRINIDAD: return "en_TT";
1207 case SUBLANG_ENGLISH_ZIMBABWE: return "en_ZW";
1208 case SUBLANG_ENGLISH_PHILIPPINES: return "en_PH";
1209 case SUBLANG_ENGLISH_INDONESIA: return "en_ID";
1210 case SUBLANG_ENGLISH_HONGKONG: return "en_HK";
1211 case SUBLANG_ENGLISH_INDIA: return "en_IN";
1212 case SUBLANG_ENGLISH_MALAYSIA: return "en_MY";
1213 case SUBLANG_ENGLISH_SINGAPORE: return "en_SG";
1216 case LANG_ESTONIAN: return "et_EE";
1217 case LANG_FAEROESE: return "fo_FO";
1218 case LANG_FARSI: return "fa_IR";
1219 case LANG_FINNISH: return "fi_FI";
1223 case SUBLANG_FRENCH: return "fr_FR";
1224 case SUBLANG_FRENCH_BELGIAN: /* WALLOON */ return "fr_BE";
1225 case SUBLANG_FRENCH_CANADIAN: return "fr_CA";
1226 case SUBLANG_FRENCH_SWISS: return "fr_CH";
1227 case SUBLANG_FRENCH_LUXEMBOURG: return "fr_LU";
1228 case SUBLANG_FRENCH_MONACO: return "fr_MC";
1229 case SUBLANG_FRENCH_WESTINDIES: return "fr"; /* Caribbean? */
1230 case SUBLANG_FRENCH_REUNION: return "fr_RE";
1231 case SUBLANG_FRENCH_CONGO: return "fr_CG";
1232 case SUBLANG_FRENCH_SENEGAL: return "fr_SN";
1233 case SUBLANG_FRENCH_CAMEROON: return "fr_CM";
1234 case SUBLANG_FRENCH_COTEDIVOIRE: return "fr_CI";
1235 case SUBLANG_FRENCH_MALI: return "fr_ML";
1236 case SUBLANG_FRENCH_MOROCCO: return "fr_MA";
1237 case SUBLANG_FRENCH_HAITI: return "fr_HT";
1240 case LANG_FRISIAN: return "fy_NL";
1242 /* Spoken in Nigeria, Guinea, Senegal, Mali, Niger, Cameroon, Benin. */
1247 case 0x01: /* SCOTTISH */ return "gd_GB";
1248 case 0x02: /* IRISH */ return "ga_IE";
1251 case LANG_GALICIAN: return "gl_ES";
1252 case LANG_GEORGIAN: return "ka_GE";
1256 case SUBLANG_GERMAN: return "de_DE";
1257 case SUBLANG_GERMAN_SWISS: return "de_CH";
1258 case SUBLANG_GERMAN_AUSTRIAN: return "de_AT";
1259 case SUBLANG_GERMAN_LUXEMBOURG: return "de_LU";
1260 case SUBLANG_GERMAN_LIECHTENSTEIN: return "de_LI";
1263 case LANG_GREEK: return "el_GR";
1264 case LANG_GUARANI: return "gn_PY";
1265 case LANG_GUJARATI: return "gu_IN";
1266 case LANG_HAUSA: return "ha_NG";
1268 /* FIXME: Do they mean Hawaiian ("haw_US", 1000 speakers)
1269 or Hawaii Creole English ("cpe_US", 600000 speakers)? */
1271 case LANG_HEBREW: return "he_IL";
1272 case LANG_HINDI: return "hi_IN";
1273 case LANG_HUNGARIAN: return "hu_HU";
1274 case LANG_IBIBIO: return "nic_NG";
1275 case LANG_ICELANDIC: return "is_IS";
1276 case LANG_IGBO: return "ig_NG";
1277 case LANG_INDONESIAN: return "id_ID";
1278 case LANG_INUKTITUT: return "iu_CA";
1282 case SUBLANG_ITALIAN: return "it_IT";
1283 case SUBLANG_ITALIAN_SWISS: return "it_CH";
1286 case LANG_JAPANESE: return "ja_JP";
1287 case LANG_KANNADA: return "kn_IN";
1288 case LANG_KANURI: return "kr_NG";
1292 case SUBLANG_DEFAULT: return "ks_PK";
1293 case SUBLANG_KASHMIRI_INDIA: return "ks_IN";
1296 case LANG_KAZAK: return "kk_KZ";
1298 /* FIXME: Adjust this when such locales appear on Unix. */
1300 case LANG_KOREAN: return "ko_KR";
1301 case LANG_KYRGYZ: return "ky_KG";
1302 case LANG_LAO: return "lo_LA";
1303 case LANG_LATIN: return "la_VA";
1304 case LANG_LATVIAN: return "lv_LV";
1305 case LANG_LITHUANIAN: return "lt_LT";
1306 case LANG_MACEDONIAN: return "mk_MK";
1310 case SUBLANG_MALAY_MALAYSIA: return "ms_MY";
1311 case SUBLANG_MALAY_BRUNEI_DARUSSALAM: return "ms_BN";
1314 case LANG_MALAYALAM: return "ml_IN";
1315 case LANG_MALTESE: return "mt_MT";
1317 /* FIXME: Adjust this when such locales appear on Unix. */
1319 case LANG_MARATHI: return "mr_IN";
1320 case LANG_MONGOLIAN:
1323 case SUBLANG_DEFAULT: return "mn_MN";
1325 return "mn"; /* Ambiguous: could be "mn_CN" or "mn_MN". */
1329 case SUBLANG_DEFAULT: return "ne_NP";
1330 case SUBLANG_NEPALI_INDIA: return "ne_IN";
1333 case LANG_NORWEGIAN:
1336 case SUBLANG_NORWEGIAN_BOKMAL: return "nb_NO";
1337 case SUBLANG_NORWEGIAN_NYNORSK: return "nn_NO";
1340 case LANG_ORIYA: return "or_IN";
1341 case LANG_OROMO: return "om_ET";
1342 case LANG_PAPIAMENTU: return "pap_AN";
1344 return "ps"; /* Ambiguous: could be "ps_PK" or "ps_AF". */
1345 case LANG_POLISH: return "pl_PL";
1346 case LANG_PORTUGUESE:
1349 case SUBLANG_PORTUGUESE: return "pt_PT";
1350 /* Hmm. SUBLANG_PORTUGUESE_BRAZILIAN == SUBLANG_DEFAULT.
1351 Same phenomenon as SUBLANG_ENGLISH_US == SUBLANG_DEFAULT. */
1352 case SUBLANG_PORTUGUESE_BRAZILIAN: return "pt_BR";
1358 case SUBLANG_PUNJABI_INDIA: return "pa_IN"; /* Gurmukhi script */
1359 case SUBLANG_PUNJABI_PAKISTAN: return "pa_PK"; /* Arabic script */
1362 case LANG_RHAETO_ROMANCE: return "rm_CH";
1366 case SUBLANG_ROMANIAN_ROMANIA: return "ro_RO";
1367 case SUBLANG_ROMANIAN_MOLDOVA: return "ro_MD";
1373 case SUBLANG_DEFAULT: return "ru_RU";
1375 return "ru"; /* Ambiguous: could be "ru_RU" or "ru_UA" or "ru_MD". */
1376 case LANG_SAAMI: /* actually Northern Sami */ return "se_NO";
1377 case LANG_SANSKRIT: return "sa_IN";
1381 case SUBLANG_SINDHI_INDIA: return "sd_IN";
1382 case SUBLANG_SINDHI_PAKISTAN: return "sd_PK";
1385 case LANG_SINHALESE: return "si_LK";
1386 case LANG_SLOVAK: return "sk_SK";
1387 case LANG_SLOVENIAN: return "sl_SI";
1388 case LANG_SOMALI: return "so_SO";
1390 /* FIXME: Adjust this when such locales appear on Unix. */
1395 case SUBLANG_SPANISH: return "es_ES";
1396 case SUBLANG_SPANISH_MEXICAN: return "es_MX";
1397 case SUBLANG_SPANISH_MODERN:
1398 return "es_ES@modern"; /* not seen on Unix */
1399 case SUBLANG_SPANISH_GUATEMALA: return "es_GT";
1400 case SUBLANG_SPANISH_COSTA_RICA: return "es_CR";
1401 case SUBLANG_SPANISH_PANAMA: return "es_PA";
1402 case SUBLANG_SPANISH_DOMINICAN_REPUBLIC: return "es_DO";
1403 case SUBLANG_SPANISH_VENEZUELA: return "es_VE";
1404 case SUBLANG_SPANISH_COLOMBIA: return "es_CO";
1405 case SUBLANG_SPANISH_PERU: return "es_PE";
1406 case SUBLANG_SPANISH_ARGENTINA: return "es_AR";
1407 case SUBLANG_SPANISH_ECUADOR: return "es_EC";
1408 case SUBLANG_SPANISH_CHILE: return "es_CL";
1409 case SUBLANG_SPANISH_URUGUAY: return "es_UY";
1410 case SUBLANG_SPANISH_PARAGUAY: return "es_PY";
1411 case SUBLANG_SPANISH_BOLIVIA: return "es_BO";
1412 case SUBLANG_SPANISH_EL_SALVADOR: return "es_SV";
1413 case SUBLANG_SPANISH_HONDURAS: return "es_HN";
1414 case SUBLANG_SPANISH_NICARAGUA: return "es_NI";
1415 case SUBLANG_SPANISH_PUERTO_RICO: return "es_PR";
1418 case LANG_SUTU: return "bnt_TZ"; /* or "st_LS" or "nso_ZA"? */
1419 case LANG_SWAHILI: return "sw_KE";
1423 case SUBLANG_DEFAULT: return "sv_SE";
1424 case SUBLANG_SWEDISH_FINLAND: return "sv_FI";
1427 case LANG_SYRIAC: return "syr_TR"; /* An extinct language. */
1428 case LANG_TAGALOG: return "tl_PH";
1429 case LANG_TAJIK: return "tg_TJ";
1430 case LANG_TAMAZIGHT:
1433 /* FIXME: Adjust this when Tamazight locales appear on Unix. */
1434 case SUBLANG_TAMAZIGHT_ARABIC: return "ber_MA@arabic";
1435 case SUBLANG_TAMAZIGHT_LATIN: return "ber_MA@latin";
1441 case SUBLANG_DEFAULT: return "ta_IN";
1443 return "ta"; /* Ambiguous: could be "ta_IN" or "ta_LK" or "ta_SG". */
1444 case LANG_TATAR: return "tt_RU";
1445 case LANG_TELUGU: return "te_IN";
1446 case LANG_THAI: return "th_TH";
1447 case LANG_TIBETAN: return "bo_CN";
1451 case SUBLANG_TIGRINYA_ETHIOPIA: return "ti_ET";
1452 case SUBLANG_TIGRINYA_ERITREA: return "ti_ER";
1455 case LANG_TSONGA: return "ts_ZA";
1456 case LANG_TSWANA: return "tn_BW";
1457 case LANG_TURKISH: return "tr_TR";
1458 case LANG_TURKMEN: return "tk_TM";
1459 case LANG_UKRAINIAN: return "uk_UA";
1463 case SUBLANG_URDU_PAKISTAN: return "ur_PK";
1464 case SUBLANG_URDU_INDIA: return "ur_IN";
1470 case SUBLANG_UZBEK_LATIN: return "uz_UZ";
1471 case SUBLANG_UZBEK_CYRILLIC: return "uz_UZ@cyrillic";
1474 case LANG_VENDA: return "ve_ZA";
1475 case LANG_VIETNAMESE: return "vi_VN";
1476 case LANG_WELSH: return "cy_GB";
1477 case LANG_XHOSA: return "xh_ZA";
1478 case LANG_YI: return "sit_CN";
1479 case LANG_YIDDISH: return "yi_IL";
1480 case LANG_YORUBA: return "yo_NG";
1481 case LANG_ZULU: return "zu_ZA";
1482 default: return "C";
/* Return the locale name for CATEGORY (whose environment variable is named
   CATEGORYNAME).  The name from _nl_locale_name_posix() is obtained first;
   the final statement falls back to _nl_locale_name_default().
   NOTE(review): the test on retval that separates the two is not among the
   lines visible here -- presumably the POSIX result is returned when it is
   non-NULL; confirm against the full file.  */
1490 _nl_locale_name (int category, const char *categoryname)
1494 retval = _nl_locale_name_posix (category, categoryname);
1498 return _nl_locale_name_default ();