diff options
Diffstat (limited to 'gl/localcharset.c')
| -rw-r--r-- | gl/localcharset.c | 67 |
1 file changed, 14 insertions, 53 deletions
diff --git a/gl/localcharset.c b/gl/localcharset.c index 93c4baa4..22dc38d3 100644 --- a/gl/localcharset.c +++ b/gl/localcharset.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* Determine a canonical name for the current locale's character encoding. | 1 | /* Determine a canonical name for the current locale's character encoding. |
| 2 | 2 | ||
| 3 | Copyright (C) 2000-2006, 2008-2024 Free Software Foundation, Inc. | 3 | Copyright (C) 2000-2006, 2008-2026 Free Software Foundation, Inc. |
| 4 | 4 | ||
| 5 | This file is free software: you can redistribute it and/or modify | 5 | This file is free software: you can redistribute it and/or modify |
| 6 | it under the terms of the GNU Lesser General Public License as | 6 | it under the terms of the GNU Lesser General Public License as |
| @@ -279,45 +279,6 @@ static const struct table_entry alias_table[] = | |||
| 279 | { "utf8", "UTF-8" } | 279 | { "utf8", "UTF-8" } |
| 280 | # define alias_table_defined | 280 | # define alias_table_defined |
| 281 | # endif | 281 | # endif |
| 282 | # if defined __sgi /* IRIX */ | ||
| 283 | { "ISO8859-1", "ISO-8859-1" }, | ||
| 284 | { "ISO8859-15", "ISO-8859-15" }, | ||
| 285 | { "ISO8859-2", "ISO-8859-2" }, | ||
| 286 | { "ISO8859-5", "ISO-8859-5" }, | ||
| 287 | { "ISO8859-7", "ISO-8859-7" }, | ||
| 288 | { "ISO8859-9", "ISO-8859-9" }, | ||
| 289 | { "eucCN", "GB2312" }, | ||
| 290 | { "eucJP", "EUC-JP" }, | ||
| 291 | { "eucKR", "EUC-KR" }, | ||
| 292 | { "eucTW", "EUC-TW" } | ||
| 293 | # define alias_table_defined | ||
| 294 | # endif | ||
| 295 | # if defined __osf__ /* OSF/1 */ | ||
| 296 | /*{ "GBK", "GBK" },*/ | ||
| 297 | { "ISO8859-1", "ISO-8859-1" }, | ||
| 298 | { "ISO8859-15", "ISO-8859-15" }, | ||
| 299 | { "ISO8859-2", "ISO-8859-2" }, | ||
| 300 | { "ISO8859-4", "ISO-8859-4" }, | ||
| 301 | { "ISO8859-5", "ISO-8859-5" }, | ||
| 302 | { "ISO8859-7", "ISO-8859-7" }, | ||
| 303 | { "ISO8859-8", "ISO-8859-8" }, | ||
| 304 | { "ISO8859-9", "ISO-8859-9" }, | ||
| 305 | { "KSC5601", "CP949" }, | ||
| 306 | { "SJIS", "SHIFT_JIS" }, | ||
| 307 | { "TACTIS", "TIS-620" }, | ||
| 308 | /*{ "UTF-8", "UTF-8" },*/ | ||
| 309 | { "big5", "BIG5" }, | ||
| 310 | { "cp850", "CP850" }, | ||
| 311 | { "dechanyu", "DEC-HANYU" }, | ||
| 312 | { "dechanzi", "GB2312" }, | ||
| 313 | { "deckanji", "DEC-KANJI" }, | ||
| 314 | { "deckorean", "EUC-KR" }, | ||
| 315 | { "eucJP", "EUC-JP" }, | ||
| 316 | { "eucKR", "EUC-KR" }, | ||
| 317 | { "eucTW", "EUC-TW" }, | ||
| 318 | { "sdeckanji", "EUC-JP" } | ||
| 319 | # define alias_table_defined | ||
| 320 | # endif | ||
| 321 | # if defined __sun /* Solaris */ | 282 | # if defined __sun /* Solaris */ |
| 322 | { "5601", "EUC-KR" }, | 283 | { "5601", "EUC-KR" }, |
| 323 | { "646", "ASCII" }, | 284 | { "646", "ASCII" }, |
| @@ -380,7 +341,7 @@ static const struct table_entry alias_table[] = | |||
| 380 | # if defined OS2 /* OS/2 */ | 341 | # if defined OS2 /* OS/2 */ |
| 381 | /* The list of encodings is taken from "List of OS/2 Codepages" | 342 | /* The list of encodings is taken from "List of OS/2 Codepages" |
| 382 | by Alex Taylor: | 343 | by Alex Taylor: |
| 383 | <http://altsan.org/os2/toolkits/uls/index.html#codepages>. | 344 | <https://altsan.org/os2/toolkits/uls/index.html#codepages>. |
| 384 | See also "__convcp() of kLIBC": | 345 | See also "__convcp() of kLIBC": |
| 385 | <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */ | 346 | <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */ |
| 386 | { "CP1004", "CP1252" }, | 347 | { "CP1004", "CP1252" }, |
| @@ -850,12 +811,11 @@ locale_charset (void) | |||
| 850 | /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always | 811 | /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always |
| 851 | returns "US-ASCII". Return the suffix of the locale name from the | 812 | returns "US-ASCII". Return the suffix of the locale name from the |
| 852 | environment variables (if present) or the codepage as a number. */ | 813 | environment variables (if present) or the codepage as a number. */ |
| 853 | if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0) | 814 | if (codeset != NULL && streq (codeset, "US-ASCII")) |
| 854 | { | 815 | { |
| 855 | const char *locale; | ||
| 856 | static char resultbuf[2 + 10 + 1]; | 816 | static char resultbuf[2 + 10 + 1]; |
| 857 | 817 | ||
| 858 | locale = getenv ("LC_ALL"); | 818 | const char *locale = getenv ("LC_ALL"); |
| 859 | if (locale == NULL || locale[0] == '\0') | 819 | if (locale == NULL || locale[0] == '\0') |
| 860 | { | 820 | { |
| 861 | locale = getenv ("LC_CTYPE"); | 821 | locale = getenv ("LC_CTYPE"); |
| @@ -939,8 +899,9 @@ locale_charset (void) | |||
| 939 | sprintf (buf, "CP%u", GetACP ()); | 899 | sprintf (buf, "CP%u", GetACP ()); |
| 940 | } | 900 | } |
| 941 | /* For a locale name such as "French_France.65001", in Windows 10, | 901 | /* For a locale name such as "French_France.65001", in Windows 10, |
| 942 | setlocale now returns "French_France.utf8" instead. */ | 902 | setlocale now returns "French_France.utf8" instead, or in the UTF-8 |
| 943 | if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0) | 903 | environment (with modern system settings) "fr_FR.UTF-8". */ |
| 904 | if (streq (buf + 2, "65001") || streq (buf + 2, "utf8") || streq (buf + 2, "UTF-8")) | ||
| 944 | codeset = "UTF-8"; | 905 | codeset = "UTF-8"; |
| 945 | else | 906 | else |
| 946 | { | 907 | { |
| @@ -950,16 +911,13 @@ locale_charset (void) | |||
| 950 | 911 | ||
| 951 | # elif defined OS2 | 912 | # elif defined OS2 |
| 952 | 913 | ||
| 953 | const char *locale; | ||
| 954 | static char resultbuf[2 + 10 + 1]; | 914 | static char resultbuf[2 + 10 + 1]; |
| 955 | ULONG cp[3]; | ||
| 956 | ULONG cplen; | ||
| 957 | 915 | ||
| 958 | codeset = NULL; | 916 | codeset = NULL; |
| 959 | 917 | ||
| 960 | /* Allow user to override the codeset, as set in the operating system, | 918 | /* Allow user to override the codeset, as set in the operating system, |
| 961 | with standard language environment variables. */ | 919 | with standard language environment variables. */ |
| 962 | locale = getenv ("LC_ALL"); | 920 | const char *locale = getenv ("LC_ALL"); |
| 963 | if (locale == NULL || locale[0] == '\0') | 921 | if (locale == NULL || locale[0] == '\0') |
| 964 | { | 922 | { |
| 965 | locale = getenv ("LC_CTYPE"); | 923 | locale = getenv ("LC_CTYPE"); |
| @@ -990,12 +948,15 @@ locale_charset (void) | |||
| 990 | } | 948 | } |
| 991 | 949 | ||
| 992 | /* For the POSIX locale, don't use the system's codepage. */ | 950 | /* For the POSIX locale, don't use the system's codepage. */ |
| 993 | if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0) | 951 | if (streq (locale, "C") || streq (locale, "POSIX")) |
| 994 | codeset = ""; | 952 | codeset = ""; |
| 995 | } | 953 | } |
| 996 | 954 | ||
| 997 | if (codeset == NULL) | 955 | if (codeset == NULL) |
| 998 | { | 956 | { |
| 957 | ULONG cp[3]; | ||
| 958 | ULONG cplen; | ||
| 959 | |||
| 999 | /* OS/2 has a function returning the locale's codepage as a number. */ | 960 | /* OS/2 has a function returning the locale's codepage as a number. */ |
| 1000 | if (DosQueryCp (sizeof (cp), cp, &cplen)) | 961 | if (DosQueryCp (sizeof (cp), cp, &cplen)) |
| 1001 | codeset = ""; | 962 | codeset = ""; |
| @@ -1022,7 +983,7 @@ locale_charset (void) | |||
| 1022 | Speed up the common case and slow down the less common cases by | 983 | Speed up the common case and slow down the less common cases by |
| 1023 | testing for this case first. */ | 984 | testing for this case first. */ |
| 1024 | # if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__ | 985 | # if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__ |
| 1025 | if (strcmp (codeset, "UTF-8") == 0) | 986 | if (streq (codeset, "UTF-8")) |
| 1026 | goto done_table_lookup; | 987 | goto done_table_lookup; |
| 1027 | else | 988 | else |
| 1028 | # endif | 989 | # endif |
| @@ -1151,7 +1112,7 @@ locale_charset (void) | |||
| 1151 | #ifdef DARWIN7 | 1112 | #ifdef DARWIN7 |
| 1152 | /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8" | 1113 | /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8" |
| 1153 | (the default codeset) does not work when MB_CUR_MAX is 1. */ | 1114 | (the default codeset) does not work when MB_CUR_MAX is 1. */ |
| 1154 | if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1) | 1115 | if (streq (codeset, "UTF-8") && MB_CUR_MAX_L (uselocale (NULL)) <= 1) |
| 1155 | codeset = "ASCII"; | 1116 | codeset = "ASCII"; |
| 1156 | #endif | 1117 | #endif |
| 1157 | 1118 | ||
