diff options
Diffstat (limited to 'gl/localcharset.c')
| -rw-r--r-- | gl/localcharset.c | 482 |
1 file changed, 266 insertions, 216 deletions
diff --git a/gl/localcharset.c b/gl/localcharset.c
index a7ca94c1..a04dc446 100644
--- a/gl/localcharset.c
+++ b/gl/localcharset.c
| @@ -1,6 +1,6 @@ | |||
| 1 | /* Determine a canonical name for the current locale's character encoding. | 1 | /* Determine a canonical name for the current locale's character encoding. |
| 2 | 2 | ||
| 3 | Copyright (C) 2000-2006, 2008-2009 Free Software Foundation, Inc. | 3 | Copyright (C) 2000-2006, 2008-2010 Free Software Foundation, Inc. |
| 4 | 4 | ||
| 5 | This program is free software; you can redistribute it and/or modify | 5 | This program is free software; you can redistribute it and/or modify |
| 6 | it under the terms of the GNU General Public License as published by | 6 | it under the terms of the GNU General Public License as published by |
| @@ -23,6 +23,7 @@ | |||
| 23 | /* Specification. */ | 23 | /* Specification. */ |
| 24 | #include "localcharset.h" | 24 | #include "localcharset.h" |
| 25 | 25 | ||
| 26 | #include <fcntl.h> | ||
| 26 | #include <stddef.h> | 27 | #include <stddef.h> |
| 27 | #include <stdio.h> | 28 | #include <stdio.h> |
| 28 | #include <string.h> | 29 | #include <string.h> |
| @@ -44,6 +45,7 @@ | |||
| 44 | #endif | 45 | #endif |
| 45 | 46 | ||
| 46 | #if !defined WIN32_NATIVE | 47 | #if !defined WIN32_NATIVE |
| 48 | # include <unistd.h> | ||
| 47 | # if HAVE_LANGINFO_CODESET | 49 | # if HAVE_LANGINFO_CODESET |
| 48 | # include <langinfo.h> | 50 | # include <langinfo.h> |
| 49 | # else | 51 | # else |
| @@ -75,6 +77,11 @@ | |||
| 75 | # include "configmake.h" | 77 | # include "configmake.h" |
| 76 | #endif | 78 | #endif |
| 77 | 79 | ||
| 80 | /* Define O_NOFOLLOW to 0 on platforms where it does not exist. */ | ||
| 81 | #ifndef O_NOFOLLOW | ||
| 82 | # define O_NOFOLLOW 0 | ||
| 83 | #endif | ||
| 84 | |||
| 78 | #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ | 85 | #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ |
| 79 | /* Win32, Cygwin, OS/2, DOS */ | 86 | /* Win32, Cygwin, OS/2, DOS */ |
| 80 | # define ISSLASH(C) ((C) == '/' || (C) == '\\') | 87 | # define ISSLASH(C) ((C) == '/' || (C) == '\\') |
| @@ -117,192 +124,219 @@ get_charset_aliases (void) | |||
| 117 | if (cp == NULL) | 124 | if (cp == NULL) |
| 118 | { | 125 | { |
| 119 | #if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) | 126 | #if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) |
| 120 | FILE *fp; | ||
| 121 | const char *dir; | 127 | const char *dir; |
| 122 | const char *base = "charset.alias"; | 128 | const char *base = "charset.alias"; |
| 123 | char *file_name; | 129 | char *file_name; |
| 124 | 130 | ||
| 125 | /* Make it possible to override the charset.alias location. This is | 131 | /* Make it possible to override the charset.alias location. This is |
| 126 | necessary for running the testsuite before "make install". */ | 132 | necessary for running the testsuite before "make install". */ |
| 127 | dir = getenv ("CHARSETALIASDIR"); | 133 | dir = getenv ("CHARSETALIASDIR"); |
| 128 | if (dir == NULL || dir[0] == '\0') | 134 | if (dir == NULL || dir[0] == '\0') |
| 129 | dir = relocate (LIBDIR); | 135 | dir = relocate (LIBDIR); |
| 130 | 136 | ||
| 131 | /* Concatenate dir and base into freshly allocated file_name. */ | 137 | /* Concatenate dir and base into freshly allocated file_name. */ |
| 132 | { | 138 | { |
| 133 | size_t dir_len = strlen (dir); | 139 | size_t dir_len = strlen (dir); |
| 134 | size_t base_len = strlen (base); | 140 | size_t base_len = strlen (base); |
| 135 | int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])); | 141 | int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])); |
| 136 | file_name = (char *) malloc (dir_len + add_slash + base_len + 1); | 142 | file_name = (char *) malloc (dir_len + add_slash + base_len + 1); |
| 137 | if (file_name != NULL) | 143 | if (file_name != NULL) |
| 138 | { | 144 | { |
| 139 | memcpy (file_name, dir, dir_len); | 145 | memcpy (file_name, dir, dir_len); |
| 140 | if (add_slash) | 146 | if (add_slash) |
| 141 | file_name[dir_len] = DIRECTORY_SEPARATOR; | 147 | file_name[dir_len] = DIRECTORY_SEPARATOR; |
| 142 | memcpy (file_name + dir_len + add_slash, base, base_len + 1); | 148 | memcpy (file_name + dir_len + add_slash, base, base_len + 1); |
| 143 | } | 149 | } |
| 144 | } | 150 | } |
| 145 | 151 | ||
| 146 | if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL) | 152 | if (file_name == NULL) |
| 147 | /* Out of memory or file not found, treat it as empty. */ | 153 | /* Out of memory. Treat the file as empty. */ |
| 148 | cp = ""; | 154 | cp = ""; |
| 149 | else | 155 | else |
| 150 | { | 156 | { |
| 151 | /* Parse the file's contents. */ | 157 | int fd; |
| 152 | char *res_ptr = NULL; | 158 | |
| 153 | size_t res_size = 0; | 159 | /* Open the file. Reject symbolic links on platforms that support |
| 154 | 160 | O_NOFOLLOW. This is a security feature. Without it, an attacker | |
| 155 | for (;;) | 161 | could retrieve parts of the contents (namely, the tail of the |
| 156 | { | 162 | first line that starts with "* ") of an arbitrary file by placing |
| 157 | int c; | 163 | a symbolic link to that file under the name "charset.alias" in |
| 158 | char buf1[50+1]; | 164 | some writable directory and defining the environment variable |
| 159 | char buf2[50+1]; | 165 | CHARSETALIASDIR to point to that directory. */ |
| 160 | size_t l1, l2; | 166 | fd = open (file_name, |
| 161 | char *old_res_ptr; | 167 | O_RDONLY | (HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0)); |
| 162 | 168 | if (fd < 0) | |
| 163 | c = getc (fp); | 169 | /* File not found. Treat it as empty. */ |
| 164 | if (c == EOF) | 170 | cp = ""; |
| 165 | break; | 171 | else |
| 166 | if (c == '\n' || c == ' ' || c == '\t') | 172 | { |
| 167 | continue; | 173 | FILE *fp; |
| 168 | if (c == '#') | 174 | |
| 169 | { | 175 | fp = fdopen (fd, "r"); |
| 170 | /* Skip comment, to end of line. */ | 176 | if (fp == NULL) |
| 171 | do | 177 | { |
| 172 | c = getc (fp); | 178 | /* Out of memory. Treat the file as empty. */ |
| 173 | while (!(c == EOF || c == '\n')); | 179 | close (fd); |
| 174 | if (c == EOF) | 180 | cp = ""; |
| 175 | break; | 181 | } |
| 176 | continue; | 182 | else |
| 177 | } | 183 | { |
| 178 | ungetc (c, fp); | 184 | /* Parse the file's contents. */ |
| 179 | if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) | 185 | char *res_ptr = NULL; |
| 180 | break; | 186 | size_t res_size = 0; |
| 181 | l1 = strlen (buf1); | 187 | |
| 182 | l2 = strlen (buf2); | 188 | for (;;) |
| 183 | old_res_ptr = res_ptr; | 189 | { |
| 184 | if (res_size == 0) | 190 | int c; |
| 185 | { | 191 | char buf1[50+1]; |
| 186 | res_size = l1 + 1 + l2 + 1; | 192 | char buf2[50+1]; |
| 187 | res_ptr = (char *) malloc (res_size + 1); | 193 | size_t l1, l2; |
| 188 | } | 194 | char *old_res_ptr; |
| 189 | else | 195 | |
| 190 | { | 196 | c = getc (fp); |
| 191 | res_size += l1 + 1 + l2 + 1; | 197 | if (c == EOF) |
| 192 | res_ptr = (char *) realloc (res_ptr, res_size + 1); | 198 | break; |
| 193 | } | 199 | if (c == '\n' || c == ' ' || c == '\t') |
| 194 | if (res_ptr == NULL) | 200 | continue; |
| 195 | { | 201 | if (c == '#') |
| 196 | /* Out of memory. */ | 202 | { |
| 197 | res_size = 0; | 203 | /* Skip comment, to end of line. */ |
| 198 | if (old_res_ptr != NULL) | 204 | do |
| 199 | free (old_res_ptr); | 205 | c = getc (fp); |
| 200 | break; | 206 | while (!(c == EOF || c == '\n')); |
| 201 | } | 207 | if (c == EOF) |
| 202 | strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); | 208 | break; |
| 203 | strcpy (res_ptr + res_size - (l2 + 1), buf2); | 209 | continue; |
| 204 | } | 210 | } |
| 205 | fclose (fp); | 211 | ungetc (c, fp); |
| 206 | if (res_size == 0) | 212 | if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) |
| 207 | cp = ""; | 213 | break; |
| 208 | else | 214 | l1 = strlen (buf1); |
| 209 | { | 215 | l2 = strlen (buf2); |
| 210 | *(res_ptr + res_size) = '\0'; | 216 | old_res_ptr = res_ptr; |
| 211 | cp = res_ptr; | 217 | if (res_size == 0) |
| 212 | } | 218 | { |
| 213 | } | 219 | res_size = l1 + 1 + l2 + 1; |
| 214 | 220 | res_ptr = (char *) malloc (res_size + 1); | |
| 215 | if (file_name != NULL) | 221 | } |
| 216 | free (file_name); | 222 | else |
| 223 | { | ||
| 224 | res_size += l1 + 1 + l2 + 1; | ||
| 225 | res_ptr = (char *) realloc (res_ptr, res_size + 1); | ||
| 226 | } | ||
| 227 | if (res_ptr == NULL) | ||
| 228 | { | ||
| 229 | /* Out of memory. */ | ||
| 230 | res_size = 0; | ||
| 231 | if (old_res_ptr != NULL) | ||
| 232 | free (old_res_ptr); | ||
| 233 | break; | ||
| 234 | } | ||
| 235 | strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); | ||
| 236 | strcpy (res_ptr + res_size - (l2 + 1), buf2); | ||
| 237 | } | ||
| 238 | fclose (fp); | ||
| 239 | if (res_size == 0) | ||
| 240 | cp = ""; | ||
| 241 | else | ||
| 242 | { | ||
| 243 | *(res_ptr + res_size) = '\0'; | ||
| 244 | cp = res_ptr; | ||
| 245 | } | ||
| 246 | } | ||
| 247 | } | ||
| 248 | |||
| 249 | free (file_name); | ||
| 250 | } | ||
| 217 | 251 | ||
| 218 | #else | 252 | #else |
| 219 | 253 | ||
| 220 | # if defined DARWIN7 | 254 | # if defined DARWIN7 |
| 221 | /* To avoid the trouble of installing a file that is shared by many | 255 | /* To avoid the trouble of installing a file that is shared by many |
| 222 | GNU packages -- many packaging systems have problems with this --, | 256 | GNU packages -- many packaging systems have problems with this --, |
| 223 | simply inline the aliases here. */ | 257 | simply inline the aliases here. */ |
| 224 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" | 258 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" |
| 225 | "ISO8859-2" "\0" "ISO-8859-2" "\0" | 259 | "ISO8859-2" "\0" "ISO-8859-2" "\0" |
| 226 | "ISO8859-4" "\0" "ISO-8859-4" "\0" | 260 | "ISO8859-4" "\0" "ISO-8859-4" "\0" |
| 227 | "ISO8859-5" "\0" "ISO-8859-5" "\0" | 261 | "ISO8859-5" "\0" "ISO-8859-5" "\0" |
| 228 | "ISO8859-7" "\0" "ISO-8859-7" "\0" | 262 | "ISO8859-7" "\0" "ISO-8859-7" "\0" |
| 229 | "ISO8859-9" "\0" "ISO-8859-9" "\0" | 263 | "ISO8859-9" "\0" "ISO-8859-9" "\0" |
| 230 | "ISO8859-13" "\0" "ISO-8859-13" "\0" | 264 | "ISO8859-13" "\0" "ISO-8859-13" "\0" |
| 231 | "ISO8859-15" "\0" "ISO-8859-15" "\0" | 265 | "ISO8859-15" "\0" "ISO-8859-15" "\0" |
| 232 | "KOI8-R" "\0" "KOI8-R" "\0" | 266 | "KOI8-R" "\0" "KOI8-R" "\0" |
| 233 | "KOI8-U" "\0" "KOI8-U" "\0" | 267 | "KOI8-U" "\0" "KOI8-U" "\0" |
| 234 | "CP866" "\0" "CP866" "\0" | 268 | "CP866" "\0" "CP866" "\0" |
| 235 | "CP949" "\0" "CP949" "\0" | 269 | "CP949" "\0" "CP949" "\0" |
| 236 | "CP1131" "\0" "CP1131" "\0" | 270 | "CP1131" "\0" "CP1131" "\0" |
| 237 | "CP1251" "\0" "CP1251" "\0" | 271 | "CP1251" "\0" "CP1251" "\0" |
| 238 | "eucCN" "\0" "GB2312" "\0" | 272 | "eucCN" "\0" "GB2312" "\0" |
| 239 | "GB2312" "\0" "GB2312" "\0" | 273 | "GB2312" "\0" "GB2312" "\0" |
| 240 | "eucJP" "\0" "EUC-JP" "\0" | 274 | "eucJP" "\0" "EUC-JP" "\0" |
| 241 | "eucKR" "\0" "EUC-KR" "\0" | 275 | "eucKR" "\0" "EUC-KR" "\0" |
| 242 | "Big5" "\0" "BIG5" "\0" | 276 | "Big5" "\0" "BIG5" "\0" |
| 243 | "Big5HKSCS" "\0" "BIG5-HKSCS" "\0" | 277 | "Big5HKSCS" "\0" "BIG5-HKSCS" "\0" |
| 244 | "GBK" "\0" "GBK" "\0" | 278 | "GBK" "\0" "GBK" "\0" |
| 245 | "GB18030" "\0" "GB18030" "\0" | 279 | "GB18030" "\0" "GB18030" "\0" |
| 246 | "SJIS" "\0" "SHIFT_JIS" "\0" | 280 | "SJIS" "\0" "SHIFT_JIS" "\0" |
| 247 | "ARMSCII-8" "\0" "ARMSCII-8" "\0" | 281 | "ARMSCII-8" "\0" "ARMSCII-8" "\0" |
| 248 | "PT154" "\0" "PT154" "\0" | 282 | "PT154" "\0" "PT154" "\0" |
| 249 | /*"ISCII-DEV" "\0" "?" "\0"*/ | 283 | /*"ISCII-DEV" "\0" "?" "\0"*/ |
| 250 | "*" "\0" "UTF-8" "\0"; | 284 | "*" "\0" "UTF-8" "\0"; |
| 251 | # endif | 285 | # endif |
| 252 | 286 | ||
| 253 | # if defined VMS | 287 | # if defined VMS |
| 254 | /* To avoid the troubles of an extra file charset.alias_vms in the | 288 | /* To avoid the troubles of an extra file charset.alias_vms in the |
| 255 | sources of many GNU packages, simply inline the aliases here. */ | 289 | sources of many GNU packages, simply inline the aliases here. */ |
| 256 | /* The list of encodings is taken from the OpenVMS 7.3-1 documentation | 290 | /* The list of encodings is taken from the OpenVMS 7.3-1 documentation |
| 257 | "Compaq C Run-Time Library Reference Manual for OpenVMS systems" | 291 | "Compaq C Run-Time Library Reference Manual for OpenVMS systems" |
| 258 | section 10.7 "Handling Different Character Sets". */ | 292 | section 10.7 "Handling Different Character Sets". */ |
| 259 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" | 293 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" |
| 260 | "ISO8859-2" "\0" "ISO-8859-2" "\0" | 294 | "ISO8859-2" "\0" "ISO-8859-2" "\0" |
| 261 | "ISO8859-5" "\0" "ISO-8859-5" "\0" | 295 | "ISO8859-5" "\0" "ISO-8859-5" "\0" |
| 262 | "ISO8859-7" "\0" "ISO-8859-7" "\0" | 296 | "ISO8859-7" "\0" "ISO-8859-7" "\0" |
| 263 | "ISO8859-8" "\0" "ISO-8859-8" "\0" | 297 | "ISO8859-8" "\0" "ISO-8859-8" "\0" |
| 264 | "ISO8859-9" "\0" "ISO-8859-9" "\0" | 298 | "ISO8859-9" "\0" "ISO-8859-9" "\0" |
| 265 | /* Japanese */ | 299 | /* Japanese */ |
| 266 | "eucJP" "\0" "EUC-JP" "\0" | 300 | "eucJP" "\0" "EUC-JP" "\0" |
| 267 | "SJIS" "\0" "SHIFT_JIS" "\0" | 301 | "SJIS" "\0" "SHIFT_JIS" "\0" |
| 268 | "DECKANJI" "\0" "DEC-KANJI" "\0" | 302 | "DECKANJI" "\0" "DEC-KANJI" "\0" |
| 269 | "SDECKANJI" "\0" "EUC-JP" "\0" | 303 | "SDECKANJI" "\0" "EUC-JP" "\0" |
| 270 | /* Chinese */ | 304 | /* Chinese */ |
| 271 | "eucTW" "\0" "EUC-TW" "\0" | 305 | "eucTW" "\0" "EUC-TW" "\0" |
| 272 | "DECHANYU" "\0" "DEC-HANYU" "\0" | 306 | "DECHANYU" "\0" "DEC-HANYU" "\0" |
| 273 | "DECHANZI" "\0" "GB2312" "\0" | 307 | "DECHANZI" "\0" "GB2312" "\0" |
| 274 | /* Korean */ | 308 | /* Korean */ |
| 275 | "DECKOREAN" "\0" "EUC-KR" "\0"; | 309 | "DECKOREAN" "\0" "EUC-KR" "\0"; |
| 276 | # endif | 310 | # endif |
| 277 | 311 | ||
| 278 | # if defined WIN32_NATIVE || defined __CYGWIN__ | 312 | # if defined WIN32_NATIVE || defined __CYGWIN__ |
| 279 | /* To avoid the troubles of installing a separate file in the same | 313 | /* To avoid the troubles of installing a separate file in the same |
| 280 | directory as the DLL and of retrieving the DLL's directory at | 314 | directory as the DLL and of retrieving the DLL's directory at |
| 281 | runtime, simply inline the aliases here. */ | 315 | runtime, simply inline the aliases here. */ |
| 282 | 316 | ||
| 283 | cp = "CP936" "\0" "GBK" "\0" | 317 | cp = "CP936" "\0" "GBK" "\0" |
| 284 | "CP1361" "\0" "JOHAB" "\0" | 318 | "CP1361" "\0" "JOHAB" "\0" |
| 285 | "CP20127" "\0" "ASCII" "\0" | 319 | "CP20127" "\0" "ASCII" "\0" |
| 286 | "CP20866" "\0" "KOI8-R" "\0" | 320 | "CP20866" "\0" "KOI8-R" "\0" |
| 287 | "CP20936" "\0" "GB2312" "\0" | 321 | "CP20936" "\0" "GB2312" "\0" |
| 288 | "CP21866" "\0" "KOI8-RU" "\0" | 322 | "CP21866" "\0" "KOI8-RU" "\0" |
| 289 | "CP28591" "\0" "ISO-8859-1" "\0" | 323 | "CP28591" "\0" "ISO-8859-1" "\0" |
| 290 | "CP28592" "\0" "ISO-8859-2" "\0" | 324 | "CP28592" "\0" "ISO-8859-2" "\0" |
| 291 | "CP28593" "\0" "ISO-8859-3" "\0" | 325 | "CP28593" "\0" "ISO-8859-3" "\0" |
| 292 | "CP28594" "\0" "ISO-8859-4" "\0" | 326 | "CP28594" "\0" "ISO-8859-4" "\0" |
| 293 | "CP28595" "\0" "ISO-8859-5" "\0" | 327 | "CP28595" "\0" "ISO-8859-5" "\0" |
| 294 | "CP28596" "\0" "ISO-8859-6" "\0" | 328 | "CP28596" "\0" "ISO-8859-6" "\0" |
| 295 | "CP28597" "\0" "ISO-8859-7" "\0" | 329 | "CP28597" "\0" "ISO-8859-7" "\0" |
| 296 | "CP28598" "\0" "ISO-8859-8" "\0" | 330 | "CP28598" "\0" "ISO-8859-8" "\0" |
| 297 | "CP28599" "\0" "ISO-8859-9" "\0" | 331 | "CP28599" "\0" "ISO-8859-9" "\0" |
| 298 | "CP28605" "\0" "ISO-8859-15" "\0" | 332 | "CP28605" "\0" "ISO-8859-15" "\0" |
| 299 | "CP38598" "\0" "ISO-8859-8" "\0" | 333 | "CP38598" "\0" "ISO-8859-8" "\0" |
| 300 | "CP51932" "\0" "EUC-JP" "\0" | 334 | "CP51932" "\0" "EUC-JP" "\0" |
| 301 | "CP51936" "\0" "GB2312" "\0" | 335 | "CP51936" "\0" "GB2312" "\0" |
| 302 | "CP51949" "\0" "EUC-KR" "\0" | 336 | "CP51949" "\0" "EUC-KR" "\0" |
| 303 | "CP51950" "\0" "EUC-TW" "\0" | 337 | "CP51950" "\0" "EUC-TW" "\0" |
| 304 | "CP54936" "\0" "GB18030" "\0" | 338 | "CP54936" "\0" "GB18030" "\0" |
| 305 | "CP65001" "\0" "UTF-8" "\0"; | 339 | "CP65001" "\0" "UTF-8" "\0"; |
| 306 | # endif | 340 | # endif |
| 307 | #endif | 341 | #endif |
| 308 | 342 | ||
| @@ -335,7 +369,7 @@ locale_charset (void) | |||
| 335 | codeset = nl_langinfo (CODESET); | 369 | codeset = nl_langinfo (CODESET); |
| 336 | 370 | ||
| 337 | # ifdef __CYGWIN__ | 371 | # ifdef __CYGWIN__ |
| 338 | /* Cygwin 2006 does not have locales. nl_langinfo (CODESET) always | 372 | /* Cygwin 1.5.x does not have locales. nl_langinfo (CODESET) always |
| 339 | returns "US-ASCII". As long as this is not fixed, return the suffix | 373 | returns "US-ASCII". As long as this is not fixed, return the suffix |
| 340 | of the locale name from the environment variables (if present) or | 374 | of the locale name from the environment variables (if present) or |
| 341 | the codepage as a number. */ | 375 | the codepage as a number. */ |
| @@ -346,36 +380,46 @@ locale_charset (void) | |||
| 346 | 380 | ||
| 347 | locale = getenv ("LC_ALL"); | 381 | locale = getenv ("LC_ALL"); |
| 348 | if (locale == NULL || locale[0] == '\0') | 382 | if (locale == NULL || locale[0] == '\0') |
| 349 | { | 383 | { |
| 350 | locale = getenv ("LC_CTYPE"); | 384 | locale = getenv ("LC_CTYPE"); |
| 351 | if (locale == NULL || locale[0] == '\0') | 385 | if (locale == NULL || locale[0] == '\0') |
| 352 | locale = getenv ("LANG"); | 386 | locale = getenv ("LANG"); |
| 353 | } | 387 | } |
| 354 | if (locale != NULL && locale[0] != '\0') | 388 | if (locale != NULL && locale[0] != '\0') |
| 355 | { | 389 | { |
| 356 | /* If the locale name contains an encoding after the dot, return | 390 | /* If the locale name contains an encoding after the dot, return |
| 357 | it. */ | 391 | it. */ |
| 358 | const char *dot = strchr (locale, '.'); | 392 | const char *dot = strchr (locale, '.'); |
| 359 | 393 | ||
| 360 | if (dot != NULL) | 394 | if (dot != NULL) |
| 361 | { | 395 | { |
| 362 | const char *modifier; | 396 | const char *modifier; |
| 363 | 397 | ||
| 364 | dot++; | 398 | dot++; |
| 365 | /* Look for the possible @... trailer and remove it, if any. */ | 399 | /* Look for the possible @... trailer and remove it, if any. */ |
| 366 | modifier = strchr (dot, '@'); | 400 | modifier = strchr (dot, '@'); |
| 367 | if (modifier == NULL) | 401 | if (modifier == NULL) |
| 368 | return dot; | 402 | return dot; |
| 369 | if (modifier - dot < sizeof (buf)) | 403 | if (modifier - dot < sizeof (buf)) |
| 370 | { | 404 | { |
| 371 | memcpy (buf, dot, modifier - dot); | 405 | memcpy (buf, dot, modifier - dot); |
| 372 | buf [modifier - dot] = '\0'; | 406 | buf [modifier - dot] = '\0'; |
| 373 | return buf; | 407 | return buf; |
| 374 | } | 408 | } |
| 375 | } | 409 | } |
| 376 | } | 410 | } |
| 377 | 411 | ||
| 378 | /* Woe32 has a function returning the locale's codepage as a number. */ | 412 | /* Woe32 has a function returning the locale's codepage as a number: |
| 413 | GetACP(). This encoding is used by Cygwin, unless the user has set | ||
| 414 | the environment variable CYGWIN=codepage:oem (which very few people | ||
| 415 | do). | ||
| 416 | Output directed to console windows needs to be converted (to | ||
| 417 | GetOEMCP() if the console is using a raster font, or to | ||
| 418 | GetConsoleOutputCP() if it is using a TrueType font). Cygwin does | ||
| 419 | this conversion transparently (see winsup/cygwin/fhandler_console.cc), | ||
| 420 | converting to GetConsoleOutputCP(). This leads to correct results, | ||
| 421 | except when SetConsoleOutputCP has been called and a raster font is | ||
| 422 | in use. */ | ||
| 379 | sprintf (buf, "CP%u", GetACP ()); | 423 | sprintf (buf, "CP%u", GetACP ()); |
| 380 | codeset = buf; | 424 | codeset = buf; |
| 381 | } | 425 | } |
| @@ -397,11 +441,11 @@ locale_charset (void) | |||
| 397 | { | 441 | { |
| 398 | locale = getenv ("LC_ALL"); | 442 | locale = getenv ("LC_ALL"); |
| 399 | if (locale == NULL || locale[0] == '\0') | 443 | if (locale == NULL || locale[0] == '\0') |
| 400 | { | 444 | { |
| 401 | locale = getenv ("LC_CTYPE"); | 445 | locale = getenv ("LC_CTYPE"); |
| 402 | if (locale == NULL || locale[0] == '\0') | 446 | if (locale == NULL || locale[0] == '\0') |
| 403 | locale = getenv ("LANG"); | 447 | locale = getenv ("LANG"); |
| 404 | } | 448 | } |
| 405 | } | 449 | } |
| 406 | 450 | ||
| 407 | /* On some old systems, one used to set locale = "iso8859_1". On others, | 451 | /* On some old systems, one used to set locale = "iso8859_1". On others, |
| @@ -415,7 +459,13 @@ locale_charset (void) | |||
| 415 | 459 | ||
| 416 | static char buf[2 + 10 + 1]; | 460 | static char buf[2 + 10 + 1]; |
| 417 | 461 | ||
| 418 | /* Woe32 has a function returning the locale's codepage as a number. */ | 462 | /* Woe32 has a function returning the locale's codepage as a number: |
| 463 | GetACP(). | ||
| 464 | When the output goes to a console window, it needs to be provided in | ||
| 465 | GetOEMCP() encoding if the console is using a raster font, or in | ||
| 466 | GetConsoleOutputCP() encoding if it is using a TrueType font. | ||
| 467 | But in GUI programs and for output sent to files and pipes, GetACP() | ||
| 468 | encoding is the best bet. */ | ||
| 419 | sprintf (buf, "CP%u", GetACP ()); | 469 | sprintf (buf, "CP%u", GetACP ()); |
| 420 | codeset = buf; | 470 | codeset = buf; |
| 421 | 471 | ||
| @@ -433,7 +483,7 @@ locale_charset (void) | |||
| 433 | { | 483 | { |
| 434 | locale = getenv ("LC_CTYPE"); | 484 | locale = getenv ("LC_CTYPE"); |
| 435 | if (locale == NULL || locale[0] == '\0') | 485 | if (locale == NULL || locale[0] == '\0') |
| 436 | locale = getenv ("LANG"); | 486 | locale = getenv ("LANG"); |
| 437 | } | 487 | } |
| 438 | if (locale != NULL && locale[0] != '\0') | 488 | if (locale != NULL && locale[0] != '\0') |
| 439 | { | 489 | { |
| @@ -441,21 +491,21 @@ locale_charset (void) | |||
| 441 | const char *dot = strchr (locale, '.'); | 491 | const char *dot = strchr (locale, '.'); |
| 442 | 492 | ||
| 443 | if (dot != NULL) | 493 | if (dot != NULL) |
| 444 | { | 494 | { |
| 445 | const char *modifier; | 495 | const char *modifier; |
| 446 | 496 | ||
| 447 | dot++; | 497 | dot++; |
| 448 | /* Look for the possible @... trailer and remove it, if any. */ | 498 | /* Look for the possible @... trailer and remove it, if any. */ |
| 449 | modifier = strchr (dot, '@'); | 499 | modifier = strchr (dot, '@'); |
| 450 | if (modifier == NULL) | 500 | if (modifier == NULL) |
| 451 | return dot; | 501 | return dot; |
| 452 | if (modifier - dot < sizeof (buf)) | 502 | if (modifier - dot < sizeof (buf)) |
| 453 | { | 503 | { |
| 454 | memcpy (buf, dot, modifier - dot); | 504 | memcpy (buf, dot, modifier - dot); |
| 455 | buf [modifier - dot] = '\0'; | 505 | buf [modifier - dot] = '\0'; |
| 456 | return buf; | 506 | return buf; |
| 457 | } | 507 | } |
| 458 | } | 508 | } |
| 459 | 509 | ||
| 460 | /* Resolve through the charset.alias file. */ | 510 | /* Resolve through the charset.alias file. */ |
| 461 | codeset = locale; | 511 | codeset = locale; |
| @@ -464,12 +514,12 @@ locale_charset (void) | |||
| 464 | { | 514 | { |
| 465 | /* OS/2 has a function returning the locale's codepage as a number. */ | 515 | /* OS/2 has a function returning the locale's codepage as a number. */ |
| 466 | if (DosQueryCp (sizeof (cp), cp, &cplen)) | 516 | if (DosQueryCp (sizeof (cp), cp, &cplen)) |
| 467 | codeset = ""; | 517 | codeset = ""; |
| 468 | else | 518 | else |
| 469 | { | 519 | { |
| 470 | sprintf (buf, "CP%u", cp[0]); | 520 | sprintf (buf, "CP%u", cp[0]); |
| 471 | codeset = buf; | 521 | codeset = buf; |
| 472 | } | 522 | } |
| 473 | } | 523 | } |
| 474 | 524 | ||
| 475 | #endif | 525 | #endif |
| @@ -483,10 +533,10 @@ locale_charset (void) | |||
| 483 | *aliases != '\0'; | 533 | *aliases != '\0'; |
| 484 | aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) | 534 | aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) |
| 485 | if (strcmp (codeset, aliases) == 0 | 535 | if (strcmp (codeset, aliases) == 0 |
| 486 | || (aliases[0] == '*' && aliases[1] == '\0')) | 536 | || (aliases[0] == '*' && aliases[1] == '\0')) |
| 487 | { | 537 | { |
| 488 | codeset = aliases + strlen (aliases) + 1; | 538 | codeset = aliases + strlen (aliases) + 1; |
| 489 | break; | 539 | break; |
| 490 | } | 540 | } |
| 491 | 541 | ||
| 492 | /* Don't return an empty string. GNU libc and GNU libiconv interpret | 542 | /* Don't return an empty string. GNU libc and GNU libiconv interpret |
