diff options
| author | Thomas Guyot-Sionnest <dermoth@users.sourceforge.net> | 2008-02-12 11:07:18 +0000 |
|---|---|---|
| committer | Thomas Guyot-Sionnest <dermoth@users.sourceforge.net> | 2008-02-12 11:07:18 +0000 |
| commit | bd7029a99b0c2974265c6665638ef14a052f42ab (patch) | |
| tree | f5661ba73366d81ef6e91f889ea7fec5ebe07b6b /gl/localcharset.c | |
| parent | f99612320d6eda67644c07be04bb21aa4d7789db (diff) | |
| download | monitoring-plugins-bd7029a99b0c2974265c6665638ef14a052f42ab.tar.gz | |
Sync to latest Gnulib
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@1925 f882894a-f735-0410-b71e-b25c423dba1c
Diffstat (limited to 'gl/localcharset.c')
| -rw-r--r-- | gl/localcharset.c | 460 |
1 files changed, 460 insertions, 0 deletions
diff --git a/gl/localcharset.c b/gl/localcharset.c new file mode 100644 index 00000000..4f319487 --- /dev/null +++ b/gl/localcharset.c | |||
| @@ -0,0 +1,460 @@ | |||
| 1 | /* Determine a canonical name for the current locale's character encoding. | ||
| 2 | |||
| 3 | Copyright (C) 2000-2006 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | This program is free software; you can redistribute it and/or modify | ||
| 6 | it under the terms of the GNU General Public License as published by | ||
| 7 | the Free Software Foundation; either version 3, or (at your option) | ||
| 8 | any later version. | ||
| 9 | |||
| 10 | This program is distributed in the hope that it will be useful, | ||
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | GNU General Public License for more details. | ||
| 14 | |||
| 15 | You should have received a copy of the GNU General Public License along | ||
| 16 | with this program; if not, write to the Free Software Foundation, | ||
| 17 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ | ||
| 18 | |||
| 19 | /* Written by Bruno Haible <bruno@clisp.org>. */ | ||
| 20 | |||
| 21 | #include <config.h> | ||
| 22 | |||
| 23 | /* Specification. */ | ||
| 24 | #include "localcharset.h" | ||
| 25 | |||
| 26 | #include <stddef.h> | ||
| 27 | #include <stdio.h> | ||
| 28 | #include <string.h> | ||
| 29 | #include <stdlib.h> | ||
| 30 | |||
/* Platform detection.  WIN32_NATIVE is defined when the compiler
   predefines _WIN32 or __WIN32__; the rest of the file tests WIN32_NATIVE
   instead of the compiler-specific macros. */
#if defined _WIN32 || defined __WIN32__
# define WIN32_NATIVE
#endif

#if defined __EMX__
/* Assume EMX program runs on OS/2, even if compiled under DOS. */
# define OS2
#endif

/* Pull in the system header that provides the codeset query used by
   locale_charset: <langinfo.h> where nl_langinfo (CODESET) is available,
   <windows.h> on native Windows and on Cygwin, <os2.h> on OS/2.
   The '#elif defined WIN32_NATIVE' below is the exact complement of the
   '#if !defined WIN32_NATIVE' above, so it acts as a plain '#else'. */
#if !defined WIN32_NATIVE
# if HAVE_LANGINFO_CODESET
# include <langinfo.h>
# else
# if 0 /* see comment below */
# include <locale.h>
# endif
# endif
# ifdef __CYGWIN__
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# endif
#elif defined WIN32_NATIVE
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
#if defined OS2
# define INCL_DOS
# include <os2.h>
#endif

/* relocate() maps an installation path name to its run-time location.
   Without ENABLE_RELOCATABLE it is the identity, so callers can use it
   unconditionally. */
#if ENABLE_RELOCATABLE
# include "relocatable.h"
#else
# define relocate(pathname) (pathname)
#endif

/* Get LIBDIR. */
/* NOTE(review): configmake.h is expected to define LIBDIR when the build
   system has not already passed it on the command line -- confirm. */
#ifndef LIBDIR
# include "configmake.h"
#endif

/* ISSLASH (C) is true when the character C separates directory names.
   On the platforms listed here both '/' and '\\' are accepted. */
#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
/* Win32, Cygwin, OS/2, DOS */
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
#endif

/* Separator inserted between a directory name and a file name when the
   directory name does not already end in one. */
#ifndef DIRECTORY_SEPARATOR
# define DIRECTORY_SEPARATOR '/'
#endif

/* Fallback for all other platforms: only DIRECTORY_SEPARATOR counts. */
#ifndef ISSLASH
# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
#endif

/* Route getc to getc_unlocked where the latter is declared.
   NOTE(review): presumably done to avoid per-character stream locking;
   the only getc calls in this file read a FILE that is local to
   get_charset_aliases. */
#if HAVE_DECL_GETC_UNLOCKED
# undef getc
# define getc getc_unlocked
#endif
| 89 | |||
/* The following static variable is declared 'volatile' to avoid a
   possible multithread problem in the function get_charset_aliases. If we
   are running in a threaded environment, and if two threads initialize
   'charset_aliases' simultaneously, both will produce the same value,
   and everything will be ok if the two assignments to 'charset_aliases'
   are atomic. But I don't know what will happen if the two assignments mix.
   Note that no lock is taken anywhere in this file: the qualifier is the
   only precaution, so the scheme relies on the pointer store being
   atomic on the target platform. */
/* When __STDC__ is not 1 the keyword 'volatile' is defined away, so the
   declaration below still compiles there, without the qualifier. */
#if __STDC__ != 1
# define volatile /* empty */
#endif
/* Pointer to the contents of the charset.alias file, if it has already been
   read, else NULL. Its format is:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'
   It is written only by get_charset_aliases, which stores either a
   malloc'ed table, a built-in string literal, or "" (no aliases). */
static const char * volatile charset_aliases;
| 103 | |||
| 104 | /* Return a pointer to the contents of the charset.alias file. */ | ||
| 105 | static const char * | ||
| 106 | get_charset_aliases (void) | ||
| 107 | { | ||
| 108 | const char *cp; | ||
| 109 | |||
| 110 | cp = charset_aliases; | ||
| 111 | if (cp == NULL) | ||
| 112 | { | ||
| 113 | #if !(defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) | ||
| 114 | FILE *fp; | ||
| 115 | const char *dir; | ||
| 116 | const char *base = "charset.alias"; | ||
| 117 | char *file_name; | ||
| 118 | |||
| 119 | /* Make it possible to override the charset.alias location. This is | ||
| 120 | necessary for running the testsuite before "make install". */ | ||
| 121 | dir = getenv ("CHARSETALIASDIR"); | ||
| 122 | if (dir == NULL || dir[0] == '\0') | ||
| 123 | dir = relocate (LIBDIR); | ||
| 124 | |||
| 125 | /* Concatenate dir and base into freshly allocated file_name. */ | ||
| 126 | { | ||
| 127 | size_t dir_len = strlen (dir); | ||
| 128 | size_t base_len = strlen (base); | ||
| 129 | int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])); | ||
| 130 | file_name = (char *) malloc (dir_len + add_slash + base_len + 1); | ||
| 131 | if (file_name != NULL) | ||
| 132 | { | ||
| 133 | memcpy (file_name, dir, dir_len); | ||
| 134 | if (add_slash) | ||
| 135 | file_name[dir_len] = DIRECTORY_SEPARATOR; | ||
| 136 | memcpy (file_name + dir_len + add_slash, base, base_len + 1); | ||
| 137 | } | ||
| 138 | } | ||
| 139 | |||
| 140 | if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL) | ||
| 141 | /* Out of memory or file not found, treat it as empty. */ | ||
| 142 | cp = ""; | ||
| 143 | else | ||
| 144 | { | ||
| 145 | /* Parse the file's contents. */ | ||
| 146 | char *res_ptr = NULL; | ||
| 147 | size_t res_size = 0; | ||
| 148 | |||
| 149 | for (;;) | ||
| 150 | { | ||
| 151 | int c; | ||
| 152 | char buf1[50+1]; | ||
| 153 | char buf2[50+1]; | ||
| 154 | size_t l1, l2; | ||
| 155 | char *old_res_ptr; | ||
| 156 | |||
| 157 | c = getc (fp); | ||
| 158 | if (c == EOF) | ||
| 159 | break; | ||
| 160 | if (c == '\n' || c == ' ' || c == '\t') | ||
| 161 | continue; | ||
| 162 | if (c == '#') | ||
| 163 | { | ||
| 164 | /* Skip comment, to end of line. */ | ||
| 165 | do | ||
| 166 | c = getc (fp); | ||
| 167 | while (!(c == EOF || c == '\n')); | ||
| 168 | if (c == EOF) | ||
| 169 | break; | ||
| 170 | continue; | ||
| 171 | } | ||
| 172 | ungetc (c, fp); | ||
| 173 | if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) | ||
| 174 | break; | ||
| 175 | l1 = strlen (buf1); | ||
| 176 | l2 = strlen (buf2); | ||
| 177 | old_res_ptr = res_ptr; | ||
| 178 | if (res_size == 0) | ||
| 179 | { | ||
| 180 | res_size = l1 + 1 + l2 + 1; | ||
| 181 | res_ptr = (char *) malloc (res_size + 1); | ||
| 182 | } | ||
| 183 | else | ||
| 184 | { | ||
| 185 | res_size += l1 + 1 + l2 + 1; | ||
| 186 | res_ptr = (char *) realloc (res_ptr, res_size + 1); | ||
| 187 | } | ||
| 188 | if (res_ptr == NULL) | ||
| 189 | { | ||
| 190 | /* Out of memory. */ | ||
| 191 | res_size = 0; | ||
| 192 | if (old_res_ptr != NULL) | ||
| 193 | free (old_res_ptr); | ||
| 194 | break; | ||
| 195 | } | ||
| 196 | strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); | ||
| 197 | strcpy (res_ptr + res_size - (l2 + 1), buf2); | ||
| 198 | } | ||
| 199 | fclose (fp); | ||
| 200 | if (res_size == 0) | ||
| 201 | cp = ""; | ||
| 202 | else | ||
| 203 | { | ||
| 204 | *(res_ptr + res_size) = '\0'; | ||
| 205 | cp = res_ptr; | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | if (file_name != NULL) | ||
| 210 | free (file_name); | ||
| 211 | |||
| 212 | #else | ||
| 213 | |||
| 214 | # if defined VMS | ||
| 215 | /* To avoid the troubles of an extra file charset.alias_vms in the | ||
| 216 | sources of many GNU packages, simply inline the aliases here. */ | ||
| 217 | /* The list of encodings is taken from the OpenVMS 7.3-1 documentation | ||
| 218 | "Compaq C Run-Time Library Reference Manual for OpenVMS systems" | ||
| 219 | section 10.7 "Handling Different Character Sets". */ | ||
| 220 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" | ||
| 221 | "ISO8859-2" "\0" "ISO-8859-2" "\0" | ||
| 222 | "ISO8859-5" "\0" "ISO-8859-5" "\0" | ||
| 223 | "ISO8859-7" "\0" "ISO-8859-7" "\0" | ||
| 224 | "ISO8859-8" "\0" "ISO-8859-8" "\0" | ||
| 225 | "ISO8859-9" "\0" "ISO-8859-9" "\0" | ||
| 226 | /* Japanese */ | ||
| 227 | "eucJP" "\0" "EUC-JP" "\0" | ||
| 228 | "SJIS" "\0" "SHIFT_JIS" "\0" | ||
| 229 | "DECKANJI" "\0" "DEC-KANJI" "\0" | ||
| 230 | "SDECKANJI" "\0" "EUC-JP" "\0" | ||
| 231 | /* Chinese */ | ||
| 232 | "eucTW" "\0" "EUC-TW" "\0" | ||
| 233 | "DECHANYU" "\0" "DEC-HANYU" "\0" | ||
| 234 | "DECHANZI" "\0" "GB2312" "\0" | ||
| 235 | /* Korean */ | ||
| 236 | "DECKOREAN" "\0" "EUC-KR" "\0"; | ||
| 237 | # endif | ||
| 238 | |||
| 239 | # if defined WIN32_NATIVE || defined __CYGWIN__ | ||
| 240 | /* To avoid the troubles of installing a separate file in the same | ||
| 241 | directory as the DLL and of retrieving the DLL's directory at | ||
| 242 | runtime, simply inline the aliases here. */ | ||
| 243 | |||
| 244 | cp = "CP936" "\0" "GBK" "\0" | ||
| 245 | "CP1361" "\0" "JOHAB" "\0" | ||
| 246 | "CP20127" "\0" "ASCII" "\0" | ||
| 247 | "CP20866" "\0" "KOI8-R" "\0" | ||
| 248 | "CP20936" "\0" "GB2312" "\0" | ||
| 249 | "CP21866" "\0" "KOI8-RU" "\0" | ||
| 250 | "CP28591" "\0" "ISO-8859-1" "\0" | ||
| 251 | "CP28592" "\0" "ISO-8859-2" "\0" | ||
| 252 | "CP28593" "\0" "ISO-8859-3" "\0" | ||
| 253 | "CP28594" "\0" "ISO-8859-4" "\0" | ||
| 254 | "CP28595" "\0" "ISO-8859-5" "\0" | ||
| 255 | "CP28596" "\0" "ISO-8859-6" "\0" | ||
| 256 | "CP28597" "\0" "ISO-8859-7" "\0" | ||
| 257 | "CP28598" "\0" "ISO-8859-8" "\0" | ||
| 258 | "CP28599" "\0" "ISO-8859-9" "\0" | ||
| 259 | "CP28605" "\0" "ISO-8859-15" "\0" | ||
| 260 | "CP38598" "\0" "ISO-8859-8" "\0" | ||
| 261 | "CP51932" "\0" "EUC-JP" "\0" | ||
| 262 | "CP51936" "\0" "GB2312" "\0" | ||
| 263 | "CP51949" "\0" "EUC-KR" "\0" | ||
| 264 | "CP51950" "\0" "EUC-TW" "\0" | ||
| 265 | "CP54936" "\0" "GB18030" "\0" | ||
| 266 | "CP65001" "\0" "UTF-8" "\0"; | ||
| 267 | # endif | ||
| 268 | #endif | ||
| 269 | |||
| 270 | charset_aliases = cp; | ||
| 271 | } | ||
| 272 | |||
| 273 | return cp; | ||
| 274 | } | ||
| 275 | |||
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.

   The raw name comes from nl_langinfo (CODESET) where available, from the
   LC_ALL / LC_CTYPE / LANG environment variables on systems without it,
   and from the system code page on native Windows, Cygwin and OS/2.  It
   is then looked up in the table returned by get_charset_aliases; an
   alias entry named "*" matches any name.

   The result must not be freed; it is statically allocated.  Depending on
   the platform it may point into a static buffer that a later call
   overwrites, into the alias table, or into the environment.
   If the canonical name cannot be determined, the result is a non-canonical
   name.  The result is never the empty string. */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WIN32_NATIVE || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays. */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin 2006 does not have locales. nl_langinfo (CODESET) always
     returns "US-ASCII". As long as this is not fixed, return the suffix
     of the locale name from the environment variables (if present) or
     the codepage as a number. */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      /* "CP" + up to 10 decimal digits + NUL. */
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it. */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any. */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                {
                  /* An empty encoding ("xx_YY.") must not be returned:
                     callers treat "" as "ask locale_charset again". */
                  if (dot[0] != '\0')
                    return dot;
                }
              else if (modifier > dot
                       && (size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* Woe32 has a function returning the locale's codepage as a number. */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

# else

  /* On old systems which lack it, use setlocale or getenv. */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales. Some
     (like SunOS 4 or DJGPP) have only the C locale. Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set. */
#  if 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file. */
  codeset = locale;

# endif

#elif defined WIN32_NATIVE

  /* "CP" + up to 10 decimal digits + NUL. */
  static char buf[2 + 10 + 1];

  /* Woe32 has a function returning the locale's codepage as a number. */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;

#elif defined OS2

  const char *locale;
  /* "CP" + up to 10 decimal digits + NUL. */
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables. */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it. */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any. */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            {
              /* An empty encoding ("xx_YY.") must not be returned:
                 callers treat "" as "ask locale_charset again". */
              if (dot[0] != '\0')
                return dot;
            }
          else if (modifier > dot
                   && (size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* Resolve through the charset.alias file. */
      codeset = locale;
    }
  else
    {
      /* OS/2 has a function returning the locale's codepage as a number. */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* ULONG is not 'unsigned int'; convert explicitly so that the
             argument type matches the conversion specification. */
          sprintf (buf, "CP%lu", (unsigned long) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined. */
    codeset = "";

  /* Resolve alias.  The table is a list of (alias, canonical) string
     pairs, so each step skips two NUL-terminated strings. */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Don't return an empty string. GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time. */
  if (codeset[0] == '\0')
    codeset = "ASCII";

  return codeset;
}
