diff options
Diffstat (limited to 'gl/m4/mbrtoc32.m4')
| -rw-r--r-- | gl/m4/mbrtoc32.m4 | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/gl/m4/mbrtoc32.m4 b/gl/m4/mbrtoc32.m4 new file mode 100644 index 00000000..1991529c --- /dev/null +++ b/gl/m4/mbrtoc32.m4 | |||
| @@ -0,0 +1,326 @@ | |||
| 1 | # mbrtoc32.m4 | ||
| 2 | # serial 21 | ||
| 3 | dnl Copyright (C) 2014-2025 Free Software Foundation, Inc. | ||
| 4 | dnl This file is free software; the Free Software Foundation | ||
| 5 | dnl gives unlimited permission to copy and/or distribute it, | ||
| 6 | dnl with or without modifications, as long as this notice is preserved. | ||
| 7 | dnl This file is offered as-is, without any warranty. | ||
| 8 | |||
| | dnl gl_FUNC_MBRTOC32 | ||
| | dnl Decides whether mbrtoc32 is missing or has to be replaced. | ||
| | dnl Sets HAVE_MBRTOC32=0 when gl_cv_func_mbrtoc32 is no. Otherwise sets | ||
| | dnl REPLACE_MBRTOC32=1 when GNULIBHEADERS_OVERRIDE_CHAR32_T or | ||
| | dnl REPLACE_MBSTATE_T is 1, when any of the three behaviour checks does not | ||
| | dnl end in yes, or when HAVE_WORKING_MBRTOC32 is 0. Each failing behaviour | ||
| | dnl check additionally defines a C macro: MBRTOC32_EMPTY_INPUT_BUG, | ||
| | dnl MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ or MBRTOC32_MULTIBYTE_LOCALE_BUG. | ||
| 9 | AC_DEFUN([gl_FUNC_MBRTOC32], | ||
| 10 | [ | ||
| 11 | AC_REQUIRE([gl_UCHAR_H_DEFAULTS]) | ||
| 12 | |||
| 13 | AC_REQUIRE([AC_TYPE_MBSTATE_T]) | ||
| 14 | dnl Determine REPLACE_MBSTATE_T, from which GNULIB_defined_mbstate_t is | ||
| 15 | dnl determined. It describes how our overridden mbrtowc is implemented. | ||
| 16 | dnl We then implement mbrtoc32 accordingly. | ||
| 17 | AC_REQUIRE([gl_MBSTATE_T_BROKEN]) | ||
| 18 | |||
| 19 | AC_REQUIRE([gl_TYPE_CHAR32_T]) | ||
| 20 | AC_REQUIRE([gl_MBRTOC32_SANITYCHECK]) | ||
| 21 | |||
| 22 | AC_REQUIRE([gl_CHECK_FUNC_MBRTOC32]) | ||
| | dnl No linkable mbrtoc32 was found: report it as absent. | ||
| 23 | if test $gl_cv_func_mbrtoc32 = no; then | ||
| 24 | HAVE_MBRTOC32=0 | ||
| 25 | else | ||
| | dnl With an overridden char32_t or mbstate_t the replacement is chosen | ||
| | dnl right away and the behaviour checks below are not run. | ||
| 26 | if test $GNULIBHEADERS_OVERRIDE_CHAR32_T = 1 || test $REPLACE_MBSTATE_T = 1; then | ||
| 27 | REPLACE_MBRTOC32=1 | ||
| 28 | else | ||
| | dnl Run all three checks first, then evaluate their cache variables. | ||
| | dnl The patterns match both a plain yes and a guessing yes. | ||
| 29 | gl_MBRTOC32_EMPTY_INPUT | ||
| 30 | gl_MBRTOC32_C_LOCALE | ||
| 31 | gl_MBRTOC32_UTF8_LOCALE | ||
| 32 | case "$gl_cv_func_mbrtoc32_empty_input" in | ||
| 33 | *yes) ;; | ||
| 34 | *) AC_DEFINE([MBRTOC32_EMPTY_INPUT_BUG], [1], | ||
| 35 | [Define if the mbrtoc32 function does not return (size_t) -2 for empty input.]) | ||
| 36 | REPLACE_MBRTOC32=1 | ||
| 37 | ;; | ||
| 38 | esac | ||
| 39 | case "$gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ" in | ||
| 40 | *yes) ;; | ||
| 41 | *) AC_DEFINE([MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ], [1], | ||
| 42 | [Define if the mbrtoc32 function may signal encoding errors in the C locale.]) | ||
| 43 | REPLACE_MBRTOC32=1 | ||
| 44 | ;; | ||
| 45 | esac | ||
| 46 | case "$gl_cv_func_mbrtoc32_utf8_locale_works" in | ||
| 47 | *yes) ;; | ||
| 48 | *) AC_DEFINE([MBRTOC32_MULTIBYTE_LOCALE_BUG], [1], | ||
| 49 | [Define if the mbrtoc32 function does not accept the input bytes one-by-one.]) | ||
| 50 | REPLACE_MBRTOC32=1 | ||
| 51 | dnl Our replacement mbrtoc32 can handle UTF-8, but not GB18030. | ||
| 52 | LOCALE_ZH_CN=none | ||
| 53 | ;; | ||
| 54 | esac | ||
| 55 | fi | ||
| | dnl HAVE_WORKING_MBRTOC32 comes from gl_MBRTOC32_SANITYCHECK, which is | ||
| | dnl required above. | ||
| 56 | if test $HAVE_WORKING_MBRTOC32 = 0; then | ||
| 57 | REPLACE_MBRTOC32=1 | ||
| 58 | fi | ||
| 59 | fi | ||
| 60 | ]) | ||
| 61 | |||
| | dnl gl_CHECK_FUNC_MBRTOC32 | ||
| | dnl Sets gl_cv_func_mbrtoc32 to yes when mbrtoc32 is declared through uchar.h | ||
| | dnl and a small program calling it links, and to no otherwise. | ||
| 62 | AC_DEFUN([gl_CHECK_FUNC_MBRTOC32], | ||
| 63 | [ | ||
| 64 | dnl Cf. gl_CHECK_FUNCS_ANDROID | ||
| | dnl NOTE(review): stdint.h is included before uchar.h on Haiku, presumably | ||
| | dnl because uchar.h there is not self-contained; confirm. | ||
| 65 | AC_CHECK_DECL([mbrtoc32], , , | ||
| 66 | [[#ifdef __HAIKU__ | ||
| 67 | #include <stdint.h> | ||
| 68 | #endif | ||
| 69 | #include <uchar.h> | ||
| 70 | ]]) | ||
| 71 | if test $ac_cv_have_decl_mbrtoc32 = yes; then | ||
| 72 | dnl We can't use AC_CHECK_FUNC here, because mbrtoc32() is defined as a | ||
| 73 | dnl static inline function on Haiku 2020. | ||
| 74 | AC_CACHE_CHECK([for mbrtoc32], [gl_cv_func_mbrtoc32], | ||
| 75 | [AC_LINK_IFELSE( | ||
| 76 | [AC_LANG_PROGRAM( | ||
| 77 | [[#include <stdlib.h> | ||
| 78 | #ifdef __HAIKU__ | ||
| 79 | #include <stdint.h> | ||
| 80 | #endif | ||
| 81 | #include <uchar.h> | ||
| 82 | ]], | ||
| 83 | [[char32_t c; | ||
| 84 | return mbrtoc32 (&c, "", 1, NULL) == 0; | ||
| 85 | ]]) | ||
| 86 | ], | ||
| 87 | [gl_cv_func_mbrtoc32=yes], | ||
| 88 | [gl_cv_func_mbrtoc32=no]) | ||
| 89 | ]) | ||
| 90 | else | ||
| | dnl Not declared: treat the function as absent without a link test. | ||
| 91 | gl_cv_func_mbrtoc32=no | ||
| 92 | fi | ||
| 93 | ]) | ||
| 94 | |||
| 95 | dnl Test whether mbrtoc32 returns the correct value on empty input. | ||
| 96 | |||
| 97 | AC_DEFUN([gl_MBRTOC32_EMPTY_INPUT], | ||
| 98 | [ | ||
| | dnl Sets the cache variable gl_cv_func_mbrtoc32_empty_input: yes or no from | ||
| | dnl running the test program, or a guessing value chosen by host_os when | ||
| | dnl cross-compiling. | ||
| 99 | AC_REQUIRE([AC_PROG_CC]) | ||
| 100 | AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles | ||
| 101 | AC_CACHE_CHECK([whether mbrtoc32 works on empty input], | ||
| 102 | [gl_cv_func_mbrtoc32_empty_input], | ||
| 103 | [ | ||
| 104 | AC_RUN_IFELSE( | ||
| 105 | [AC_LANG_SOURCE([[ | ||
| 106 | #ifdef __HAIKU__ | ||
| 107 | #include <stdint.h> | ||
| 108 | #endif | ||
| 109 | #include <uchar.h> | ||
| 110 | static char32_t wc; | ||
| 111 | static mbstate_t mbs; | ||
| 112 | int | ||
| 113 | main (void) | ||
| 114 | { | ||
| | /* Exit status 0 exactly when a zero-length input yields (size_t) -2. */ | ||
| 115 | return mbrtoc32 (&wc, "", 0, &mbs) != (size_t) -2; | ||
| 116 | }]])], | ||
| 117 | [gl_cv_func_mbrtoc32_empty_input=yes], | ||
| 118 | [gl_cv_func_mbrtoc32_empty_input=no], | ||
| 119 | [case "$host_os" in | ||
| 120 | # Guess no on glibc systems. | ||
| 121 | *-gnu* | gnu*) gl_cv_func_mbrtoc32_empty_input="guessing no" ;; | ||
| 122 | # Guess no on Android. | ||
| 123 | linux*-android*) gl_cv_func_mbrtoc32_empty_input="guessing no" ;; | ||
| 124 | # Guess no on native Windows. | ||
| 125 | mingw* | windows*) gl_cv_func_mbrtoc32_empty_input="guessing no" ;; | ||
| | # Guess yes everywhere else. | ||
| 126 | *) gl_cv_func_mbrtoc32_empty_input="guessing yes" ;; | ||
| 127 | esac | ||
| 128 | ]) | ||
| 129 | ]) | ||
| 130 | ]) | ||
| 131 | |||
| 132 | dnl <https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbrtowc.html> | ||
| 133 | dnl POSIX:2018 says regarding mbrtowc: "In the POSIX locale an [EILSEQ] error | ||
| 134 | dnl cannot occur since all byte values are valid characters." It is reasonable | ||
| 135 | dnl to expect mbrtoc32 to behave in the same way. | ||
| 136 | |||
| 137 | AC_DEFUN([gl_MBRTOC32_C_LOCALE], | ||
| 138 | [ | ||
| | dnl Sets the cache variable gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ: yes or | ||
| | dnl no from running the test program, or a guess when cross-compiling. | ||
| 139 | AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles | ||
| 140 | AC_CACHE_CHECK([whether the C locale is free of encoding errors], | ||
| 141 | [gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ], | ||
| 142 | [AC_RUN_IFELSE( | ||
| 143 | [AC_LANG_PROGRAM( | ||
| 144 | [[#include <limits.h> | ||
| 145 | #include <locale.h> | ||
| 146 | #ifdef __HAIKU__ | ||
| 147 | #include <stdint.h> | ||
| 148 | #endif | ||
| 149 | #include <uchar.h> | ||
| 150 | ]], [[ | ||
| 151 | int i; | ||
| 152 | char *locale = setlocale (LC_ALL, "C"); | ||
| | /* Exit status 2: the C locale could not be selected. */ | ||
| 153 | if (! locale) | ||
| 154 | return 2; | ||
| | /* Convert every possible char value as a one-byte input, each time | ||
| | starting from a fresh conversion state. */ | ||
| 155 | for (i = CHAR_MIN; i <= CHAR_MAX; i++) | ||
| 156 | { | ||
| 157 | char c = i; | ||
| 158 | char32_t wc; | ||
| 159 | mbstate_t mbs = { 0, }; | ||
| 160 | size_t ss = mbrtoc32 (&wc, &c, 1, &mbs); | ||
| | /* Only 0 and 1 are accepted. Because ss is unsigned, this also | ||
| | rejects the special returns (size_t) -1 and (size_t) -2. | ||
| | Exit status 3 reports such a byte. */ | ||
| 161 | if (1 < ss) | ||
| 162 | return 3; | ||
| 163 | } | ||
| 164 | return 0; | ||
| 165 | ]])], | ||
| 166 | [gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ=yes], | ||
| 167 | [gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ=no], | ||
| 168 | [case "$host_os" in | ||
| 169 | # Guess yes on native Windows. | ||
| 170 | mingw* | windows*) gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ="guessing yes" ;; | ||
| | # Otherwise use the generic cross-compilation guess. | ||
| 171 | *) gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ="$gl_cross_guess_normal" ;; | ||
| 172 | esac | ||
| 173 | ]) | ||
| 174 | ]) | ||
| 175 | ]) | ||
| 176 | |||
| 177 | dnl Test whether mbrtoc32 works when it's fed the bytes one-by-one in a UTF-8 | ||
| 178 | dnl locale. | ||
| 179 | |||
| 180 | AC_DEFUN([gl_MBRTOC32_UTF8_LOCALE], | ||
| 181 | [ | ||
| | dnl Sets the cache variable gl_cv_func_mbrtoc32_utf8_locale_works: yes or no | ||
| | dnl from running the test program, or a guess when cross-compiling. | ||
| 182 | AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles | ||
| 183 | AC_CACHE_CHECK([whether mbrtoc32 works in an UTF-8 locale], | ||
| 184 | [gl_cv_func_mbrtoc32_utf8_locale_works], | ||
| 185 | [AC_RUN_IFELSE( | ||
| 186 | [AC_LANG_PROGRAM( | ||
| 187 | [[#include <locale.h> | ||
| 188 | #ifdef __HAIKU__ | ||
| 189 | #include <stdint.h> | ||
| 190 | #endif | ||
| 191 | #include <uchar.h> | ||
| 192 | ]], [[ | ||
| 193 | char *locale = setlocale (LC_ALL, "en_US.UTF-8"); | ||
| | /* If the locale is not available, nothing is tested and the program | ||
| | reports success. */ | ||
| 194 | if (locale) | ||
| 195 | { | ||
| 196 | /* This test fails on Cygwin 3.5.3. */ | ||
| 197 | mbstate_t state = { 0, }; | ||
| 198 | char32_t uc = 0xDEADBEEF; | ||
| | /* Feed the four bytes one call at a time, sharing one state. | ||
| | The first three calls must report an incomplete character, | ||
| | the fourth must complete it. Exit statuses 1 to 4 identify | ||
| | the failing call, 5 a wrong resulting code point. */ | ||
| 199 | /* \360\237\220\203 = U+0001F403 */ | ||
| 200 | if (mbrtoc32 (&uc, "\360", 1, &state) != (size_t)-2) | ||
| 201 | return 1; | ||
| 202 | if (mbrtoc32 (&uc, "\237", 1, &state) != (size_t)-2) | ||
| 203 | return 2; | ||
| 204 | if (mbrtoc32 (&uc, "\220", 1, &state) != (size_t)-2) | ||
| 205 | return 3; | ||
| 206 | if (mbrtoc32 (&uc, "\203", 1, &state) != 1) | ||
| 207 | return 4; | ||
| 208 | if (uc != 0x0001F403) | ||
| 209 | return 5; | ||
| 210 | } | ||
| 211 | return 0; | ||
| 212 | ]])], | ||
| 213 | [gl_cv_func_mbrtoc32_utf8_locale_works=yes], | ||
| 214 | [gl_cv_func_mbrtoc32_utf8_locale_works=no], | ||
| 215 | [case "$host_os" in | ||
| 216 | # Guess no on Cygwin. | ||
| 217 | cygwin*) gl_cv_func_mbrtoc32_utf8_locale_works="guessing no" ;; | ||
| | # Otherwise use the generic cross-compilation guess. | ||
| 218 | *) gl_cv_func_mbrtoc32_utf8_locale_works="$gl_cross_guess_normal" ;; | ||
| 219 | esac | ||
| 220 | ]) | ||
| 221 | ]) | ||
| 222 | ]) | ||
| 223 | |||
| 224 | dnl Test whether mbrtoc32 works no worse than mbrtowc. | ||
| 225 | dnl Result is HAVE_WORKING_MBRTOC32. | ||
| 226 | |||
| 227 | AC_DEFUN([gl_MBRTOC32_SANITYCHECK], | ||
| 228 | [ | ||
| | dnl Sets the shell variable HAVE_WORKING_MBRTOC32 to 0 or 1 and substitutes | ||
| | dnl it. When the result is 1, a C macro of the same name is defined too. | ||
| | dnl The cache variable is gl_cv_func_mbrtoc32_sanitycheck. | ||
| 229 | AC_REQUIRE([AC_PROG_CC]) | ||
| 230 | AC_REQUIRE([gl_TYPE_CHAR32_T]) | ||
| 231 | AC_REQUIRE([gl_CHECK_FUNC_MBRTOC32]) | ||
| 232 | AC_REQUIRE([gt_LOCALE_FR]) | ||
| 233 | AC_REQUIRE([gt_LOCALE_ZH_CN]) | ||
| 234 | AC_REQUIRE([AC_CANONICAL_HOST]) | ||
| | dnl With an overridden char32_t or without any mbrtoc32, no test is run | ||
| | dnl and the function counts as not working. | ||
| 235 | if test $GNULIBHEADERS_OVERRIDE_CHAR32_T = 1 || test $gl_cv_func_mbrtoc32 = no; then | ||
| 236 | HAVE_WORKING_MBRTOC32=0 | ||
| 237 | else | ||
| 238 | AC_CACHE_CHECK([whether mbrtoc32 works as well as mbrtowc], | ||
| 239 | [gl_cv_func_mbrtoc32_sanitycheck], | ||
| 240 | [ | ||
| 241 | dnl Initial guess, used when cross-compiling or when no suitable locale | ||
| 242 | dnl is present. | ||
| 243 | changequote(,)dnl | ||
| 244 | case "$host_os" in | ||
| 245 | # Guess no on FreeBSD, Solaris, native Windows. | ||
| 246 | freebsd* | midnightbsd* | solaris* | mingw* | windows*) | ||
| 247 | gl_cv_func_mbrtoc32_sanitycheck="guessing no" | ||
| 248 | ;; | ||
| 249 | # Guess yes otherwise. | ||
| 250 | *) | ||
| 251 | gl_cv_func_mbrtoc32_sanitycheck="guessing yes" | ||
| 252 | ;; | ||
| 253 | esac | ||
| 254 | changequote([,])dnl | ||
| | dnl The run test replaces the guess only when at least one of the two | ||
| | dnl locales was found. When cross-compiling, the run test does nothing, | ||
| | dnl so the guess stays in place. | ||
| 255 | if test $LOCALE_FR != none || test $LOCALE_ZH_CN != none; then | ||
| 256 | AC_RUN_IFELSE( | ||
| 257 | [AC_LANG_SOURCE([[ | ||
| 258 | #include <locale.h> | ||
| 259 | #include <stdlib.h> | ||
| 260 | #include <string.h> | ||
| 261 | #include <wchar.h> | ||
| 262 | #ifdef __HAIKU__ | ||
| 263 | #include <stdint.h> | ||
| 264 | #endif | ||
| 265 | #include <uchar.h> | ||
| 266 | int main () | ||
| 267 | { | ||
| | /* Bit mask of failures, returned as the exit status: | ||
| | 1 for the LOCALE_FR case, 2 for the LOCALE_ZH_CN case. */ | ||
| 268 | int result = 0; | ||
| 269 | /* This fails on native Windows: | ||
| 270 | mbrtoc32 returns (size_t)-1. | ||
| 271 | mbrtowc returns 1 (correct). */ | ||
| 272 | if (strcmp ("$LOCALE_FR", "none") != 0 | ||
| 273 | && setlocale (LC_ALL, "$LOCALE_FR") != NULL) | ||
| 274 | { | ||
| 275 | mbstate_t state; | ||
| 276 | wchar_t wc = (wchar_t) 0xBADFACE; | ||
| 277 | memset (&state, '\0', sizeof (mbstate_t)); | ||
| | /* mbrtoc32 is only judged when mbrtowc itself converts the byte. */ | ||
| 278 | if (mbrtowc (&wc, "\374", 1, &state) == 1) | ||
| 279 | { | ||
| | /* NOTE(review): the initializer is cast to wchar_t although c32 is | ||
| | a char32_t; this looks like a leftover from the wc line above. | ||
| | c32 is not read afterwards in this program. */ | ||
| 280 | char32_t c32 = (wchar_t) 0xBADFACE; | ||
| 281 | memset (&state, '\0', sizeof (mbstate_t)); | ||
| 282 | if (mbrtoc32 (&c32, "\374", 1, &state) != 1) | ||
| 283 | result |= 1; | ||
| 284 | } | ||
| 285 | } | ||
| 286 | /* This fails on FreeBSD 13.0 and Solaris 11.4: | ||
| 287 | mbrtoc32 returns (size_t)-2 or (size_t)-1. | ||
| 288 | mbrtowc returns 4 (correct). */ | ||
| 289 | if (strcmp ("$LOCALE_ZH_CN", "none") != 0 | ||
| 290 | && setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL) | ||
| 291 | { | ||
| 292 | mbstate_t state; | ||
| 293 | wchar_t wc = (wchar_t) 0xBADFACE; | ||
| 294 | memset (&state, '\0', sizeof (mbstate_t)); | ||
| | /* Same pattern with a four-byte input: mbrtoc32 is only judged when | ||
| | mbrtowc consumes all four bytes. */ | ||
| 295 | if (mbrtowc (&wc, "\224\071\375\067", 4, &state) == 4) | ||
| 296 | { | ||
| 297 | char32_t c32 = (wchar_t) 0xBADFACE; | ||
| 298 | memset (&state, '\0', sizeof (mbstate_t)); | ||
| 299 | if (mbrtoc32 (&c32, "\224\071\375\067", 4, &state) != 4) | ||
| 300 | result |= 2; | ||
| 301 | } | ||
| 302 | } | ||
| 303 | return result; | ||
| 304 | }]])], | ||
| 305 | [gl_cv_func_mbrtoc32_sanitycheck=yes], | ||
| 306 | [gl_cv_func_mbrtoc32_sanitycheck=no], | ||
| 307 | [:]) | ||
| 308 | fi | ||
| 309 | ]) | ||
| | dnl A plain yes and a guessing yes both count as working. | ||
| 310 | case "$gl_cv_func_mbrtoc32_sanitycheck" in | ||
| 311 | *yes) | ||
| 312 | HAVE_WORKING_MBRTOC32=1 | ||
| 313 | AC_DEFINE([HAVE_WORKING_MBRTOC32], [1], | ||
| 314 | [Define if the mbrtoc32 function basically works.]) | ||
| 315 | ;; | ||
| 316 | *) HAVE_WORKING_MBRTOC32=0 ;; | ||
| 317 | esac | ||
| 318 | fi | ||
| 319 | AC_SUBST([HAVE_WORKING_MBRTOC32]) | ||
| 320 | ]) | ||
| 321 | |||
| 322 | # Prerequisites of lib/mbrtoc32.c and lib/lc-charset-dispatch.c. | ||
| 323 | AC_DEFUN([gl_PREREQ_MBRTOC32], [ | ||
| | dnl The only prerequisite is that gl_C32RTOMB_SANITYCHECK has been run. | ||
| 324 | AC_REQUIRE([gl_C32RTOMB_SANITYCHECK]) | ||
| | dnl The colon is the shell no-op command; presumably it keeps the expanded | ||
| | dnl shell code from being empty. | ||
| 325 | : | ||
| 326 | ]) | ||
