summary refs log tree commit diff stats
path: root/gl/m4/mbrtoc32.m4
diff options
context:
space:
mode:
Diffstat (limited to 'gl/m4/mbrtoc32.m4')
-rw-r--r-- gl/m4/mbrtoc32.m4 326
1 files changed, 326 insertions, 0 deletions
diff --git a/gl/m4/mbrtoc32.m4 b/gl/m4/mbrtoc32.m4
new file mode 100644
index 00000000..1991529c
--- /dev/null
+++ b/gl/m4/mbrtoc32.m4
@@ -0,0 +1,326 @@
1# mbrtoc32.m4
2# serial 21
3dnl Copyright (C) 2014-2025 Free Software Foundation, Inc.
4dnl This file is free software; the Free Software Foundation
5dnl gives unlimited permission to copy and/or distribute it,
6dnl with or without modifications, as long as this notice is preserved.
7dnl This file is offered as-is, without any warranty.
8
dnl gl_FUNC_MBRTOC32
dnl Decides whether mbrtoc32 is treated as missing or as needing replacement.
dnl Shell variables assigned here:
dnl   HAVE_MBRTOC32=0     when gl_cv_func_mbrtoc32 is "no".
dnl   REPLACE_MBRTOC32=1  when mbrtoc32 exists but
dnl                       GNULIBHEADERS_OVERRIDE_CHAR32_T or REPLACE_MBSTATE_T
dnl                       is 1, or one of the three feature checks does not
dnl                       end in "yes", or HAVE_WORKING_MBRTOC32 is 0.
dnl   LOCALE_ZH_CN=none   when the UTF-8 locale check does not end in "yes".
dnl Each failing feature check also AC_DEFINEs a matching C macro.
dnl NOTE(review): the defaults of HAVE_MBRTOC32 and REPLACE_MBRTOC32 are
dnl presumably set by gl_UCHAR_H_DEFAULTS, which is not visible here - confirm.
9AC_DEFUN([gl_FUNC_MBRTOC32],
10[
11 AC_REQUIRE([gl_UCHAR_H_DEFAULTS])
12
13 AC_REQUIRE([AC_TYPE_MBSTATE_T])
14 dnl Determine REPLACE_MBSTATE_T, from which GNULIB_defined_mbstate_t is
15 dnl determined. It describes how our overridden mbrtowc is implemented.
16 dnl We then implement mbrtoc32 accordingly.
17 AC_REQUIRE([gl_MBSTATE_T_BROKEN])
18
19 AC_REQUIRE([gl_TYPE_CHAR32_T])
20 AC_REQUIRE([gl_MBRTOC32_SANITYCHECK])
21
22 AC_REQUIRE([gl_CHECK_FUNC_MBRTOC32])
 dnl gl_cv_func_mbrtoc32 is the yes/no result of gl_CHECK_FUNC_MBRTOC32.
23 if test $gl_cv_func_mbrtoc32 = no; then
24 HAVE_MBRTOC32=0
25 else
 dnl With an overridden char32_t or a replaced mbstate_t, replace
 dnl unconditionally and skip the three feature checks.
26 if test $GNULIBHEADERS_OVERRIDE_CHAR32_T = 1 || test $REPLACE_MBSTATE_T = 1; then
27 REPLACE_MBRTOC32=1
28 else
29 gl_MBRTOC32_EMPTY_INPUT
30 gl_MBRTOC32_C_LOCALE
31 gl_MBRTOC32_UTF8_LOCALE
 dnl The patterns below use *yes so that a "guessing yes" cache value is
 dnl accepted as well as a plain "yes".
32 case "$gl_cv_func_mbrtoc32_empty_input" in
33 *yes) ;;
34 *) AC_DEFINE([MBRTOC32_EMPTY_INPUT_BUG], [1],
35 [Define if the mbrtoc32 function does not return (size_t) -2 for empty input.])
36 REPLACE_MBRTOC32=1
37 ;;
38 esac
39 case "$gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ" in
40 *yes) ;;
41 *) AC_DEFINE([MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ], [1],
42 [Define if the mbrtoc32 function may signal encoding errors in the C locale.])
43 REPLACE_MBRTOC32=1
44 ;;
45 esac
46 case "$gl_cv_func_mbrtoc32_utf8_locale_works" in
47 *yes) ;;
48 *) AC_DEFINE([MBRTOC32_MULTIBYTE_LOCALE_BUG], [1],
49 [Define if the mbrtoc32 function does not accept the input bytes one-by-one.])
50 REPLACE_MBRTOC32=1
51 dnl Our replacement mbrtoc32 can handle UTF-8, but not GB18030.
52 LOCALE_ZH_CN=none
53 ;;
54 esac
55 fi
 dnl HAVE_WORKING_MBRTOC32 is computed by gl_MBRTOC32_SANITYCHECK, which is
 dnl required above.
56 if test $HAVE_WORKING_MBRTOC32 = 0; then
57 REPLACE_MBRTOC32=1
58 fi
59 fi
60])
61
dnl gl_CHECK_FUNC_MBRTOC32
dnl Sets gl_cv_func_mbrtoc32 to yes or no.
dnl Step 1: AC_CHECK_DECL looks for a declaration of mbrtoc32 in <uchar.h>
dnl         (with <stdint.h> included first on Haiku).
dnl Step 2: only if it is declared, a cached link test compiles and links a
dnl         program that calls mbrtoc32; success gives yes, failure gives no.
dnl If the declaration is absent the result is no and no link test is run.
62AC_DEFUN([gl_CHECK_FUNC_MBRTOC32],
63[
64 dnl Cf. gl_CHECK_FUNCS_ANDROID
65 AC_CHECK_DECL([mbrtoc32], , ,
66 [[#ifdef __HAIKU__
67 #include <stdint.h>
68 #endif
69 #include <uchar.h>
70 ]])
71 if test $ac_cv_have_decl_mbrtoc32 = yes; then
72 dnl We can't use AC_CHECK_FUNC here, because mbrtoc32() is defined as a
73 dnl static inline function on Haiku 2020.
74 AC_CACHE_CHECK([for mbrtoc32], [gl_cv_func_mbrtoc32],
75 [AC_LINK_IFELSE(
76 [AC_LANG_PROGRAM(
77 [[#include <stdlib.h>
78 #ifdef __HAIKU__
79 #include <stdint.h>
80 #endif
81 #include <uchar.h>
82 ]],
83 [[char32_t c;
84 return mbrtoc32 (&c, "", 1, NULL) == 0;
85 ]])
86 ],
87 [gl_cv_func_mbrtoc32=yes],
88 [gl_cv_func_mbrtoc32=no])
89 ])
90 else
91 gl_cv_func_mbrtoc32=no
92 fi
93])
94
95dnl Test whether mbrtoc32 returns the correct value on empty input.
96
dnl gl_MBRTOC32_EMPTY_INPUT
dnl Result is the cache variable gl_cv_func_mbrtoc32_empty_input.
dnl The test program calls mbrtoc32 with a length of 0 and exits with status 0
dnl only if the return value equals (size_t) -2; that gives "yes", any other
dnl exit status gives "no".
dnl When the program cannot be run (cross-compiling), the value is a guess
dnl keyed on $host_os: "guessing no" for glibc, Android and native Windows,
dnl "guessing yes" for everything else.
97AC_DEFUN([gl_MBRTOC32_EMPTY_INPUT],
98[
99 AC_REQUIRE([AC_PROG_CC])
100 AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
101 AC_CACHE_CHECK([whether mbrtoc32 works on empty input],
102 [gl_cv_func_mbrtoc32_empty_input],
103 [
104 AC_RUN_IFELSE(
105 [AC_LANG_SOURCE([[
106 #ifdef __HAIKU__
107 #include <stdint.h>
108 #endif
109 #include <uchar.h>
110 static char32_t wc;
111 static mbstate_t mbs;
112 int
113 main (void)
114 {
115 return mbrtoc32 (&wc, "", 0, &mbs) != (size_t) -2;
116 }]])],
117 [gl_cv_func_mbrtoc32_empty_input=yes],
118 [gl_cv_func_mbrtoc32_empty_input=no],
119 [case "$host_os" in
120 # Guess no on glibc systems.
121 *-gnu* | gnu*) gl_cv_func_mbrtoc32_empty_input="guessing no" ;;
122 # Guess no on Android.
123 linux*-android*) gl_cv_func_mbrtoc32_empty_input="guessing no" ;;
124 # Guess no on native Windows.
125 mingw* | windows*) gl_cv_func_mbrtoc32_empty_input="guessing no" ;;
126 *) gl_cv_func_mbrtoc32_empty_input="guessing yes" ;;
127 esac
128 ])
129 ])
130])
131
132dnl <https://pubs.opengroup.org/onlinepubs/9699919799/functions/mbrtowc.html>
133dnl POSIX:2018 says regarding mbrtowc: "In the POSIX locale an [EILSEQ] error
134dnl cannot occur since all byte values are valid characters." It is reasonable
135dnl to expect mbrtoc32 to behave in the same way.
136
dnl gl_MBRTOC32_C_LOCALE
dnl Result is the cache variable gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ.
dnl The test program switches to the "C" locale and feeds every char value
dnl from CHAR_MIN to CHAR_MAX to mbrtoc32 as a one-byte input, each time with
dnl a freshly zeroed mbstate_t. Exit statuses of the test program:
dnl   0  every call returned 0 or 1              (result "yes")
dnl   2  setlocale failed                        (result "no")
dnl   3  some call returned a value above 1; as size_t is unsigned this
dnl      includes (size_t) -1 and (size_t) -2    (result "no")
dnl When cross-compiling the guess is "guessing yes" on native Windows and
dnl $gl_cross_guess_normal elsewhere.
137AC_DEFUN([gl_MBRTOC32_C_LOCALE],
138[
139 AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
140 AC_CACHE_CHECK([whether the C locale is free of encoding errors],
141 [gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ],
142 [AC_RUN_IFELSE(
143 [AC_LANG_PROGRAM(
144 [[#include <limits.h>
145 #include <locale.h>
146 #ifdef __HAIKU__
147 #include <stdint.h>
148 #endif
149 #include <uchar.h>
150 ]], [[
151 int i;
152 char *locale = setlocale (LC_ALL, "C");
153 if (! locale)
154 return 2;
155 for (i = CHAR_MIN; i <= CHAR_MAX; i++)
156 {
157 char c = i;
158 char32_t wc;
159 mbstate_t mbs = { 0, };
160 size_t ss = mbrtoc32 (&wc, &c, 1, &mbs);
161 if (1 < ss)
162 return 3;
163 }
164 return 0;
165 ]])],
166 [gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ=yes],
167 [gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ=no],
168 [case "$host_os" in
169 # Guess yes on native Windows.
170 mingw* | windows*) gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ="guessing yes" ;;
171 *) gl_cv_func_mbrtoc32_C_locale_sans_EILSEQ="$gl_cross_guess_normal" ;;
172 esac
173 ])
174 ])
175])
176
177dnl Test whether mbrtoc32 works when it's fed the bytes one-by-one in a UTF-8
178dnl locale.
179
dnl gl_MBRTOC32_UTF8_LOCALE
dnl Result is the cache variable gl_cv_func_mbrtoc32_utf8_locale_works.
dnl The test program tries to select the en_US.UTF-8 locale. If that locale
dnl cannot be set, nothing is tested and the program exits 0 (result "yes").
dnl Otherwise it passes the four bytes of U+0001F403 to mbrtoc32 one byte per
dnl call, sharing one mbstate_t, and exits non-zero (result "no") unless the
dnl first three calls return (size_t) -2, the fourth returns 1, and the
dnl decoded value is 0x0001F403. Exit statuses 1 to 5 identify the failing
dnl step.
dnl When cross-compiling the guess is "guessing no" on Cygwin and
dnl $gl_cross_guess_normal elsewhere.
180AC_DEFUN([gl_MBRTOC32_UTF8_LOCALE],
181[
182 AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
183 AC_CACHE_CHECK([whether mbrtoc32 works in an UTF-8 locale],
184 [gl_cv_func_mbrtoc32_utf8_locale_works],
185 [AC_RUN_IFELSE(
186 [AC_LANG_PROGRAM(
187 [[#include <locale.h>
188 #ifdef __HAIKU__
189 #include <stdint.h>
190 #endif
191 #include <uchar.h>
192 ]], [[
193 char *locale = setlocale (LC_ALL, "en_US.UTF-8");
194 if (locale)
195 {
196 /* This test fails on Cygwin 3.5.3. */
197 mbstate_t state = { 0, };
198 char32_t uc = 0xDEADBEEF;
199 /* \360\237\220\203 = U+0001F403 */
200 if (mbrtoc32 (&uc, "\360", 1, &state) != (size_t)-2)
201 return 1;
202 if (mbrtoc32 (&uc, "\237", 1, &state) != (size_t)-2)
203 return 2;
204 if (mbrtoc32 (&uc, "\220", 1, &state) != (size_t)-2)
205 return 3;
206 if (mbrtoc32 (&uc, "\203", 1, &state) != 1)
207 return 4;
208 if (uc != 0x0001F403)
209 return 5;
210 }
211 return 0;
212 ]])],
213 [gl_cv_func_mbrtoc32_utf8_locale_works=yes],
214 [gl_cv_func_mbrtoc32_utf8_locale_works=no],
215 [case "$host_os" in
216 # Guess no on Cygwin.
217 cygwin*) gl_cv_func_mbrtoc32_utf8_locale_works="guessing no" ;;
218 *) gl_cv_func_mbrtoc32_utf8_locale_works="$gl_cross_guess_normal" ;;
219 esac
220 ])
221 ])
222])
223
224dnl Test whether mbrtoc32 works no worse than mbrtowc.
225dnl Result is HAVE_WORKING_MBRTOC32.
226
dnl gl_MBRTOC32_SANITYCHECK
dnl Sets the shell variable HAVE_WORKING_MBRTOC32 to 0 or 1 and AC_SUBSTs it;
dnl when it is 1 the C macro HAVE_WORKING_MBRTOC32 is also AC_DEFINEd.
dnl   - If GNULIBHEADERS_OVERRIDE_CHAR32_T is 1 or gl_cv_func_mbrtoc32 is no,
dnl     the value is 0 and no test is run.
dnl   - Otherwise gl_cv_func_mbrtoc32_sanitycheck starts as a guess keyed on
dnl     $host_os. If at least one of LOCALE_FR and LOCALE_ZH_CN is not "none",
dnl     a test program is run and its outcome replaces the guess.
dnl The test program compares mbrtoc32 with mbrtowc: for each usable locale,
dnl whenever mbrtowc converts the sample input completely (returning 1 for the
dnl LOCALE_FR sample, 4 for the LOCALE_ZH_CN sample), mbrtoc32 must return the
dnl same length. Bit 1 of the exit status flags a LOCALE_FR mismatch, bit 2 a
dnl LOCALE_ZH_CN mismatch.
dnl The cross-compiling action of AC_RUN_IFELSE is a no-op, so the guess is
dnl kept in that case.
227AC_DEFUN([gl_MBRTOC32_SANITYCHECK],
228[
229 AC_REQUIRE([AC_PROG_CC])
230 AC_REQUIRE([gl_TYPE_CHAR32_T])
231 AC_REQUIRE([gl_CHECK_FUNC_MBRTOC32])
232 AC_REQUIRE([gt_LOCALE_FR])
233 AC_REQUIRE([gt_LOCALE_ZH_CN])
234 AC_REQUIRE([AC_CANONICAL_HOST])
235 if test $GNULIBHEADERS_OVERRIDE_CHAR32_T = 1 || test $gl_cv_func_mbrtoc32 = no; then
236 HAVE_WORKING_MBRTOC32=0
237 else
238 AC_CACHE_CHECK([whether mbrtoc32 works as well as mbrtowc],
239 [gl_cv_func_mbrtoc32_sanitycheck],
240 [
241 dnl Initial guess, used when cross-compiling or when no suitable locale
242 dnl is present.
243changequote(,)dnl
244 case "$host_os" in
245 # Guess no on FreeBSD, Solaris, native Windows.
246 freebsd* | midnightbsd* | solaris* | mingw* | windows*)
247 gl_cv_func_mbrtoc32_sanitycheck="guessing no"
248 ;;
249 # Guess yes otherwise.
250 *)
251 gl_cv_func_mbrtoc32_sanitycheck="guessing yes"
252 ;;
253 esac
254changequote([,])dnl
255 if test $LOCALE_FR != none || test $LOCALE_ZH_CN != none; then
256 AC_RUN_IFELSE(
257 [AC_LANG_SOURCE([[
258#include <locale.h>
259#include <stdlib.h>
260#include <string.h>
261#include <wchar.h>
262#ifdef __HAIKU__
263 #include <stdint.h>
264#endif
265#include <uchar.h>
266int main ()
267{
268 int result = 0;
269 /* This fails on native Windows:
270 mbrtoc32 returns (size_t)-1.
271 mbrtowc returns 1 (correct). */
272 if (strcmp ("$LOCALE_FR", "none") != 0
273 && setlocale (LC_ALL, "$LOCALE_FR") != NULL)
274 {
275 mbstate_t state;
276 wchar_t wc = (wchar_t) 0xBADFACE;
277 memset (&state, '\0', sizeof (mbstate_t));
278 if (mbrtowc (&wc, "\374", 1, &state) == 1)
279 {
280 char32_t c32 = (wchar_t) 0xBADFACE;
281 memset (&state, '\0', sizeof (mbstate_t));
282 if (mbrtoc32 (&c32, "\374", 1, &state) != 1)
283 result |= 1;
284 }
285 }
286 /* This fails on FreeBSD 13.0 and Solaris 11.4:
287 mbrtoc32 returns (size_t)-2 or (size_t)-1.
288 mbrtowc returns 4 (correct). */
289 if (strcmp ("$LOCALE_ZH_CN", "none") != 0
290 && setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL)
291 {
292 mbstate_t state;
293 wchar_t wc = (wchar_t) 0xBADFACE;
294 memset (&state, '\0', sizeof (mbstate_t));
295 if (mbrtowc (&wc, "\224\071\375\067", 4, &state) == 4)
296 {
297 char32_t c32 = (wchar_t) 0xBADFACE;
298 memset (&state, '\0', sizeof (mbstate_t));
299 if (mbrtoc32 (&c32, "\224\071\375\067", 4, &state) != 4)
300 result |= 2;
301 }
302 }
303 return result;
304}]])],
305 [gl_cv_func_mbrtoc32_sanitycheck=yes],
306 [gl_cv_func_mbrtoc32_sanitycheck=no],
307 [:])
308 fi
309 ])
 dnl Map the cache value to 0 or 1; the *yes pattern also accepts a
 dnl "guessing yes" value.
310 case "$gl_cv_func_mbrtoc32_sanitycheck" in
311 *yes)
312 HAVE_WORKING_MBRTOC32=1
313 AC_DEFINE([HAVE_WORKING_MBRTOC32], [1],
314 [Define if the mbrtoc32 function basically works.])
315 ;;
316 *) HAVE_WORKING_MBRTOC32=0 ;;
317 esac
318 fi
319 AC_SUBST([HAVE_WORKING_MBRTOC32])
320])
321
322# Prerequisites of lib/mbrtoc32.c and lib/lc-charset-dispatch.c.
dnl gl_PREREQ_MBRTOC32
dnl Requires gl_C32RTOMB_SANITYCHECK (defined outside this file) and otherwise
dnl expands to the shell no-op ":".
323AC_DEFUN([gl_PREREQ_MBRTOC32], [
324 AC_REQUIRE([gl_C32RTOMB_SANITYCHECK])
325 :
326])