summaryrefslogtreecommitdiffstats
path: root/gl/unicase.h
diff options
context:
space:
mode:
Diffstat (limited to 'gl/unicase.h')
-rw-r--r--gl/unicase.h472
1 files changed, 472 insertions, 0 deletions
diff --git a/gl/unicase.h b/gl/unicase.h
new file mode 100644
index 00000000..507a83d0
--- /dev/null
+++ b/gl/unicase.h
@@ -0,0 +1,472 @@
1/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
2/* Unicode character case mappings.
3 Copyright (C) 2002, 2009-2025 Free Software Foundation, Inc.
4
5 This file is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation; either version 2.1 of the
8 License, or (at your option) any later version.
9
10 This file is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18#ifndef _UNICASE_H
19#define _UNICASE_H
20
21#include "unitypes.h"
22
23/* Get bool. */
24#include <stdbool.h>
25
26/* Get size_t. */
27#include <stddef.h>
28
29/* Get uninorm_t. */
30#include "uninorm.h"
31
32#if 0
33# include <unistring/woe32dll.h>
34#else
35# define LIBUNISTRING_DLL_VARIABLE
36#endif
37
38#ifdef __cplusplus
39extern "C" {
40#endif
41
42/* ========================================================================= */
43
44/* Character case mappings.
45 These mappings are locale and context independent.
46 WARNING! These functions are not sufficient for languages such as German.
47 Better use the functions below that treat an entire string at once and are
48 language aware. */
49
50/* Return the uppercase mapping of a Unicode character. */
51extern ucs4_t
52 uc_toupper (ucs4_t uc)
53 _UC_ATTRIBUTE_CONST;
54
55/* Return the lowercase mapping of a Unicode character. */
56extern ucs4_t
57 uc_tolower (ucs4_t uc)
58 _UC_ATTRIBUTE_CONST;
59
60/* Return the titlecase mapping of a Unicode character. */
61extern ucs4_t
62 uc_totitle (ucs4_t uc)
63 _UC_ATTRIBUTE_CONST;
64
65/* ========================================================================= */
66
67/* String case mappings. */
68
69/* These functions are locale dependent. The iso639_language argument
70 identifies the language (e.g. "tr" for Turkish). NULL means to use
71 locale independent case mappings. */
72
73/* Return the ISO 639 language code of the current locale.
74 Return "" if it is unknown, or in the "C" locale. */
75extern const char *
76 uc_locale_language (void)
77 _UC_ATTRIBUTE_PURE;
78
79/* Conventions:
80
81 All functions prefixed with u8_ operate on UTF-8 encoded strings.
82 Their unit is an uint8_t (1 byte).
83
84 All functions prefixed with u16_ operate on UTF-16 encoded strings.
85 Their unit is an uint16_t (a 2-byte word).
86
87 All functions prefixed with u32_ operate on UCS-4 encoded strings.
88 Their unit is an uint32_t (a 4-byte word).
89
90 All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
91 n units.
92
93 Functions returning a string result take a (resultbuf, lengthp) argument
94 pair. If resultbuf is not NULL and the result fits into *lengthp units,
95 it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
96 allocated string is returned. In both cases, *lengthp is set to the
97 length (number of units) of the returned string. In case of error,
98 NULL is returned and errno is set. */
99
100/* Return the uppercase mapping of a string.
101 The nf argument identifies the normalization form to apply after the
102 case-mapping. It can also be NULL, for no normalization. */
103extern uint8_t *
104 u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
105 uninorm_t nf,
106 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
107extern uint16_t *
108 u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
109 uninorm_t nf,
110 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
111extern uint32_t *
112 u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
113 uninorm_t nf,
114 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
115
116/* Return the lowercase mapping of a string.
117 The nf argument identifies the normalization form to apply after the
118 case-mapping. It can also be NULL, for no normalization. */
119extern uint8_t *
120 u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
121 uninorm_t nf,
122 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
123extern uint16_t *
124 u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
125 uninorm_t nf,
126 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
127extern uint32_t *
128 u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
129 uninorm_t nf,
130 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
131
132/* Return the titlecase mapping of a string.
133 The nf argument identifies the normalization form to apply after the
134 case-mapping. It can also be NULL, for no normalization. */
135extern uint8_t *
136 u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
137 uninorm_t nf,
138 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
139extern uint16_t *
140 u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
141 uninorm_t nf,
142 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
143extern uint32_t *
144 u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
145 uninorm_t nf,
146 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
147
148/* The case-mapping context given by a prefix string, i.e. a summary of the text that precedes a string; it is what the uN_ct_* functions take as their prefix_context argument. */
149typedef struct casing_prefix_context
150 {
151 /* These fields are private, undocumented. Callers should treat the struct as opaque and obtain values from unicase_empty_prefix_context or from the uN_casing_prefix_context / uN_casing_prefixes_context functions. */
152 uint32_t last_char_except_ignorable;
153 uint32_t last_char_normal_or_above;
154 }
155 casing_prefix_context_t;
156/* The case-mapping context of the empty prefix string. */
157extern LIBUNISTRING_DLL_VARIABLE const casing_prefix_context_t unicase_empty_prefix_context;
158/* Return the case-mapping context of a given prefix string. */
159extern casing_prefix_context_t
160 u8_casing_prefix_context (const uint8_t *s, size_t n);
161extern casing_prefix_context_t
162 u16_casing_prefix_context (const uint16_t *s, size_t n);
163extern casing_prefix_context_t
164 u32_casing_prefix_context (const uint32_t *s, size_t n);
165/* Return the case-mapping context of the prefix concat(A, S), given the
166 case-mapping context of the prefix A. */
167extern casing_prefix_context_t
168 u8_casing_prefixes_context (const uint8_t *s, size_t n,
169 casing_prefix_context_t a_context);
170extern casing_prefix_context_t
171 u16_casing_prefixes_context (const uint16_t *s, size_t n,
172 casing_prefix_context_t a_context);
173extern casing_prefix_context_t
174 u32_casing_prefixes_context (const uint32_t *s, size_t n,
175 casing_prefix_context_t a_context);
176
177/* The case-mapping context given by a suffix string, i.e. a summary of the text that follows a string; it is what the uN_ct_* functions take as their suffix_context argument. */
178typedef struct casing_suffix_context
179 {
180 /* These fields are private, undocumented. Callers should treat the struct as opaque and obtain values from unicase_empty_suffix_context or from the uN_casing_suffix_context / uN_casing_suffixes_context functions. */
181 uint32_t first_char_except_ignorable;
182 uint32_t bits;
183 }
184 casing_suffix_context_t;
185/* The case-mapping context of the empty suffix string. */
186extern LIBUNISTRING_DLL_VARIABLE const casing_suffix_context_t unicase_empty_suffix_context;
187/* Return the case-mapping context of a given suffix string. */
188extern casing_suffix_context_t
189 u8_casing_suffix_context (const uint8_t *s, size_t n);
190extern casing_suffix_context_t
191 u16_casing_suffix_context (const uint16_t *s, size_t n);
192extern casing_suffix_context_t
193 u32_casing_suffix_context (const uint32_t *s, size_t n);
194/* Return the case-mapping context of the suffix concat(S, A), given the
195 case-mapping context of the suffix A. */
196extern casing_suffix_context_t
197 u8_casing_suffixes_context (const uint8_t *s, size_t n,
198 casing_suffix_context_t a_context);
199extern casing_suffix_context_t
200 u16_casing_suffixes_context (const uint16_t *s, size_t n,
201 casing_suffix_context_t a_context);
202extern casing_suffix_context_t
203 u32_casing_suffixes_context (const uint32_t *s, size_t n,
204 casing_suffix_context_t a_context);
205
206/* Return the uppercase mapping of a string that is surrounded by a prefix
207 and a suffix. */
208extern uint8_t *
209 u8_ct_toupper (const uint8_t *s, size_t n,
210 casing_prefix_context_t prefix_context,
211 casing_suffix_context_t suffix_context,
212 const char *iso639_language,
213 uninorm_t nf,
214 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
215extern uint16_t *
216 u16_ct_toupper (const uint16_t *s, size_t n,
217 casing_prefix_context_t prefix_context,
218 casing_suffix_context_t suffix_context,
219 const char *iso639_language,
220 uninorm_t nf,
221 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
222extern uint32_t *
223 u32_ct_toupper (const uint32_t *s, size_t n,
224 casing_prefix_context_t prefix_context,
225 casing_suffix_context_t suffix_context,
226 const char *iso639_language,
227 uninorm_t nf,
228 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
229
230/* Return the lowercase mapping of a string that is surrounded by a prefix
231 and a suffix. */
232extern uint8_t *
233 u8_ct_tolower (const uint8_t *s, size_t n,
234 casing_prefix_context_t prefix_context,
235 casing_suffix_context_t suffix_context,
236 const char *iso639_language,
237 uninorm_t nf,
238 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
239extern uint16_t *
240 u16_ct_tolower (const uint16_t *s, size_t n,
241 casing_prefix_context_t prefix_context,
242 casing_suffix_context_t suffix_context,
243 const char *iso639_language,
244 uninorm_t nf,
245 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
246extern uint32_t *
247 u32_ct_tolower (const uint32_t *s, size_t n,
248 casing_prefix_context_t prefix_context,
249 casing_suffix_context_t suffix_context,
250 const char *iso639_language,
251 uninorm_t nf,
252 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
253
254/* Return the titlecase mapping of a string that is surrounded by a prefix
255 and a suffix. */
256extern uint8_t *
257 u8_ct_totitle (const uint8_t *s, size_t n,
258 casing_prefix_context_t prefix_context,
259 casing_suffix_context_t suffix_context,
260 const char *iso639_language,
261 uninorm_t nf,
262 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
263extern uint16_t *
264 u16_ct_totitle (const uint16_t *s, size_t n,
265 casing_prefix_context_t prefix_context,
266 casing_suffix_context_t suffix_context,
267 const char *iso639_language,
268 uninorm_t nf,
269 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
270extern uint32_t *
271 u32_ct_totitle (const uint32_t *s, size_t n,
272 casing_prefix_context_t prefix_context,
273 casing_suffix_context_t suffix_context,
274 const char *iso639_language,
275 uninorm_t nf,
276 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
277
278/* Return the case folded string.
279 Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
280 to comparing S1 and S2 with uN_casecmp().
281 The nf argument identifies the normalization form to apply after the
282 case-mapping. It can also be NULL, for no normalization. */
283extern uint8_t *
284 u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
285 uninorm_t nf,
286 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
287extern uint16_t *
288 u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
289 uninorm_t nf,
290 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
291extern uint32_t *
292 u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
293 uninorm_t nf,
294 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
295/* Likewise, for a string that is surrounded by a prefix and a suffix. */
296extern uint8_t *
297 u8_ct_casefold (const uint8_t *s, size_t n,
298 casing_prefix_context_t prefix_context,
299 casing_suffix_context_t suffix_context,
300 const char *iso639_language,
301 uninorm_t nf,
302 uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
303extern uint16_t *
304 u16_ct_casefold (const uint16_t *s, size_t n,
305 casing_prefix_context_t prefix_context,
306 casing_suffix_context_t suffix_context,
307 const char *iso639_language,
308 uninorm_t nf,
309 uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
310extern uint32_t *
311 u32_ct_casefold (const uint32_t *s, size_t n,
312 casing_prefix_context_t prefix_context,
313 casing_suffix_context_t suffix_context,
314 const char *iso639_language,
315 uninorm_t nf,
316 uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
317
318/* Compare S1 and S2, ignoring differences in case and normalization.
319 The nf argument identifies the normalization form to apply after the
320 case-mapping. It can also be NULL, for no normalization.
321 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
322 return 0. Upon failure, return -1 with errno set. */
323extern int
324 u8_casecmp (const uint8_t *s1, size_t n1,
325 const uint8_t *s2, size_t n2,
326 const char *iso639_language, uninorm_t nf, int *resultp);
327extern int
328 u16_casecmp (const uint16_t *s1, size_t n1,
329 const uint16_t *s2, size_t n2,
330 const char *iso639_language, uninorm_t nf, int *resultp);
331extern int
332 u32_casecmp (const uint32_t *s1, size_t n1,
333 const uint32_t *s2, size_t n2,
334 const char *iso639_language, uninorm_t nf, int *resultp);
335extern int
336 ulc_casecmp (const char *s1, size_t n1,
337 const char *s2, size_t n2,
338 const char *iso639_language, uninorm_t nf, int *resultp);
339
340/* Convert the string S of length N to a NUL-terminated byte sequence, in such
341 a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
342 function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
343 NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */
344extern char *
345 u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
346 uninorm_t nf,
347 char *_UC_RESTRICT resultbuf, size_t *lengthp);
348extern char *
349 u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
350 uninorm_t nf,
351 char *_UC_RESTRICT resultbuf, size_t *lengthp);
352extern char *
353 u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
354 uninorm_t nf,
355 char *_UC_RESTRICT resultbuf, size_t *lengthp);
356extern char *
357 ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
358 uninorm_t nf,
359 char *_UC_RESTRICT resultbuf, size_t *lengthp);
360
361/* Compare S1 and S2, ignoring differences in case and normalization, using the
362 collation rules of the current locale.
363 The nf argument identifies the normalization form to apply after the
364 case-mapping. It must be either UNINORM_NFC or UNINORM_NFKC, or it can
365 be NULL, for no normalization.
366 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
367 return 0. Upon failure, return -1 with errno set. */
368extern int
369 u8_casecoll (const uint8_t *s1, size_t n1,
370 const uint8_t *s2, size_t n2,
371 const char *iso639_language, uninorm_t nf, int *resultp);
372extern int
373 u16_casecoll (const uint16_t *s1, size_t n1,
374 const uint16_t *s2, size_t n2,
375 const char *iso639_language, uninorm_t nf, int *resultp);
376extern int
377 u32_casecoll (const uint32_t *s1, size_t n1,
378 const uint32_t *s2, size_t n2,
379 const char *iso639_language, uninorm_t nf, int *resultp);
380extern int
381 ulc_casecoll (const char *s1, size_t n1,
382 const char *s2, size_t n2,
383 const char *iso639_language, uninorm_t nf, int *resultp);
384
385
386/* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
387 otherwise, and return 0. Upon failure, return -1 with errno set. */
388extern int
389 u8_is_uppercase (const uint8_t *s, size_t n,
390 const char *iso639_language,
391 bool *resultp);
392extern int
393 u16_is_uppercase (const uint16_t *s, size_t n,
394 const char *iso639_language,
395 bool *resultp);
396extern int
397 u32_is_uppercase (const uint32_t *s, size_t n,
398 const char *iso639_language,
399 bool *resultp);
400
401/* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
402 otherwise, and return 0. Upon failure, return -1 with errno set. */
403extern int
404 u8_is_lowercase (const uint8_t *s, size_t n,
405 const char *iso639_language,
406 bool *resultp);
407extern int
408 u16_is_lowercase (const uint16_t *s, size_t n,
409 const char *iso639_language,
410 bool *resultp);
411extern int
412 u32_is_lowercase (const uint32_t *s, size_t n,
413 const char *iso639_language,
414 bool *resultp);
415
416/* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
417 otherwise, and return 0. Upon failure, return -1 with errno set. */
418extern int
419 u8_is_titlecase (const uint8_t *s, size_t n,
420 const char *iso639_language,
421 bool *resultp);
422extern int
423 u16_is_titlecase (const uint16_t *s, size_t n,
424 const char *iso639_language,
425 bool *resultp);
426extern int
427 u32_is_titlecase (const uint32_t *s, size_t n,
428 const char *iso639_language,
429 bool *resultp);
430
431/* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
432 false otherwise, and return 0. Upon failure, return -1 with errno set. */
433extern int
434 u8_is_casefolded (const uint8_t *s, size_t n,
435 const char *iso639_language,
436 bool *resultp);
437extern int
438 u16_is_casefolded (const uint16_t *s, size_t n,
439 const char *iso639_language,
440 bool *resultp);
441extern int
442 u32_is_casefolded (const uint32_t *s, size_t n,
443 const char *iso639_language,
444 bool *resultp);
445
446/* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
447 either upper case or lower case or title case is not a no-op.
448 Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
449 under the lower case mapping, and under the title case mapping; in other
450 words, when NFD(S) consists entirely of caseless characters.
451 Return 0 if successful. Upon failure, return -1 with errno set. */
452extern int
453 u8_is_cased (const uint8_t *s, size_t n,
454 const char *iso639_language,
455 bool *resultp);
456extern int
457 u16_is_cased (const uint16_t *s, size_t n,
458 const char *iso639_language,
459 bool *resultp);
460extern int
461 u32_is_cased (const uint32_t *s, size_t n,
462 const char *iso639_language,
463 bool *resultp);
464
465
466/* ========================================================================= */
467
468#ifdef __cplusplus
469}
470#endif
471
472#endif /* _UNICASE_H */