summary refs log tree commit diff stats
path: root/gl/regex_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regex_internal.h')
-rw-r--r-- gl/regex_internal.h 51
1 files changed, 20 insertions, 31 deletions
diff --git a/gl/regex_internal.h b/gl/regex_internal.h
index 1245e78..57a455b 100644
--- a/gl/regex_internal.h
+++ b/gl/regex_internal.h
@@ -1,5 +1,5 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2021 Free Software Foundation, Inc. 2 Copyright (C) 2002-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 5
@@ -116,10 +116,6 @@
116# define gettext_noop(String) String 116# define gettext_noop(String) String
117#endif 117#endif
118 118
119#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC
120# define RE_ENABLE_I18N
121#endif
122
123/* Number of ASCII characters. */ 119/* Number of ASCII characters. */
124#define ASCII_CHARS 0x80 120#define ASCII_CHARS 0x80
125 121
@@ -150,6 +146,11 @@
150# define __regfree regfree 146# define __regfree regfree
151#endif /* not _LIBC */ 147#endif /* not _LIBC */
152 148
149/* Types related to integers. Unless protected by #ifdef _LIBC, the
150 regex code should avoid exact-width types like int32_t and uint64_t
151 as some non-GCC platforms lack them, an issue when this code is
152 used in Gnulib. */
153
153#ifndef SSIZE_MAX 154#ifndef SSIZE_MAX
154# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) 155# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
155#endif 156#endif
@@ -246,10 +247,8 @@ typedef enum
246 SIMPLE_BRACKET = 3, 247 SIMPLE_BRACKET = 3,
247 OP_BACK_REF = 4, 248 OP_BACK_REF = 4,
248 OP_PERIOD = 5, 249 OP_PERIOD = 5,
249#ifdef RE_ENABLE_I18N
250 COMPLEX_BRACKET = 6, 250 COMPLEX_BRACKET = 6,
251 OP_UTF8_PERIOD = 7, 251 OP_UTF8_PERIOD = 7,
252#endif /* RE_ENABLE_I18N */
253 252
254 /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used 253 /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
255 when the debugger shows values of this enum type. */ 254 when the debugger shows values of this enum type. */
@@ -287,30 +286,29 @@ typedef enum
287 286
288} re_token_type_t; 287} re_token_type_t;
289 288
290#ifdef RE_ENABLE_I18N
291typedef struct 289typedef struct
292{ 290{
293 /* Multibyte characters. */ 291 /* Multibyte characters. */
294 wchar_t *mbchars; 292 wchar_t *mbchars;
295 293
294#ifdef _LIBC
296 /* Collating symbols. */ 295 /* Collating symbols. */
297# ifdef _LIBC
298 int32_t *coll_syms; 296 int32_t *coll_syms;
299# endif 297#endif
300 298
299#ifdef _LIBC
301 /* Equivalence classes. */ 300 /* Equivalence classes. */
302# ifdef _LIBC
303 int32_t *equiv_classes; 301 int32_t *equiv_classes;
304# endif 302#endif
305 303
306 /* Range expressions. */ 304 /* Range expressions. */
307# ifdef _LIBC 305#ifdef _LIBC
308 uint32_t *range_starts; 306 uint32_t *range_starts;
309 uint32_t *range_ends; 307 uint32_t *range_ends;
310# else /* not _LIBC */ 308#else
311 wchar_t *range_starts; 309 wchar_t *range_starts;
312 wchar_t *range_ends; 310 wchar_t *range_ends;
313# endif /* not _LIBC */ 311#endif
314 312
315 /* Character classes. */ 313 /* Character classes. */
316 wctype_t *char_classes; 314 wctype_t *char_classes;
@@ -333,7 +331,6 @@ typedef struct
333 /* # of character classes. */ 331 /* # of character classes. */
334 Idx nchar_classes; 332 Idx nchar_classes;
335} re_charset_t; 333} re_charset_t;
336#endif /* RE_ENABLE_I18N */
337 334
338typedef struct 335typedef struct
339{ 336{
@@ -341,9 +338,7 @@ typedef struct
341 { 338 {
342 unsigned char c; /* for CHARACTER */ 339 unsigned char c; /* for CHARACTER */
343 re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ 340 re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
344#ifdef RE_ENABLE_I18N
345 re_charset_t *mbcset; /* for COMPLEX_BRACKET */ 341 re_charset_t *mbcset; /* for COMPLEX_BRACKET */
346#endif /* RE_ENABLE_I18N */
347 Idx idx; /* for BACK_REF */ 342 Idx idx; /* for BACK_REF */
348 re_context_type ctx_type; /* for ANCHOR */ 343 re_context_type ctx_type; /* for ANCHOR */
349 } opr; 344 } opr;
@@ -355,12 +350,10 @@ typedef struct
355 unsigned int constraint : 10; /* context constraint */ 350 unsigned int constraint : 10; /* context constraint */
356 unsigned int duplicated : 1; 351 unsigned int duplicated : 1;
357 unsigned int opt_subexp : 1; 352 unsigned int opt_subexp : 1;
358#ifdef RE_ENABLE_I18N
359 unsigned int accept_mb : 1; 353 unsigned int accept_mb : 1;
360 /* These 2 bits can be moved into the union if needed (e.g. if running out 354 /* These 2 bits can be moved into the union if needed (e.g. if running out
361 of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ 355 of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
362 unsigned int mb_partial : 1; 356 unsigned int mb_partial : 1;
363#endif
364 unsigned int word_char : 1; 357 unsigned int word_char : 1;
365} re_token_t; 358} re_token_t;
366 359
@@ -375,12 +368,10 @@ struct re_string_t
375 REG_ICASE, upper cases of the string are stored, otherwise MBS points 368 REG_ICASE, upper cases of the string are stored, otherwise MBS points
376 the same address that RAW_MBS points. */ 369 the same address that RAW_MBS points. */
377 unsigned char *mbs; 370 unsigned char *mbs;
378#ifdef RE_ENABLE_I18N
379 /* Store the wide character string which is corresponding to MBS. */ 371 /* Store the wide character string which is corresponding to MBS. */
380 wint_t *wcs; 372 wint_t *wcs;
381 Idx *offsets; 373 Idx *offsets;
382 mbstate_t cur_state; 374 mbstate_t cur_state;
383#endif
384 /* Index in RAW_MBS. Each character mbs[i] corresponds to 375 /* Index in RAW_MBS. Each character mbs[i] corresponds to
385 raw_mbs[raw_mbs_idx + i]. */ 376 raw_mbs[raw_mbs_idx + i]. */
386 Idx raw_mbs_idx; 377 Idx raw_mbs_idx;
@@ -779,7 +770,6 @@ bitset_mask (bitset_t dest, const bitset_t src)
779 dest[bitset_i] &= src[bitset_i]; 770 dest[bitset_i] &= src[bitset_i];
780} 771}
781 772
782#ifdef RE_ENABLE_I18N
783/* Functions for re_string. */ 773/* Functions for re_string. */
784static int 774static int
785__attribute__ ((pure, unused)) 775__attribute__ ((pure, unused))
@@ -803,15 +793,15 @@ re_string_wchar_at (const re_string_t *pstr, Idx idx)
803 return (wint_t) pstr->wcs[idx]; 793 return (wint_t) pstr->wcs[idx];
804} 794}
805 795
806# ifdef _LIBC 796#ifdef _LIBC
807# include <locale/weight.h> 797# include <locale/weight.h>
808# endif 798#endif
809 799
810static int 800static int
811__attribute__ ((pure, unused)) 801__attribute__ ((pure, unused))
812re_string_elem_size_at (const re_string_t *pstr, Idx idx) 802re_string_elem_size_at (const re_string_t *pstr, Idx idx)
813{ 803{
814# ifdef _LIBC 804#ifdef _LIBC
815 const unsigned char *p, *extra; 805 const unsigned char *p, *extra;
816 const int32_t *table, *indirect; 806 const int32_t *table, *indirect;
817 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 807 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -827,11 +817,10 @@ re_string_elem_size_at (const re_string_t *pstr, Idx idx)
827 findidx (table, indirect, extra, &p, pstr->len - idx); 817 findidx (table, indirect, extra, &p, pstr->len - idx);
828 return p - pstr->mbs - idx; 818 return p - pstr->mbs - idx;
829 } 819 }
830 else 820#endif /* _LIBC */
831# endif /* _LIBC */ 821
832 return 1; 822 return 1;
833} 823}
834#endif /* RE_ENABLE_I18N */
835 824
836#ifdef _LIBC 825#ifdef _LIBC
837# if __GNUC__ >= 7 826# if __GNUC__ >= 7