diff options
Diffstat (limited to 'gl/regex_internal.h')
| -rw-r--r-- | gl/regex_internal.h | 51 |
1 file changed, 20 insertions, 31 deletions
diff --git a/gl/regex_internal.h b/gl/regex_internal.h
index 1245e782..57a455b1 100644
--- a/gl/regex_internal.h
+++ b/gl/regex_internal.h
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Extended regular expression matching and search library. | 1 | /* Extended regular expression matching and search library. |
| 2 | Copyright (C) 2002-2021 Free Software Foundation, Inc. | 2 | Copyright (C) 2002-2022 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. | 3 | This file is part of the GNU C Library. |
| 4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | 4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
| 5 | 5 | ||
| @@ -116,10 +116,6 @@ | |||
| 116 | # define gettext_noop(String) String | 116 | # define gettext_noop(String) String |
| 117 | #endif | 117 | #endif |
| 118 | 118 | ||
| 119 | #if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC | ||
| 120 | # define RE_ENABLE_I18N | ||
| 121 | #endif | ||
| 122 | |||
| 123 | /* Number of ASCII characters. */ | 119 | /* Number of ASCII characters. */ |
| 124 | #define ASCII_CHARS 0x80 | 120 | #define ASCII_CHARS 0x80 |
| 125 | 121 | ||
| @@ -150,6 +146,11 @@ | |||
| 150 | # define __regfree regfree | 146 | # define __regfree regfree |
| 151 | #endif /* not _LIBC */ | 147 | #endif /* not _LIBC */ |
| 152 | 148 | ||
| 149 | /* Types related to integers. Unless protected by #ifdef _LIBC, the | ||
| 150 | regex code should avoid exact-width types like int32_t and uint64_t | ||
| 151 | as some non-GCC platforms lack them, an issue when this code is | ||
| 152 | used in Gnulib. */ | ||
| 153 | |||
| 153 | #ifndef SSIZE_MAX | 154 | #ifndef SSIZE_MAX |
| 154 | # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) | 155 | # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2)) |
| 155 | #endif | 156 | #endif |
| @@ -246,10 +247,8 @@ typedef enum | |||
| 246 | SIMPLE_BRACKET = 3, | 247 | SIMPLE_BRACKET = 3, |
| 247 | OP_BACK_REF = 4, | 248 | OP_BACK_REF = 4, |
| 248 | OP_PERIOD = 5, | 249 | OP_PERIOD = 5, |
| 249 | #ifdef RE_ENABLE_I18N | ||
| 250 | COMPLEX_BRACKET = 6, | 250 | COMPLEX_BRACKET = 6, |
| 251 | OP_UTF8_PERIOD = 7, | 251 | OP_UTF8_PERIOD = 7, |
| 252 | #endif /* RE_ENABLE_I18N */ | ||
| 253 | 252 | ||
| 254 | /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used | 253 | /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used |
| 255 | when the debugger shows values of this enum type. */ | 254 | when the debugger shows values of this enum type. */ |
| @@ -287,30 +286,29 @@ typedef enum | |||
| 287 | 286 | ||
| 288 | } re_token_type_t; | 287 | } re_token_type_t; |
| 289 | 288 | ||
| 290 | #ifdef RE_ENABLE_I18N | ||
| 291 | typedef struct | 289 | typedef struct |
| 292 | { | 290 | { |
| 293 | /* Multibyte characters. */ | 291 | /* Multibyte characters. */ |
| 294 | wchar_t *mbchars; | 292 | wchar_t *mbchars; |
| 295 | 293 | ||
| 294 | #ifdef _LIBC | ||
| 296 | /* Collating symbols. */ | 295 | /* Collating symbols. */ |
| 297 | # ifdef _LIBC | ||
| 298 | int32_t *coll_syms; | 296 | int32_t *coll_syms; |
| 299 | # endif | 297 | #endif |
| 300 | 298 | ||
| 299 | #ifdef _LIBC | ||
| 301 | /* Equivalence classes. */ | 300 | /* Equivalence classes. */ |
| 302 | # ifdef _LIBC | ||
| 303 | int32_t *equiv_classes; | 301 | int32_t *equiv_classes; |
| 304 | # endif | 302 | #endif |
| 305 | 303 | ||
| 306 | /* Range expressions. */ | 304 | /* Range expressions. */ |
| 307 | # ifdef _LIBC | 305 | #ifdef _LIBC |
| 308 | uint32_t *range_starts; | 306 | uint32_t *range_starts; |
| 309 | uint32_t *range_ends; | 307 | uint32_t *range_ends; |
| 310 | # else /* not _LIBC */ | 308 | #else |
| 311 | wchar_t *range_starts; | 309 | wchar_t *range_starts; |
| 312 | wchar_t *range_ends; | 310 | wchar_t *range_ends; |
| 313 | # endif /* not _LIBC */ | 311 | #endif |
| 314 | 312 | ||
| 315 | /* Character classes. */ | 313 | /* Character classes. */ |
| 316 | wctype_t *char_classes; | 314 | wctype_t *char_classes; |
| @@ -333,7 +331,6 @@ typedef struct | |||
| 333 | /* # of character classes. */ | 331 | /* # of character classes. */ |
| 334 | Idx nchar_classes; | 332 | Idx nchar_classes; |
| 335 | } re_charset_t; | 333 | } re_charset_t; |
| 336 | #endif /* RE_ENABLE_I18N */ | ||
| 337 | 334 | ||
| 338 | typedef struct | 335 | typedef struct |
| 339 | { | 336 | { |
| @@ -341,9 +338,7 @@ typedef struct | |||
| 341 | { | 338 | { |
| 342 | unsigned char c; /* for CHARACTER */ | 339 | unsigned char c; /* for CHARACTER */ |
| 343 | re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ | 340 | re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ |
| 344 | #ifdef RE_ENABLE_I18N | ||
| 345 | re_charset_t *mbcset; /* for COMPLEX_BRACKET */ | 341 | re_charset_t *mbcset; /* for COMPLEX_BRACKET */ |
| 346 | #endif /* RE_ENABLE_I18N */ | ||
| 347 | Idx idx; /* for BACK_REF */ | 342 | Idx idx; /* for BACK_REF */ |
| 348 | re_context_type ctx_type; /* for ANCHOR */ | 343 | re_context_type ctx_type; /* for ANCHOR */ |
| 349 | } opr; | 344 | } opr; |
| @@ -355,12 +350,10 @@ typedef struct | |||
| 355 | unsigned int constraint : 10; /* context constraint */ | 350 | unsigned int constraint : 10; /* context constraint */ |
| 356 | unsigned int duplicated : 1; | 351 | unsigned int duplicated : 1; |
| 357 | unsigned int opt_subexp : 1; | 352 | unsigned int opt_subexp : 1; |
| 358 | #ifdef RE_ENABLE_I18N | ||
| 359 | unsigned int accept_mb : 1; | 353 | unsigned int accept_mb : 1; |
| 360 | /* These 2 bits can be moved into the union if needed (e.g. if running out | 354 | /* These 2 bits can be moved into the union if needed (e.g. if running out |
| 361 | of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ | 355 | of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ |
| 362 | unsigned int mb_partial : 1; | 356 | unsigned int mb_partial : 1; |
| 363 | #endif | ||
| 364 | unsigned int word_char : 1; | 357 | unsigned int word_char : 1; |
| 365 | } re_token_t; | 358 | } re_token_t; |
| 366 | 359 | ||
| @@ -375,12 +368,10 @@ struct re_string_t | |||
| 375 | REG_ICASE, upper cases of the string are stored, otherwise MBS points | 368 | REG_ICASE, upper cases of the string are stored, otherwise MBS points |
| 376 | the same address that RAW_MBS points. */ | 369 | the same address that RAW_MBS points. */ |
| 377 | unsigned char *mbs; | 370 | unsigned char *mbs; |
| 378 | #ifdef RE_ENABLE_I18N | ||
| 379 | /* Store the wide character string which is corresponding to MBS. */ | 371 | /* Store the wide character string which is corresponding to MBS. */ |
| 380 | wint_t *wcs; | 372 | wint_t *wcs; |
| 381 | Idx *offsets; | 373 | Idx *offsets; |
| 382 | mbstate_t cur_state; | 374 | mbstate_t cur_state; |
| 383 | #endif | ||
| 384 | /* Index in RAW_MBS. Each character mbs[i] corresponds to | 375 | /* Index in RAW_MBS. Each character mbs[i] corresponds to |
| 385 | raw_mbs[raw_mbs_idx + i]. */ | 376 | raw_mbs[raw_mbs_idx + i]. */ |
| 386 | Idx raw_mbs_idx; | 377 | Idx raw_mbs_idx; |
| @@ -779,7 +770,6 @@ bitset_mask (bitset_t dest, const bitset_t src) | |||
| 779 | dest[bitset_i] &= src[bitset_i]; | 770 | dest[bitset_i] &= src[bitset_i]; |
| 780 | } | 771 | } |
| 781 | 772 | ||
| 782 | #ifdef RE_ENABLE_I18N | ||
| 783 | /* Functions for re_string. */ | 773 | /* Functions for re_string. */ |
| 784 | static int | 774 | static int |
| 785 | __attribute__ ((pure, unused)) | 775 | __attribute__ ((pure, unused)) |
| @@ -803,15 +793,15 @@ re_string_wchar_at (const re_string_t *pstr, Idx idx) | |||
| 803 | return (wint_t) pstr->wcs[idx]; | 793 | return (wint_t) pstr->wcs[idx]; |
| 804 | } | 794 | } |
| 805 | 795 | ||
| 806 | # ifdef _LIBC | 796 | #ifdef _LIBC |
| 807 | # include <locale/weight.h> | 797 | # include <locale/weight.h> |
| 808 | # endif | 798 | #endif |
| 809 | 799 | ||
| 810 | static int | 800 | static int |
| 811 | __attribute__ ((pure, unused)) | 801 | __attribute__ ((pure, unused)) |
| 812 | re_string_elem_size_at (const re_string_t *pstr, Idx idx) | 802 | re_string_elem_size_at (const re_string_t *pstr, Idx idx) |
| 813 | { | 803 | { |
| 814 | # ifdef _LIBC | 804 | #ifdef _LIBC |
| 815 | const unsigned char *p, *extra; | 805 | const unsigned char *p, *extra; |
| 816 | const int32_t *table, *indirect; | 806 | const int32_t *table, *indirect; |
| 817 | uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | 807 | uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
| @@ -827,11 +817,10 @@ re_string_elem_size_at (const re_string_t *pstr, Idx idx) | |||
| 827 | findidx (table, indirect, extra, &p, pstr->len - idx); | 817 | findidx (table, indirect, extra, &p, pstr->len - idx); |
| 828 | return p - pstr->mbs - idx; | 818 | return p - pstr->mbs - idx; |
| 829 | } | 819 | } |
| 830 | else | 820 | #endif /* _LIBC */ |
| 831 | # endif /* _LIBC */ | 821 | |
| 832 | return 1; | 822 | return 1; |
| 833 | } | 823 | } |
| 834 | #endif /* RE_ENABLE_I18N */ | ||
| 835 | 824 | ||
| 836 | #ifdef _LIBC | 825 | #ifdef _LIBC |
| 837 | # if __GNUC__ >= 7 | 826 | # if __GNUC__ >= 7 |
