summary refs log tree commit diff stats
path: root/gl/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regcomp.c')
-rw-r--r-- gl/regcomp.c 831
1 files changed, 351 insertions, 480 deletions
diff --git a/gl/regcomp.c b/gl/regcomp.c
index 887e5b5..122c3de 100644
--- a/gl/regcomp.c
+++ b/gl/regcomp.c
@@ -1,5 +1,5 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2021 Free Software Foundation, Inc. 2 Copyright (C) 2002-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 5
@@ -27,14 +27,10 @@ static void re_compile_fastmap_iter (regex_t *bufp,
27 const re_dfastate_t *init_state, 27 const re_dfastate_t *init_state,
28 char *fastmap); 28 char *fastmap);
29static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); 29static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
30#ifdef RE_ENABLE_I18N
31static void free_charset (re_charset_t *cset); 30static void free_charset (re_charset_t *cset);
32#endif /* RE_ENABLE_I18N */
33static void free_workarea_compile (regex_t *preg); 31static void free_workarea_compile (regex_t *preg);
34static reg_errcode_t create_initial_state (re_dfa_t *dfa); 32static reg_errcode_t create_initial_state (re_dfa_t *dfa);
35#ifdef RE_ENABLE_I18N
36static void optimize_utf8 (re_dfa_t *dfa); 33static void optimize_utf8 (re_dfa_t *dfa);
37#endif
38static reg_errcode_t analyze (regex_t *preg); 34static reg_errcode_t analyze (regex_t *preg);
39static reg_errcode_t preorder (bin_tree_t *root, 35static reg_errcode_t preorder (bin_tree_t *root,
40 reg_errcode_t (fn (void *, bin_tree_t *)), 36 reg_errcode_t (fn (void *, bin_tree_t *)),
@@ -89,7 +85,6 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
89static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, 85static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
90 re_string_t *regexp, 86 re_string_t *regexp,
91 re_token_t *token); 87 re_token_t *token);
92#ifdef RE_ENABLE_I18N
93static reg_errcode_t build_equiv_class (bitset_t sbcset, 88static reg_errcode_t build_equiv_class (bitset_t sbcset,
94 re_charset_t *mbcset, 89 re_charset_t *mbcset,
95 Idx *equiv_class_alloc, 90 Idx *equiv_class_alloc,
@@ -100,14 +95,6 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
100 Idx *char_class_alloc, 95 Idx *char_class_alloc,
101 const char *class_name, 96 const char *class_name,
102 reg_syntax_t syntax); 97 reg_syntax_t syntax);
103#else /* not RE_ENABLE_I18N */
104static reg_errcode_t build_equiv_class (bitset_t sbcset,
105 const unsigned char *name);
106static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
107 bitset_t sbcset,
108 const char *class_name,
109 reg_syntax_t syntax);
110#endif /* not RE_ENABLE_I18N */
111static bin_tree_t *build_charclass_op (re_dfa_t *dfa, 98static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
112 RE_TRANSLATE_TYPE trans, 99 RE_TRANSLATE_TYPE trans,
113 const char *class_name, 100 const char *class_name,
@@ -279,8 +266,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
279} 266}
280weak_alias (__re_compile_fastmap, re_compile_fastmap) 267weak_alias (__re_compile_fastmap, re_compile_fastmap)
281 268
282static inline void 269static __always_inline void
283__attribute__ ((always_inline))
284re_set_fastmap (char *fastmap, bool icase, int ch) 270re_set_fastmap (char *fastmap, bool icase, int ch)
285{ 271{
286 fastmap[ch] = 1; 272 fastmap[ch] = 1;
@@ -306,7 +292,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
306 if (type == CHARACTER) 292 if (type == CHARACTER)
307 { 293 {
308 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); 294 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
309#ifdef RE_ENABLE_I18N
310 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) 295 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
311 { 296 {
312 unsigned char buf[MB_LEN_MAX]; 297 unsigned char buf[MB_LEN_MAX];
@@ -327,7 +312,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
327 != (size_t) -1)) 312 != (size_t) -1))
328 re_set_fastmap (fastmap, false, buf[0]); 313 re_set_fastmap (fastmap, false, buf[0]);
329 } 314 }
330#endif
331 } 315 }
332 else if (type == SIMPLE_BRACKET) 316 else if (type == SIMPLE_BRACKET)
333 { 317 {
@@ -341,13 +325,12 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
341 re_set_fastmap (fastmap, icase, ch); 325 re_set_fastmap (fastmap, icase, ch);
342 } 326 }
343 } 327 }
344#ifdef RE_ENABLE_I18N
345 else if (type == COMPLEX_BRACKET) 328 else if (type == COMPLEX_BRACKET)
346 { 329 {
347 re_charset_t *cset = dfa->nodes[node].opr.mbcset; 330 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
348 Idx i; 331 Idx i;
349 332
350# ifdef _LIBC 333#ifdef _LIBC
351 /* See if we have to try all bytes which start multiple collation 334 /* See if we have to try all bytes which start multiple collation
352 elements. 335 elements.
353 e.g. In da_DK, we want to catch 'a' since "aa" is a valid 336 e.g. In da_DK, we want to catch 'a' since "aa" is a valid
@@ -363,7 +346,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
363 if (table[i] < 0) 346 if (table[i] < 0)
364 re_set_fastmap (fastmap, icase, i); 347 re_set_fastmap (fastmap, icase, i);
365 } 348 }
366# endif /* _LIBC */ 349#endif /* _LIBC */
367 350
368 /* See if we have to start the match at all multibyte characters, 351 /* See if we have to start the match at all multibyte characters,
369 i.e. where we would not find an invalid sequence. This only 352 i.e. where we would not find an invalid sequence. This only
@@ -371,9 +354,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
371 sets, the SIMPLE_BRACKET again suffices. */ 354 sets, the SIMPLE_BRACKET again suffices. */
372 if (dfa->mb_cur_max > 1 355 if (dfa->mb_cur_max > 1
373 && (cset->nchar_classes || cset->non_match || cset->nranges 356 && (cset->nchar_classes || cset->non_match || cset->nranges
374# ifdef _LIBC 357#ifdef _LIBC
375 || cset->nequiv_classes 358 || cset->nequiv_classes
376# endif /* _LIBC */ 359#endif /* _LIBC */
377 )) 360 ))
378 { 361 {
379 unsigned char c = 0; 362 unsigned char c = 0;
@@ -406,12 +389,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
406 } 389 }
407 } 390 }
408 } 391 }
409#endif /* RE_ENABLE_I18N */ 392 else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type == END_OF_RE)
410 else if (type == OP_PERIOD
411#ifdef RE_ENABLE_I18N
412 || type == OP_UTF8_PERIOD
413#endif /* RE_ENABLE_I18N */
414 || type == END_OF_RE)
415 { 393 {
416 memset (fastmap, '\1', sizeof (char) * SBC_MAX); 394 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
417 if (type == END_OF_RE) 395 if (type == END_OF_RE)
@@ -550,7 +528,6 @@ regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf,
550weak_alias (__regerror, regerror) 528weak_alias (__regerror, regerror)
551 529
552 530
553#ifdef RE_ENABLE_I18N
554/* This static array is used for the map to single-byte characters when 531/* This static array is used for the map to single-byte characters when
555 UTF-8 is used. Otherwise we would allocate memory just to initialize 532 UTF-8 is used. Otherwise we would allocate memory just to initialize
556 it the same all the time. UTF-8 is the preferred encoding so this is 533 it the same all the time. UTF-8 is the preferred encoding so this is
@@ -558,25 +535,24 @@ weak_alias (__regerror, regerror)
558static const bitset_t utf8_sb_map = 535static const bitset_t utf8_sb_map =
559{ 536{
560 /* Set the first 128 bits. */ 537 /* Set the first 128 bits. */
561# if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__ 538#if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
562 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX 539 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
563# else 540#else
564# if 4 * BITSET_WORD_BITS < ASCII_CHARS 541# if 4 * BITSET_WORD_BITS < ASCII_CHARS
565# error "bitset_word_t is narrower than 32 bits" 542# error "bitset_word_t is narrower than 32 bits"
566# elif 3 * BITSET_WORD_BITS < ASCII_CHARS 543# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
567 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX, 544 BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
568# elif 2 * BITSET_WORD_BITS < ASCII_CHARS 545# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
569 BITSET_WORD_MAX, BITSET_WORD_MAX, 546 BITSET_WORD_MAX, BITSET_WORD_MAX,
570# elif 1 * BITSET_WORD_BITS < ASCII_CHARS 547# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
571 BITSET_WORD_MAX, 548 BITSET_WORD_MAX,
572# endif 549# endif
573 (BITSET_WORD_MAX 550 (BITSET_WORD_MAX
574 >> (SBC_MAX % BITSET_WORD_BITS == 0 551 >> (SBC_MAX % BITSET_WORD_BITS == 0
575 ? 0 552 ? 0
576 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) 553 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
577# endif
578};
579#endif 554#endif
555};
580 556
581 557
582static void 558static void
@@ -614,10 +590,8 @@ free_dfa_content (re_dfa_t *dfa)
614 re_free (entry->array); 590 re_free (entry->array);
615 } 591 }
616 re_free (dfa->state_table); 592 re_free (dfa->state_table);
617#ifdef RE_ENABLE_I18N
618 if (dfa->sb_char != utf8_sb_map) 593 if (dfa->sb_char != utf8_sb_map)
619 re_free (dfa->sb_char); 594 re_free (dfa->sb_char);
620#endif
621 re_free (dfa->subexp_map); 595 re_free (dfa->subexp_map);
622#ifdef DEBUG 596#ifdef DEBUG
623 re_free (dfa->re_str); 597 re_free (dfa->re_str);
@@ -796,11 +770,9 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
796 if (__glibc_unlikely (err != REG_NOERROR)) 770 if (__glibc_unlikely (err != REG_NOERROR))
797 goto re_compile_internal_free_return; 771 goto re_compile_internal_free_return;
798 772
799#ifdef RE_ENABLE_I18N
800 /* If possible, do searching in single byte encoding to speed things up. */ 773 /* If possible, do searching in single byte encoding to speed things up. */
801 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) 774 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
802 optimize_utf8 (dfa); 775 optimize_utf8 (dfa);
803#endif
804 776
805 /* Then create the initial state of the dfa. */ 777 /* Then create the initial state of the dfa. */
806 err = create_initial_state (dfa); 778 err = create_initial_state (dfa);
@@ -830,11 +802,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
830#ifndef _LIBC 802#ifndef _LIBC
831 const char *codeset_name; 803 const char *codeset_name;
832#endif 804#endif
833#ifdef RE_ENABLE_I18N
834 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); 805 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
835#else
836 size_t max_i18n_object_size = 0;
837#endif
838 size_t max_object_size = 806 size_t max_object_size =
839 MAX (sizeof (struct re_state_table_entry), 807 MAX (sizeof (struct re_state_table_entry),
840 MAX (sizeof (re_token_t), 808 MAX (sizeof (re_token_t),
@@ -886,7 +854,6 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
886 dfa->map_notascii = 0; 854 dfa->map_notascii = 0;
887#endif 855#endif
888 856
889#ifdef RE_ENABLE_I18N
890 if (dfa->mb_cur_max > 1) 857 if (dfa->mb_cur_max > 1)
891 { 858 {
892 if (dfa->is_utf8) 859 if (dfa->is_utf8)
@@ -906,14 +873,13 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
906 wint_t wch = __btowc (ch); 873 wint_t wch = __btowc (ch);
907 if (wch != WEOF) 874 if (wch != WEOF)
908 dfa->sb_char[i] |= (bitset_word_t) 1 << j; 875 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
909# ifndef _LIBC 876#ifndef _LIBC
910 if (isascii (ch) && wch != ch) 877 if (isascii (ch) && wch != ch)
911 dfa->map_notascii = 1; 878 dfa->map_notascii = 1;
912# endif 879#endif
913 } 880 }
914 } 881 }
915 } 882 }
916#endif
917 883
918 if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL)) 884 if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL))
919 return REG_ESPACE; 885 return REG_ESPACE;
@@ -933,8 +899,6 @@ init_word_char (re_dfa_t *dfa)
933 dfa->word_ops_used = 1; 899 dfa->word_ops_used = 1;
934 if (__glibc_likely (dfa->map_notascii == 0)) 900 if (__glibc_likely (dfa->map_notascii == 0))
935 { 901 {
936 /* Avoid uint32_t and uint64_t as some non-GCC platforms lack
937 them, an issue when this code is used in Gnulib. */
938 bitset_word_t bits0 = 0x00000000; 902 bitset_word_t bits0 = 0x00000000;
939 bitset_word_t bits1 = 0x03ff0000; 903 bitset_word_t bits1 = 0x03ff0000;
940 bitset_word_t bits2 = 0x87fffffe; 904 bitset_word_t bits2 = 0x87fffffe;
@@ -1074,7 +1038,6 @@ create_initial_state (re_dfa_t *dfa)
1074 return REG_NOERROR; 1038 return REG_NOERROR;
1075} 1039}
1076 1040
1077#ifdef RE_ENABLE_I18N
1078/* If it is possible to do searching in single byte encoding instead of UTF-8 1041/* If it is possible to do searching in single byte encoding instead of UTF-8
1079 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change 1042 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1080 DFA nodes where needed. */ 1043 DFA nodes where needed. */
@@ -1154,7 +1117,6 @@ optimize_utf8 (re_dfa_t *dfa)
1154 dfa->is_utf8 = 0; 1117 dfa->is_utf8 = 0;
1155 dfa->has_mb_node = dfa->nbackref > 0 || has_period; 1118 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1156} 1119}
1157#endif
1158 1120
1159/* Analyze the structure tree, and calculate "first", "next", "edest", 1121/* Analyze the structure tree, and calculate "first", "next", "edest",
1160 "eclosure", and "inveclosure". */ 1122 "eclosure", and "inveclosure". */
@@ -1792,7 +1754,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1792 token->opr.c = c; 1754 token->opr.c = c;
1793 1755
1794 token->word_char = 0; 1756 token->word_char = 0;
1795#ifdef RE_ENABLE_I18N
1796 token->mb_partial = 0; 1757 token->mb_partial = 0;
1797 if (input->mb_cur_max > 1 1758 if (input->mb_cur_max > 1
1798 && !re_string_first_byte (input, re_string_cur_idx (input))) 1759 && !re_string_first_byte (input, re_string_cur_idx (input)))
@@ -1801,7 +1762,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1801 token->mb_partial = 1; 1762 token->mb_partial = 1;
1802 return 1; 1763 return 1;
1803 } 1764 }
1804#endif
1805 if (c == '\\') 1765 if (c == '\\')
1806 { 1766 {
1807 unsigned char c2; 1767 unsigned char c2;
@@ -1814,7 +1774,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1814 c2 = re_string_peek_byte_case (input, 1); 1774 c2 = re_string_peek_byte_case (input, 1);
1815 token->opr.c = c2; 1775 token->opr.c = c2;
1816 token->type = CHARACTER; 1776 token->type = CHARACTER;
1817#ifdef RE_ENABLE_I18N
1818 if (input->mb_cur_max > 1) 1777 if (input->mb_cur_max > 1)
1819 { 1778 {
1820 wint_t wc = re_string_wchar_at (input, 1779 wint_t wc = re_string_wchar_at (input,
@@ -1822,7 +1781,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1822 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; 1781 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1823 } 1782 }
1824 else 1783 else
1825#endif
1826 token->word_char = IS_WORD_CHAR (c2) != 0; 1784 token->word_char = IS_WORD_CHAR (c2) != 0;
1827 1785
1828 switch (c2) 1786 switch (c2)
@@ -1928,14 +1886,12 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
1928 } 1886 }
1929 1887
1930 token->type = CHARACTER; 1888 token->type = CHARACTER;
1931#ifdef RE_ENABLE_I18N
1932 if (input->mb_cur_max > 1) 1889 if (input->mb_cur_max > 1)
1933 { 1890 {
1934 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); 1891 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1935 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; 1892 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1936 } 1893 }
1937 else 1894 else
1938#endif
1939 token->word_char = IS_WORD_CHAR (token->opr.c); 1895 token->word_char = IS_WORD_CHAR (token->opr.c);
1940 1896
1941 switch (c) 1897 switch (c)
@@ -2027,14 +1983,12 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2027 c = re_string_peek_byte (input, 0); 1983 c = re_string_peek_byte (input, 0);
2028 token->opr.c = c; 1984 token->opr.c = c;
2029 1985
2030#ifdef RE_ENABLE_I18N
2031 if (input->mb_cur_max > 1 1986 if (input->mb_cur_max > 1
2032 && !re_string_first_byte (input, re_string_cur_idx (input))) 1987 && !re_string_first_byte (input, re_string_cur_idx (input)))
2033 { 1988 {
2034 token->type = CHARACTER; 1989 token->type = CHARACTER;
2035 return 1; 1990 return 1;
2036 } 1991 }
2037#endif /* RE_ENABLE_I18N */
2038 1992
2039 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) 1993 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
2040 && re_string_cur_idx (input) + 1 < re_string_length (input)) 1994 && re_string_cur_idx (input) + 1 < re_string_length (input))
@@ -2084,15 +2038,25 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
2084 } 2038 }
2085 switch (c) 2039 switch (c)
2086 { 2040 {
2087 case '-':
2088 token->type = OP_CHARSET_RANGE;
2089 break;
2090 case ']': 2041 case ']':
2091 token->type = OP_CLOSE_BRACKET; 2042 token->type = OP_CLOSE_BRACKET;
2092 break; 2043 break;
2093 case '^': 2044 case '^':
2094 token->type = OP_NON_MATCH_LIST; 2045 token->type = OP_NON_MATCH_LIST;
2095 break; 2046 break;
2047 case '-':
2048 /* In V7 Unix grep and Unix awk and mawk, [...---...]
2049 (3 adjacent minus signs) stands for a single minus sign.
2050 Support that without breaking anything else. */
2051 if (! (re_string_cur_idx (input) + 2 < re_string_length (input)
2052 && re_string_peek_byte (input, 1) == '-'
2053 && re_string_peek_byte (input, 2) == '-'))
2054 {
2055 token->type = OP_CHARSET_RANGE;
2056 break;
2057 }
2058 re_string_skip_bytes (input, 2);
2059 FALLTHROUGH;
2096 default: 2060 default:
2097 token->type = CHARACTER; 2061 token->type = CHARACTER;
2098 } 2062 }
@@ -2256,7 +2220,6 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2256 *err = REG_ESPACE; 2220 *err = REG_ESPACE;
2257 return NULL; 2221 return NULL;
2258 } 2222 }
2259#ifdef RE_ENABLE_I18N
2260 if (dfa->mb_cur_max > 1) 2223 if (dfa->mb_cur_max > 1)
2261 { 2224 {
2262 while (!re_string_eoi (regexp) 2225 while (!re_string_eoi (regexp)
@@ -2273,7 +2236,6 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2273 } 2236 }
2274 } 2237 }
2275 } 2238 }
2276#endif
2277 break; 2239 break;
2278 2240
2279 case OP_OPEN_SUBEXP: 2241 case OP_OPEN_SUBEXP:
@@ -2666,40 +2628,30 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2666 2628
2667#ifndef _LIBC 2629#ifndef _LIBC
2668 2630
2669# ifdef RE_ENABLE_I18N
2670/* Convert the byte B to the corresponding wide character. In a 2631/* Convert the byte B to the corresponding wide character. In a
2671 unibyte locale, treat B as itself. In a multibyte locale, return 2632 unibyte locale, treat B as itself. In a multibyte locale, return
2672 WEOF if B is an encoding error. */ 2633 WEOF if B is an encoding error. */
2673static wint_t 2634static wint_t
2674parse_byte (unsigned char b, re_charset_t *mbcset) 2635parse_byte (unsigned char b, re_dfa_t const *dfa)
2675{ 2636{
2676 return mbcset == NULL ? b : __btowc (b); 2637 return dfa->mb_cur_max > 1 ? __btowc (b) : b;
2677} 2638}
2678# endif
2679 2639
2680 /* Local function for parse_bracket_exp only used in case of NOT _LIBC. 2640/* Local function for parse_bracket_exp used in _LIBC environment.
2681 Build the range expression which starts from START_ELEM, and ends 2641 Build the range expression which starts from START_ELEM, and ends
2682 at END_ELEM. The result are written to MBCSET and SBCSET. 2642 at END_ELEM. The result are written to MBCSET and SBCSET.
2683 RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2643 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2684 mbcset->range_ends, is a pointer argument since we may 2644 mbcset->range_ends, is a pointer argument since we may
2685 update it. */ 2645 update it. */
2686 2646
2687static reg_errcode_t 2647static reg_errcode_t
2688# ifdef RE_ENABLE_I18N 2648build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
2689build_range_exp (const reg_syntax_t syntax, 2649 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
2690 bitset_t sbcset, 2650 re_dfa_t *dfa, reg_syntax_t syntax, uint_fast32_t nrules,
2691 re_charset_t *mbcset, 2651 const unsigned char *collseqmb, const char *collseqwc,
2692 Idx *range_alloc, 2652 int_fast32_t table_size, const void *symb_table,
2693 const bracket_elem_t *start_elem, 2653 const unsigned char *extra)
2694 const bracket_elem_t *end_elem)
2695# else /* not RE_ENABLE_I18N */
2696build_range_exp (const reg_syntax_t syntax,
2697 bitset_t sbcset,
2698 const bracket_elem_t *start_elem,
2699 const bracket_elem_t *end_elem)
2700# endif /* not RE_ENABLE_I18N */
2701{ 2654{
2702 unsigned int start_ch, end_ch;
2703 /* Equivalence Classes and Character Classes can't be a range start/end. */ 2655 /* Equivalence Classes and Character Classes can't be a range start/end. */
2704 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS 2656 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
2705 || start_elem->type == CHAR_CLASS 2657 || start_elem->type == CHAR_CLASS
@@ -2715,110 +2667,88 @@ build_range_exp (const reg_syntax_t syntax,
2715 && strlen ((char *) end_elem->opr.name) > 1))) 2667 && strlen ((char *) end_elem->opr.name) > 1)))
2716 return REG_ECOLLATE; 2668 return REG_ECOLLATE;
2717 2669
2718# ifdef RE_ENABLE_I18N 2670 unsigned int
2719 {
2720 wchar_t wc;
2721 wint_t start_wc;
2722 wint_t end_wc;
2723
2724 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch 2671 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2725 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] 2672 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2726 : 0)); 2673 : 0)),
2727 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch 2674 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2728 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] 2675 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2729 : 0)); 2676 : 0));
2677 wint_t
2730 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) 2678 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
2731 ? parse_byte (start_ch, mbcset) : start_elem->opr.wch); 2679 ? parse_byte (start_ch, dfa) : start_elem->opr.wch),
2732 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) 2680 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
2733 ? parse_byte (end_ch, mbcset) : end_elem->opr.wch); 2681 ? parse_byte (end_ch, dfa) : end_elem->opr.wch);
2734 if (start_wc == WEOF || end_wc == WEOF)
2735 return REG_ECOLLATE;
2736 else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
2737 && start_wc > end_wc))
2738 return REG_ERANGE;
2739
2740 /* Got valid collation sequence values, add them as a new entry.
2741 However, for !_LIBC we have no collation elements: if the
2742 character set is single byte, the single byte character set
2743 that we build below suffices. parse_bracket_exp passes
2744 no MBCSET if dfa->mb_cur_max == 1. */
2745 if (mbcset)
2746 {
2747 /* Check the space of the arrays. */
2748 if (__glibc_unlikely (*range_alloc == mbcset->nranges))
2749 {
2750 /* There is not enough space, need realloc. */
2751 wchar_t *new_array_start, *new_array_end;
2752 Idx new_nranges;
2753
2754 /* +1 in case of mbcset->nranges is 0. */
2755 new_nranges = 2 * mbcset->nranges + 1;
2756 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2757 are NULL if *range_alloc == 0. */
2758 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2759 new_nranges);
2760 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2761 new_nranges);
2762 2682
2763 if (__glibc_unlikely (new_array_start == NULL 2683 if (start_wc == WEOF || end_wc == WEOF)
2764 || new_array_end == NULL)) 2684 return REG_ECOLLATE;
2765 { 2685 else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
2766 re_free (new_array_start); 2686 && start_wc > end_wc))
2767 re_free (new_array_end); 2687 return REG_ERANGE;
2768 return REG_ESPACE;
2769 }
2770 2688
2771 mbcset->range_starts = new_array_start; 2689 /* Got valid collation sequence values, add them as a new entry.
2772 mbcset->range_ends = new_array_end; 2690 However, for !_LIBC we have no collation elements: if the
2773 *range_alloc = new_nranges; 2691 character set is single byte, the single byte character set
2774 } 2692 that we build below suffices. parse_bracket_exp passes
2693 no MBCSET if dfa->mb_cur_max == 1. */
2694 if (dfa->mb_cur_max > 1)
2695 {
2696 /* Check the space of the arrays. */
2697 if (__glibc_unlikely (*range_alloc == mbcset->nranges))
2698 {
2699 /* There is not enough space, need realloc. */
2700 wchar_t *new_array_start, *new_array_end;
2701 Idx new_nranges;
2775 2702
2776 mbcset->range_starts[mbcset->nranges] = start_wc; 2703 /* +1 in case of mbcset->nranges is 0. */
2777 mbcset->range_ends[mbcset->nranges++] = end_wc; 2704 new_nranges = 2 * mbcset->nranges + 1;
2778 } 2705 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2706 are NULL if *range_alloc == 0. */
2707 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2708 new_nranges);
2709 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2710 new_nranges);
2711
2712 if (__glibc_unlikely (new_array_start == NULL
2713 || new_array_end == NULL))
2714 {
2715 re_free (new_array_start);
2716 re_free (new_array_end);
2717 return REG_ESPACE;
2718 }
2719
2720 mbcset->range_starts = new_array_start;
2721 mbcset->range_ends = new_array_end;
2722 *range_alloc = new_nranges;
2723 }
2724
2725 mbcset->range_starts[mbcset->nranges] = start_wc;
2726 mbcset->range_ends[mbcset->nranges++] = end_wc;
2727 }
2728
2729 /* Build the table for single byte characters. */
2730 for (wchar_t wc = 0; wc < SBC_MAX; ++wc)
2731 {
2732 if (start_wc <= wc && wc <= end_wc)
2733 bitset_set (sbcset, wc);
2734 }
2779 2735
2780 /* Build the table for single byte characters. */
2781 for (wc = 0; wc < SBC_MAX; ++wc)
2782 {
2783 if (start_wc <= wc && wc <= end_wc)
2784 bitset_set (sbcset, wc);
2785 }
2786 }
2787# else /* not RE_ENABLE_I18N */
2788 {
2789 unsigned int ch;
2790 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2791 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2792 : 0));
2793 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2794 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2795 : 0));
2796 if (start_ch > end_ch)
2797 return REG_ERANGE;
2798 /* Build the table for single byte characters. */
2799 for (ch = 0; ch < SBC_MAX; ++ch)
2800 if (start_ch <= ch && ch <= end_ch)
2801 bitset_set (sbcset, ch);
2802 }
2803# endif /* not RE_ENABLE_I18N */
2804 return REG_NOERROR; 2736 return REG_NOERROR;
2805} 2737}
2806#endif /* not _LIBC */ 2738#endif /* not _LIBC */
2807 2739
2808#ifndef _LIBC 2740#ifndef _LIBC
2809/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. 2741/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.
2810 Build the collating element which is represented by NAME. 2742 Build the collating element which is represented by NAME.
2811 The result are written to MBCSET and SBCSET. 2743 The result are written to MBCSET and SBCSET.
2812 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a 2744 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2813 pointer argument since we may update it. */ 2745 pointer argument since we may update it. */
2814 2746
2815static reg_errcode_t 2747static reg_errcode_t
2816# ifdef RE_ENABLE_I18N
2817build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, 2748build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2818 Idx *coll_sym_alloc, const unsigned char *name) 2749 Idx *coll_sym_alloc, const unsigned char *name,
2819# else /* not RE_ENABLE_I18N */ 2750 uint_fast32_t nrules, int_fast32_t table_size,
2820build_collating_symbol (bitset_t sbcset, const unsigned char *name) 2751 const void *symb_table, const unsigned char *extra)
2821# endif /* not RE_ENABLE_I18N */
2822{ 2752{
2823 size_t name_len = strlen ((const char *) name); 2753 size_t name_len = strlen ((const char *) name);
2824 if (__glibc_unlikely (name_len != 1)) 2754 if (__glibc_unlikely (name_len != 1))
@@ -2831,271 +2761,280 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2831} 2761}
2832#endif /* not _LIBC */ 2762#endif /* not _LIBC */
2833 2763
2834/* This function parse bracket expression like "[abc]", "[a-c]",
2835 "[[.a-a.]]" etc. */
2836
2837static bin_tree_t *
2838parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2839 reg_syntax_t syntax, reg_errcode_t *err)
2840{
2841#ifdef _LIBC 2764#ifdef _LIBC
2842 const unsigned char *collseqmb; 2765/* Local function for parse_bracket_exp used in _LIBC environment.
2843 const char *collseqwc; 2766 Seek the collating symbol entry corresponding to NAME.
2844 uint32_t nrules; 2767 Return the index of the symbol in the SYMB_TABLE,
2845 int32_t table_size; 2768 or -1 if not found. */
2846 const int32_t *symb_table; 2769
2847 const unsigned char *extra; 2770static __always_inline int32_t
2848 2771seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
2849 /* Local function for parse_bracket_exp used in _LIBC environment. 2772 const int32_t *symb_table,
2850 Seek the collating symbol entry corresponding to NAME. 2773 int_fast32_t table_size,
2851 Return the index of the symbol in the SYMB_TABLE, 2774 const unsigned char *extra)
2852 or -1 if not found. */ 2775{
2853 2776 int_fast32_t elem;
2854 auto inline int32_t
2855 __attribute__ ((always_inline))
2856 seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
2857 {
2858 int32_t elem;
2859
2860 for (elem = 0; elem < table_size; elem++)
2861 if (symb_table[2 * elem] != 0)
2862 {
2863 int32_t idx = symb_table[2 * elem + 1];
2864 /* Skip the name of collating element name. */
2865 idx += 1 + extra[idx];
2866 if (/* Compare the length of the name. */
2867 name_len == extra[idx]
2868 /* Compare the name. */
2869 && memcmp (name, &extra[idx + 1], name_len) == 0)
2870 /* Yep, this is the entry. */
2871 return elem;
2872 }
2873 return -1;
2874 }
2875 2777
2876 /* Local function for parse_bracket_exp used in _LIBC environment. 2778 for (elem = 0; elem < table_size; elem++)
2877 Look up the collation sequence value of BR_ELEM. 2779 if (symb_table[2 * elem] != 0)
2878 Return the value if succeeded, UINT_MAX otherwise. */ 2780 {
2781 int32_t idx = symb_table[2 * elem + 1];
2782 /* Skip the name of collating element name. */
2783 idx += 1 + extra[idx];
2784 if (/* Compare the length of the name. */
2785 name_len == extra[idx]
2786 /* Compare the name. */
2787 && memcmp (name, &extra[idx + 1], name_len) == 0)
2788 /* Yep, this is the entry. */
2789 return elem;
2790 }
2791 return -1;
2792}
2879 2793
2880 auto inline unsigned int 2794/* Local function for parse_bracket_exp used in _LIBC environment.
2881 __attribute__ ((always_inline)) 2795 Look up the collation sequence value of BR_ELEM.
2882 lookup_collation_sequence_value (bracket_elem_t *br_elem) 2796 Return the value if succeeded, UINT_MAX otherwise. */
2797
2798static __always_inline unsigned int
2799lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
2800 const unsigned char *collseqmb,
2801 const char *collseqwc,
2802 int_fast32_t table_size,
2803 const int32_t *symb_table,
2804 const unsigned char *extra)
2805{
2806 if (br_elem->type == SB_CHAR)
2883 { 2807 {
2884 if (br_elem->type == SB_CHAR) 2808 /* if (MB_CUR_MAX == 1) */
2885 { 2809 if (nrules == 0)
2886 /* 2810 return collseqmb[br_elem->opr.ch];
2887 if (MB_CUR_MAX == 1) 2811 else
2888 */
2889 if (nrules == 0)
2890 return collseqmb[br_elem->opr.ch];
2891 else
2892 {
2893 wint_t wc = __btowc (br_elem->opr.ch);
2894 return __collseq_table_lookup (collseqwc, wc);
2895 }
2896 }
2897 else if (br_elem->type == MB_CHAR)
2898 { 2812 {
2899 if (nrules != 0) 2813 wint_t wc = __btowc (br_elem->opr.ch);
2900 return __collseq_table_lookup (collseqwc, br_elem->opr.wch); 2814 return __collseq_table_lookup (collseqwc, wc);
2901 } 2815 }
2902 else if (br_elem->type == COLL_SYM) 2816 }
2817 else if (br_elem->type == MB_CHAR)
2818 {
2819 if (nrules != 0)
2820 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2821 }
2822 else if (br_elem->type == COLL_SYM)
2823 {
2824 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2825 if (nrules != 0)
2903 { 2826 {
2904 size_t sym_name_len = strlen ((char *) br_elem->opr.name); 2827 int32_t elem, idx;
2905 if (nrules != 0) 2828 elem = seek_collating_symbol_entry (br_elem->opr.name,
2829 sym_name_len,
2830 symb_table, table_size,
2831 extra);
2832 if (elem != -1)
2906 { 2833 {
2907 int32_t elem, idx; 2834 /* We found the entry. */
2908 elem = seek_collating_symbol_entry (br_elem->opr.name, 2835 idx = symb_table[2 * elem + 1];
2909 sym_name_len); 2836 /* Skip the name of collating element name. */
2910 if (elem != -1) 2837 idx += 1 + extra[idx];
2911 { 2838 /* Skip the byte sequence of the collating element. */
2912 /* We found the entry. */ 2839 idx += 1 + extra[idx];
2913 idx = symb_table[2 * elem + 1]; 2840 /* Adjust for the alignment. */
2914 /* Skip the name of collating element name. */ 2841 idx = (idx + 3) & ~3;
2915 idx += 1 + extra[idx]; 2842 /* Skip the multibyte collation sequence value. */
2916 /* Skip the byte sequence of the collating element. */ 2843 idx += sizeof (unsigned int);
2917 idx += 1 + extra[idx]; 2844 /* Skip the wide char sequence of the collating element. */
2918 /* Adjust for the alignment. */ 2845 idx += sizeof (unsigned int) *
2919 idx = (idx + 3) & ~3; 2846 (1 + *(unsigned int *) (extra + idx));
2920 /* Skip the multibyte collation sequence value. */ 2847 /* Return the collation sequence value. */
2921 idx += sizeof (unsigned int); 2848 return *(unsigned int *) (extra + idx);
2922 /* Skip the wide char sequence of the collating element. */
2923 idx += sizeof (unsigned int) *
2924 (1 + *(unsigned int *) (extra + idx));
2925 /* Return the collation sequence value. */
2926 return *(unsigned int *) (extra + idx);
2927 }
2928 else if (sym_name_len == 1)
2929 {
2930 /* No valid character. Match it as a single byte
2931 character. */
2932 return collseqmb[br_elem->opr.name[0]];
2933 }
2934 } 2849 }
2935 else if (sym_name_len == 1) 2850 else if (sym_name_len == 1)
2936 return collseqmb[br_elem->opr.name[0]]; 2851 {
2852 /* No valid character. Match it as a single byte
2853 character. */
2854 return collseqmb[br_elem->opr.name[0]];
2855 }
2937 } 2856 }
2938 return UINT_MAX; 2857 else if (sym_name_len == 1)
2858 return collseqmb[br_elem->opr.name[0]];
2939 } 2859 }
2860 return UINT_MAX;
2861}
2940 2862
2941 /* Local function for parse_bracket_exp used in _LIBC environment. 2863/* Local function for parse_bracket_exp used in _LIBC environment.
2942 Build the range expression which starts from START_ELEM, and ends 2864 Build the range expression which starts from START_ELEM, and ends
2943 at END_ELEM. The result are written to MBCSET and SBCSET. 2865 at END_ELEM. The result are written to MBCSET and SBCSET.
2944 RANGE_ALLOC is the allocated size of mbcset->range_starts, and 2866 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2945 mbcset->range_ends, is a pointer argument since we may 2867 mbcset->range_ends, is a pointer argument since we may
2946 update it. */ 2868 update it. */
2869
2870static __always_inline reg_errcode_t
2871build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
2872 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
2873 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
2874 const unsigned char *collseqmb, const char *collseqwc,
2875 int_fast32_t table_size, const int32_t *symb_table,
2876 const unsigned char *extra)
2877{
2878 unsigned int ch;
2879 uint32_t start_collseq;
2880 uint32_t end_collseq;
2947 2881
2948 auto inline reg_errcode_t 2882 /* Equivalence Classes and Character Classes can't be a range
2949 __attribute__ ((always_inline)) 2883 start/end. */
2950 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, 2884 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
2951 bracket_elem_t *start_elem, bracket_elem_t *end_elem) 2885 || start_elem->type == CHAR_CLASS
2952 { 2886 || end_elem->type == EQUIV_CLASS
2953 unsigned int ch; 2887 || end_elem->type == CHAR_CLASS))
2954 uint32_t start_collseq; 2888 return REG_ERANGE;
2955 uint32_t end_collseq;
2956
2957 /* Equivalence Classes and Character Classes can't be a range
2958 start/end. */
2959 if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
2960 || start_elem->type == CHAR_CLASS
2961 || end_elem->type == EQUIV_CLASS
2962 || end_elem->type == CHAR_CLASS))
2963 return REG_ERANGE;
2964 2889
2965 /* FIXME: Implement rational ranges here, too. */ 2890 /* FIXME: Implement rational ranges here, too. */
2966 start_collseq = lookup_collation_sequence_value (start_elem); 2891 start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
2967 end_collseq = lookup_collation_sequence_value (end_elem); 2892 table_size, symb_table, extra);
2968 /* Check start/end collation sequence values. */ 2893 end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
2969 if (__glibc_unlikely (start_collseq == UINT_MAX 2894 table_size, symb_table, extra);
2970 || end_collseq == UINT_MAX)) 2895 /* Check start/end collation sequence values. */
2971 return REG_ECOLLATE; 2896 if (__glibc_unlikely (start_collseq == UINT_MAX
2972 if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES) 2897 || end_collseq == UINT_MAX))
2973 && start_collseq > end_collseq)) 2898 return REG_ECOLLATE;
2974 return REG_ERANGE; 2899 if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
2900 && start_collseq > end_collseq))
2901 return REG_ERANGE;
2975 2902
2976 /* Got valid collation sequence values, add them as a new entry. 2903 /* Got valid collation sequence values, add them as a new entry.
2977 However, if we have no collation elements, and the character set 2904 However, if we have no collation elements, and the character set
2978 is single byte, the single byte character set that we 2905 is single byte, the single byte character set that we
2979 build below suffices. */ 2906 build below suffices. */
2980 if (nrules > 0 || dfa->mb_cur_max > 1) 2907 if (nrules > 0 || dfa->mb_cur_max > 1)
2908 {
2909 /* Check the space of the arrays. */
2910 if (__glibc_unlikely (*range_alloc == mbcset->nranges))
2981 { 2911 {
2982 /* Check the space of the arrays. */ 2912 /* There is not enough space, need realloc. */
2983 if (__glibc_unlikely (*range_alloc == mbcset->nranges)) 2913 uint32_t *new_array_start;
2984 { 2914 uint32_t *new_array_end;
2985 /* There is not enough space, need realloc. */ 2915 int new_nranges;
2986 uint32_t *new_array_start;
2987 uint32_t *new_array_end;
2988 Idx new_nranges;
2989
2990 /* +1 in case of mbcset->nranges is 0. */
2991 new_nranges = 2 * mbcset->nranges + 1;
2992 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2993 new_nranges);
2994 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2995 new_nranges);
2996
2997 if (__glibc_unlikely (new_array_start == NULL
2998 || new_array_end == NULL))
2999 return REG_ESPACE;
3000 2916
3001 mbcset->range_starts = new_array_start; 2917 /* +1 in case of mbcset->nranges is 0. */
3002 mbcset->range_ends = new_array_end; 2918 new_nranges = 2 * mbcset->nranges + 1;
3003 *range_alloc = new_nranges; 2919 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
3004 } 2920 new_nranges);
2921 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2922 new_nranges);
3005 2923
3006 mbcset->range_starts[mbcset->nranges] = start_collseq; 2924 if (__glibc_unlikely (new_array_start == NULL
3007 mbcset->range_ends[mbcset->nranges++] = end_collseq; 2925 || new_array_end == NULL))
3008 } 2926 return REG_ESPACE;
3009 2927
3010 /* Build the table for single byte characters. */ 2928 mbcset->range_starts = new_array_start;
3011 for (ch = 0; ch < SBC_MAX; ch++) 2929 mbcset->range_ends = new_array_end;
3012 { 2930 *range_alloc = new_nranges;
3013 uint32_t ch_collseq;
3014 /*
3015 if (MB_CUR_MAX == 1)
3016 */
3017 if (nrules == 0)
3018 ch_collseq = collseqmb[ch];
3019 else
3020 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
3021 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
3022 bitset_set (sbcset, ch);
3023 } 2931 }
3024 return REG_NOERROR; 2932
2933 mbcset->range_starts[mbcset->nranges] = start_collseq;
2934 mbcset->range_ends[mbcset->nranges++] = end_collseq;
3025 } 2935 }
3026 2936
3027 /* Local function for parse_bracket_exp used in _LIBC environment. 2937 /* Build the table for single byte characters. */
3028 Build the collating element which is represented by NAME. 2938 for (ch = 0; ch < SBC_MAX; ch++)
3029 The result are written to MBCSET and SBCSET. 2939 {
3030 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a 2940 uint32_t ch_collseq;
3031 pointer argument since we may update it. */ 2941 /* if (MB_CUR_MAX == 1) */
2942 if (nrules == 0)
2943 ch_collseq = collseqmb[ch];
2944 else
2945 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2946 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2947 bitset_set (sbcset, ch);
2948 }
2949 return REG_NOERROR;
2950}
3032 2951
3033 auto inline reg_errcode_t 2952/* Local function for parse_bracket_exp used in _LIBC environment.
3034 __attribute__ ((always_inline)) 2953 Build the collating element which is represented by NAME.
3035 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, 2954 The result are written to MBCSET and SBCSET.
3036 Idx *coll_sym_alloc, const unsigned char *name) 2955 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2956 pointer argument since we may update it. */
2957
2958static __always_inline reg_errcode_t
2959build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2960 Idx *coll_sym_alloc, const unsigned char *name,
2961 uint_fast32_t nrules, int_fast32_t table_size,
2962 const int32_t *symb_table, const unsigned char *extra)
2963{
2964 int32_t elem, idx;
2965 size_t name_len = strlen ((const char *) name);
2966 if (nrules != 0)
3037 { 2967 {
3038 int32_t elem, idx; 2968 elem = seek_collating_symbol_entry (name, name_len, symb_table,
3039 size_t name_len = strlen ((const char *) name); 2969 table_size, extra);
3040 if (nrules != 0) 2970 if (elem != -1)
3041 { 2971 {
3042 elem = seek_collating_symbol_entry (name, name_len); 2972 /* We found the entry. */
3043 if (elem != -1) 2973 idx = symb_table[2 * elem + 1];
3044 { 2974 /* Skip the name of collating element name. */
3045 /* We found the entry. */ 2975 idx += 1 + extra[idx];
3046 idx = symb_table[2 * elem + 1]; 2976 }
3047 /* Skip the name of collating element name. */ 2977 else if (name_len == 1)
3048 idx += 1 + extra[idx]; 2978 {
3049 } 2979 /* No valid character, treat it as a normal
3050 else if (name_len == 1) 2980 character. */
3051 { 2981 bitset_set (sbcset, name[0]);
3052 /* No valid character, treat it as a normal
3053 character. */
3054 bitset_set (sbcset, name[0]);
3055 return REG_NOERROR;
3056 }
3057 else
3058 return REG_ECOLLATE;
3059
3060 /* Got valid collation sequence, add it as a new entry. */
3061 /* Check the space of the arrays. */
3062 if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
3063 {
3064 /* Not enough, realloc it. */
3065 /* +1 in case of mbcset->ncoll_syms is 0. */
3066 Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
3067 /* Use realloc since mbcset->coll_syms is NULL
3068 if *alloc == 0. */
3069 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
3070 new_coll_sym_alloc);
3071 if (__glibc_unlikely (new_coll_syms == NULL))
3072 return REG_ESPACE;
3073 mbcset->coll_syms = new_coll_syms;
3074 *coll_sym_alloc = new_coll_sym_alloc;
3075 }
3076 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3077 return REG_NOERROR; 2982 return REG_NOERROR;
3078 } 2983 }
3079 else 2984 else
2985 return REG_ECOLLATE;
2986
2987 /* Got valid collation sequence, add it as a new entry. */
2988 /* Check the space of the arrays. */
2989 if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
3080 { 2990 {
3081 if (__glibc_unlikely (name_len != 1)) 2991 /* Not enough, realloc it. */
3082 return REG_ECOLLATE; 2992 /* +1 in case of mbcset->ncoll_syms is 0. */
3083 else 2993 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
3084 { 2994 /* Use realloc since mbcset->coll_syms is NULL
3085 bitset_set (sbcset, name[0]); 2995 if *alloc == 0. */
3086 return REG_NOERROR; 2996 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
3087 } 2997 new_coll_sym_alloc);
2998 if (__glibc_unlikely (new_coll_syms == NULL))
2999 return REG_ESPACE;
3000 mbcset->coll_syms = new_coll_syms;
3001 *coll_sym_alloc = new_coll_sym_alloc;
3088 } 3002 }
3003 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3004 return REG_NOERROR;
3089 } 3005 }
3090#endif 3006 else
3007 {
3008 if (__glibc_unlikely (name_len != 1))
3009 return REG_ECOLLATE;
3010 else
3011 {
3012 bitset_set (sbcset, name[0]);
3013 return REG_NOERROR;
3014 }
3015 }
3016}
3017#endif /* _LIBC */
3018
3019/* This function parse bracket expression like "[abc]", "[a-c]",
3020 "[[.a-a.]]" etc. */
3021
3022static bin_tree_t *
3023parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3024 reg_syntax_t syntax, reg_errcode_t *err)
3025{
3026 const unsigned char *collseqmb = NULL;
3027 const char *collseqwc = NULL;
3028 uint_fast32_t nrules = 0;
3029 int_fast32_t table_size = 0;
3030 const void *symb_table = NULL;
3031 const unsigned char *extra = NULL;
3091 3032
3092 re_token_t br_token; 3033 re_token_t br_token;
3093 re_bitset_ptr_t sbcset; 3034 re_bitset_ptr_t sbcset;
3094#ifdef RE_ENABLE_I18N
3095 re_charset_t *mbcset; 3035 re_charset_t *mbcset;
3096 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; 3036 Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
3097 Idx equiv_class_alloc = 0, char_class_alloc = 0; 3037 Idx equiv_class_alloc = 0, char_class_alloc = 0;
3098#endif /* not RE_ENABLE_I18N */
3099 bool non_match = false; 3038 bool non_match = false;
3100 bin_tree_t *work_tree; 3039 bin_tree_t *work_tree;
3101 int token_len; 3040 int token_len;
@@ -3111,26 +3050,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3111 */ 3050 */
3112 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); 3051 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3113 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); 3052 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3114 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, 3053 symb_table = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_TABLEMB);
3115 _NL_COLLATE_SYMB_TABLEMB);
3116 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, 3054 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3117 _NL_COLLATE_SYMB_EXTRAMB); 3055 _NL_COLLATE_SYMB_EXTRAMB);
3118 } 3056 }
3119#endif 3057#endif
3120 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); 3058 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3121#ifdef RE_ENABLE_I18N
3122 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3059 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3123#endif /* RE_ENABLE_I18N */
3124#ifdef RE_ENABLE_I18N
3125 if (__glibc_unlikely (sbcset == NULL || mbcset == NULL)) 3060 if (__glibc_unlikely (sbcset == NULL || mbcset == NULL))
3126#else
3127 if (__glibc_unlikely (sbcset == NULL))
3128#endif /* RE_ENABLE_I18N */
3129 { 3061 {
3130 re_free (sbcset); 3062 re_free (sbcset);
3131#ifdef RE_ENABLE_I18N
3132 re_free (mbcset); 3063 re_free (mbcset);
3133#endif
3134 *err = REG_ESPACE; 3064 *err = REG_ESPACE;
3135 return NULL; 3065 return NULL;
3136 } 3066 }
@@ -3143,9 +3073,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3143 } 3073 }
3144 if (token->type == OP_NON_MATCH_LIST) 3074 if (token->type == OP_NON_MATCH_LIST)
3145 { 3075 {
3146#ifdef RE_ENABLE_I18N
3147 mbcset->non_match = 1; 3076 mbcset->non_match = 1;
3148#endif /* not RE_ENABLE_I18N */
3149 non_match = true; 3077 non_match = true;
3150 if (syntax & RE_HAT_LISTS_NOT_NEWLINE) 3078 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3151 bitset_set (sbcset, '\n'); 3079 bitset_set (sbcset, '\n');
@@ -3228,18 +3156,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3228 3156
3229 token_len = peek_token_bracket (token, regexp, syntax); 3157 token_len = peek_token_bracket (token, regexp, syntax);
3230 3158
3231#ifdef _LIBC
3232 *err = build_range_exp (sbcset, mbcset, &range_alloc, 3159 *err = build_range_exp (sbcset, mbcset, &range_alloc,
3233 &start_elem, &end_elem); 3160 &start_elem, &end_elem,
3234#else 3161 dfa, syntax, nrules, collseqmb, collseqwc,
3235# ifdef RE_ENABLE_I18N 3162 table_size, symb_table, extra);
3236 *err = build_range_exp (syntax, sbcset,
3237 dfa->mb_cur_max > 1 ? mbcset : NULL,
3238 &range_alloc, &start_elem, &end_elem);
3239# else
3240 *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
3241# endif
3242#endif /* RE_ENABLE_I18N */
3243 if (__glibc_unlikely (*err != REG_NOERROR)) 3163 if (__glibc_unlikely (*err != REG_NOERROR))
3244 goto parse_bracket_exp_free_return; 3164 goto parse_bracket_exp_free_return;
3245 } 3165 }
@@ -3250,7 +3170,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3250 case SB_CHAR: 3170 case SB_CHAR:
3251 bitset_set (sbcset, start_elem.opr.ch); 3171 bitset_set (sbcset, start_elem.opr.ch);
3252 break; 3172 break;
3253#ifdef RE_ENABLE_I18N
3254 case MB_CHAR: 3173 case MB_CHAR:
3255 /* Check whether the array has enough space. */ 3174 /* Check whether the array has enough space. */
3256 if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars)) 3175 if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars))
@@ -3268,30 +3187,24 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3268 } 3187 }
3269 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; 3188 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3270 break; 3189 break;
3271#endif /* RE_ENABLE_I18N */
3272 case EQUIV_CLASS: 3190 case EQUIV_CLASS:
3273 *err = build_equiv_class (sbcset, 3191 *err = build_equiv_class (sbcset,
3274#ifdef RE_ENABLE_I18N
3275 mbcset, &equiv_class_alloc, 3192 mbcset, &equiv_class_alloc,
3276#endif /* RE_ENABLE_I18N */
3277 start_elem.opr.name); 3193 start_elem.opr.name);
3278 if (__glibc_unlikely (*err != REG_NOERROR)) 3194 if (__glibc_unlikely (*err != REG_NOERROR))
3279 goto parse_bracket_exp_free_return; 3195 goto parse_bracket_exp_free_return;
3280 break; 3196 break;
3281 case COLL_SYM: 3197 case COLL_SYM:
3282 *err = build_collating_symbol (sbcset, 3198 *err = build_collating_symbol (sbcset,
3283#ifdef RE_ENABLE_I18N
3284 mbcset, &coll_sym_alloc, 3199 mbcset, &coll_sym_alloc,
3285#endif /* RE_ENABLE_I18N */ 3200 start_elem.opr.name,
3286 start_elem.opr.name); 3201 nrules, table_size, symb_table, extra);
3287 if (__glibc_unlikely (*err != REG_NOERROR)) 3202 if (__glibc_unlikely (*err != REG_NOERROR))
3288 goto parse_bracket_exp_free_return; 3203 goto parse_bracket_exp_free_return;
3289 break; 3204 break;
3290 case CHAR_CLASS: 3205 case CHAR_CLASS:
3291 *err = build_charclass (regexp->trans, sbcset, 3206 *err = build_charclass (regexp->trans, sbcset,
3292#ifdef RE_ENABLE_I18N
3293 mbcset, &char_class_alloc, 3207 mbcset, &char_class_alloc,
3294#endif /* RE_ENABLE_I18N */
3295 (const char *) start_elem.opr.name, 3208 (const char *) start_elem.opr.name,
3296 syntax); 3209 syntax);
3297 if (__glibc_unlikely (*err != REG_NOERROR)) 3210 if (__glibc_unlikely (*err != REG_NOERROR))
@@ -3317,7 +3230,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3317 if (non_match) 3230 if (non_match)
3318 bitset_not (sbcset); 3231 bitset_not (sbcset);
3319 3232
3320#ifdef RE_ENABLE_I18N
3321 /* Ensure only single byte characters are set. */ 3233 /* Ensure only single byte characters are set. */
3322 if (dfa->mb_cur_max > 1) 3234 if (dfa->mb_cur_max > 1)
3323 bitset_mask (sbcset, dfa->sb_char); 3235 bitset_mask (sbcset, dfa->sb_char);
@@ -3361,11 +3273,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3361 } 3273 }
3362 } 3274 }
3363 else 3275 else
3364#endif /* not RE_ENABLE_I18N */
3365 { 3276 {
3366#ifdef RE_ENABLE_I18N
3367 free_charset (mbcset); 3277 free_charset (mbcset);
3368#endif
3369 /* Build a tree for simple bracket. */ 3278 /* Build a tree for simple bracket. */
3370 br_token.type = SIMPLE_BRACKET; 3279 br_token.type = SIMPLE_BRACKET;
3371 br_token.opr.sbcset = sbcset; 3280 br_token.opr.sbcset = sbcset;
@@ -3379,9 +3288,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3379 *err = REG_ESPACE; 3288 *err = REG_ESPACE;
3380 parse_bracket_exp_free_return: 3289 parse_bracket_exp_free_return:
3381 re_free (sbcset); 3290 re_free (sbcset);
3382#ifdef RE_ENABLE_I18N
3383 free_charset (mbcset); 3291 free_charset (mbcset);
3384#endif /* RE_ENABLE_I18N */
3385 return NULL; 3292 return NULL;
3386} 3293}
3387 3294
@@ -3392,7 +3299,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3392 re_token_t *token, int token_len, re_dfa_t *dfa, 3299 re_token_t *token, int token_len, re_dfa_t *dfa,
3393 reg_syntax_t syntax, bool accept_hyphen) 3300 reg_syntax_t syntax, bool accept_hyphen)
3394{ 3301{
3395#ifdef RE_ENABLE_I18N
3396 int cur_char_size; 3302 int cur_char_size;
3397 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); 3303 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3398 if (cur_char_size > 1) 3304 if (cur_char_size > 1)
@@ -3402,7 +3308,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
3402 re_string_skip_bytes (regexp, cur_char_size); 3308 re_string_skip_bytes (regexp, cur_char_size);
3403 return REG_NOERROR; 3309 return REG_NOERROR;
3404 } 3310 }
3405#endif /* RE_ENABLE_I18N */
3406 re_string_skip_bytes (regexp, token_len); /* Skip a token. */ 3311 re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3407 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS 3312 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
3408 || token->type == OP_OPEN_EQUIV_CLASS) 3313 || token->type == OP_OPEN_EQUIV_CLASS)
@@ -3475,12 +3380,8 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
3475 is a pointer argument since we may update it. */ 3380 is a pointer argument since we may update it. */
3476 3381
3477static reg_errcode_t 3382static reg_errcode_t
3478#ifdef RE_ENABLE_I18N
3479build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, 3383build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3480 Idx *equiv_class_alloc, const unsigned char *name) 3384 Idx *equiv_class_alloc, const unsigned char *name)
3481#else /* not RE_ENABLE_I18N */
3482build_equiv_class (bitset_t sbcset, const unsigned char *name)
3483#endif /* not RE_ENABLE_I18N */
3484{ 3385{
3485#ifdef _LIBC 3386#ifdef _LIBC
3486 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3387 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -3560,14 +3461,9 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3560 is a pointer argument since we may update it. */ 3461 is a pointer argument since we may update it. */
3561 3462
3562static reg_errcode_t 3463static reg_errcode_t
3563#ifdef RE_ENABLE_I18N
3564build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, 3464build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3565 re_charset_t *mbcset, Idx *char_class_alloc, 3465 re_charset_t *mbcset, Idx *char_class_alloc,
3566 const char *class_name, reg_syntax_t syntax) 3466 const char *class_name, reg_syntax_t syntax)
3567#else /* not RE_ENABLE_I18N */
3568build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3569 const char *class_name, reg_syntax_t syntax)
3570#endif /* not RE_ENABLE_I18N */
3571{ 3467{
3572 int i; 3468 int i;
3573 const char *name = class_name; 3469 const char *name = class_name;
@@ -3578,7 +3474,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3578 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) 3474 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
3579 name = "alpha"; 3475 name = "alpha";
3580 3476
3581#ifdef RE_ENABLE_I18N
3582 /* Check the space of the arrays. */ 3477 /* Check the space of the arrays. */
3583 if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes)) 3478 if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes))
3584 { 3479 {
@@ -3594,7 +3489,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3594 *char_class_alloc = new_char_class_alloc; 3489 *char_class_alloc = new_char_class_alloc;
3595 } 3490 }
3596 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); 3491 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3597#endif /* RE_ENABLE_I18N */
3598 3492
3599#define BUILD_CHARCLASS_LOOP(ctype_func) \ 3493#define BUILD_CHARCLASS_LOOP(ctype_func) \
3600 do { \ 3494 do { \
@@ -3649,10 +3543,8 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3649 reg_errcode_t *err) 3543 reg_errcode_t *err)
3650{ 3544{
3651 re_bitset_ptr_t sbcset; 3545 re_bitset_ptr_t sbcset;
3652#ifdef RE_ENABLE_I18N
3653 re_charset_t *mbcset; 3546 re_charset_t *mbcset;
3654 Idx alloc = 0; 3547 Idx alloc = 0;
3655#endif /* not RE_ENABLE_I18N */
3656 reg_errcode_t ret; 3548 reg_errcode_t ret;
3657 bin_tree_t *tree; 3549 bin_tree_t *tree;
3658 3550
@@ -3662,7 +3554,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3662 *err = REG_ESPACE; 3554 *err = REG_ESPACE;
3663 return NULL; 3555 return NULL;
3664 } 3556 }
3665#ifdef RE_ENABLE_I18N
3666 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); 3557 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3667 if (__glibc_unlikely (mbcset == NULL)) 3558 if (__glibc_unlikely (mbcset == NULL))
3668 { 3559 {
@@ -3671,21 +3562,14 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3671 return NULL; 3562 return NULL;
3672 } 3563 }
3673 mbcset->non_match = non_match; 3564 mbcset->non_match = non_match;
3674#endif /* RE_ENABLE_I18N */
3675 3565
3676 /* We don't care the syntax in this case. */ 3566 /* We don't care the syntax in this case. */
3677 ret = build_charclass (trans, sbcset, 3567 ret = build_charclass (trans, sbcset, mbcset, &alloc, class_name, 0);
3678#ifdef RE_ENABLE_I18N
3679 mbcset, &alloc,
3680#endif /* RE_ENABLE_I18N */
3681 class_name, 0);
3682 3568
3683 if (__glibc_unlikely (ret != REG_NOERROR)) 3569 if (__glibc_unlikely (ret != REG_NOERROR))
3684 { 3570 {
3685 re_free (sbcset); 3571 re_free (sbcset);
3686#ifdef RE_ENABLE_I18N
3687 free_charset (mbcset); 3572 free_charset (mbcset);
3688#endif /* RE_ENABLE_I18N */
3689 *err = ret; 3573 *err = ret;
3690 return NULL; 3574 return NULL;
3691 } 3575 }
@@ -3697,11 +3581,9 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3697 if (non_match) 3581 if (non_match)
3698 bitset_not (sbcset); 3582 bitset_not (sbcset);
3699 3583
3700#ifdef RE_ENABLE_I18N
3701 /* Ensure only single byte characters are set. */ 3584 /* Ensure only single byte characters are set. */
3702 if (dfa->mb_cur_max > 1) 3585 if (dfa->mb_cur_max > 1)
3703 bitset_mask (sbcset, dfa->sb_char); 3586 bitset_mask (sbcset, dfa->sb_char);
3704#endif
3705 3587
3706 /* Build a tree for simple bracket. */ 3588 /* Build a tree for simple bracket. */
3707 re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset }; 3589 re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset };
@@ -3709,7 +3591,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3709 if (__glibc_unlikely (tree == NULL)) 3591 if (__glibc_unlikely (tree == NULL))
3710 goto build_word_op_espace; 3592 goto build_word_op_espace;
3711 3593
3712#ifdef RE_ENABLE_I18N
3713 if (dfa->mb_cur_max > 1) 3594 if (dfa->mb_cur_max > 1)
3714 { 3595 {
3715 bin_tree_t *mbc_tree; 3596 bin_tree_t *mbc_tree;
@@ -3730,15 +3611,10 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3730 free_charset (mbcset); 3611 free_charset (mbcset);
3731 return tree; 3612 return tree;
3732 } 3613 }
3733#else /* not RE_ENABLE_I18N */
3734 return tree;
3735#endif /* not RE_ENABLE_I18N */
3736 3614
3737 build_word_op_espace: 3615 build_word_op_espace:
3738 re_free (sbcset); 3616 re_free (sbcset);
3739#ifdef RE_ENABLE_I18N
3740 free_charset (mbcset); 3617 free_charset (mbcset);
3741#endif /* RE_ENABLE_I18N */
3742 *err = REG_ESPACE; 3618 *err = REG_ESPACE;
3743 return NULL; 3619 return NULL;
3744} 3620}
@@ -3771,21 +3647,19 @@ fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
3771 return num; 3647 return num;
3772} 3648}
3773 3649
3774#ifdef RE_ENABLE_I18N
3775static void 3650static void
3776free_charset (re_charset_t *cset) 3651free_charset (re_charset_t *cset)
3777{ 3652{
3778 re_free (cset->mbchars); 3653 re_free (cset->mbchars);
3779# ifdef _LIBC 3654#ifdef _LIBC
3780 re_free (cset->coll_syms); 3655 re_free (cset->coll_syms);
3781 re_free (cset->equiv_classes); 3656 re_free (cset->equiv_classes);
3782# endif 3657#endif
3783 re_free (cset->range_starts); 3658 re_free (cset->range_starts);
3784 re_free (cset->range_ends); 3659 re_free (cset->range_ends);
3785 re_free (cset->char_classes); 3660 re_free (cset->char_classes);
3786 re_free (cset); 3661 re_free (cset);
3787} 3662}
3788#endif /* RE_ENABLE_I18N */
3789 3663
3790/* Functions for binary tree operation. */ 3664/* Functions for binary tree operation. */
3791 3665
@@ -3851,13 +3725,10 @@ mark_opt_subexp (void *extra, bin_tree_t *node)
3851static void 3725static void
3852free_token (re_token_t *node) 3726free_token (re_token_t *node)
3853{ 3727{
3854#ifdef RE_ENABLE_I18N
3855 if (node->type == COMPLEX_BRACKET && node->duplicated == 0) 3728 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3856 free_charset (node->opr.mbcset); 3729 free_charset (node->opr.mbcset);
3857 else 3730 else if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3858#endif /* RE_ENABLE_I18N */ 3731 re_free (node->opr.sbcset);
3859 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3860 re_free (node->opr.sbcset);
3861} 3732}
3862 3733
3863/* Worker function for tree walking. Free the allocated memory inside NODE 3734/* Worker function for tree walking. Free the allocated memory inside NODE