summaryrefslogtreecommitdiffstats
path: root/gl/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regexec.c')
-rw-r--r--gl/regexec.c1169
1 files changed, 490 insertions, 679 deletions
diff --git a/gl/regexec.c b/gl/regexec.c
index d29d442..521cb02 100644
--- a/gl/regexec.c
+++ b/gl/regexec.c
@@ -1,206 +1,170 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2013 Free Software Foundation, Inc. 2 Copyright (C) 2002-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 5
6 The GNU C Library is free software; you can redistribute it and/or 6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public 7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 3 of the License, or (at your option) any later version. 9 version 2.1 of the License, or (at your option) any later version.
10 10
11 The GNU C Library is distributed in the hope that it will be useful, 11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details. 14 Lesser General Public License for more details.
15 15
16 You should have received a copy of the GNU General Public 16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see 17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */ 18 <https://www.gnu.org/licenses/>. */
19 19
20static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, 20static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 Idx n) internal_function; 21 Idx n);
22static void match_ctx_clean (re_match_context_t *mctx) internal_function; 22static void match_ctx_clean (re_match_context_t *mctx);
23static void match_ctx_free (re_match_context_t *cache) internal_function; 23static void match_ctx_free (re_match_context_t *cache);
24static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, 24static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 Idx str_idx, Idx from, Idx to) 25 Idx str_idx, Idx from, Idx to);
26 internal_function; 26static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
28 internal_function;
29static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, 27static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
30 Idx str_idx) internal_function; 28 Idx str_idx);
31static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, 29static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
32 Idx node, Idx str_idx) 30 Idx node, Idx str_idx);
33 internal_function;
34static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 31static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35 re_dfastate_t **limited_sts, Idx last_node, 32 re_dfastate_t **limited_sts, Idx last_node,
36 Idx last_str_idx) 33 Idx last_str_idx);
37 internal_function;
38static reg_errcode_t re_search_internal (const regex_t *preg, 34static reg_errcode_t re_search_internal (const regex_t *preg,
39 const char *string, Idx length, 35 const char *string, Idx length,
40 Idx start, Idx last_start, Idx stop, 36 Idx start, Idx last_start, Idx stop,
41 size_t nmatch, regmatch_t pmatch[], 37 size_t nmatch, regmatch_t pmatch[],
42 int eflags) internal_function; 38 int eflags);
43static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, 39static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
44 const char *string1, Idx length1, 40 const char *string1, Idx length1,
45 const char *string2, Idx length2, 41 const char *string2, Idx length2,
46 Idx start, regoff_t range, 42 Idx start, regoff_t range,
47 struct re_registers *regs, 43 struct re_registers *regs,
48 Idx stop, bool ret_len) internal_function; 44 Idx stop, bool ret_len);
49static regoff_t re_search_stub (struct re_pattern_buffer *bufp, 45static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
50 const char *string, Idx length, Idx start, 46 const char *string, Idx length, Idx start,
51 regoff_t range, Idx stop, 47 regoff_t range, Idx stop,
52 struct re_registers *regs, 48 struct re_registers *regs,
53 bool ret_len) internal_function; 49 bool ret_len);
54static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, 50static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
55 Idx nregs, int regs_allocated) internal_function; 51 Idx nregs, int regs_allocated);
56static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) 52static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
57 internal_function;
58static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, 53static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
59 Idx *p_match_first) internal_function; 54 Idx *p_match_first);
60static Idx check_halt_state_context (const re_match_context_t *mctx, 55static Idx check_halt_state_context (const re_match_context_t *mctx,
61 const re_dfastate_t *state, Idx idx) 56 const re_dfastate_t *state, Idx idx);
62 internal_function;
63static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, 57static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
64 regmatch_t *prev_idx_match, Idx cur_node, 58 regmatch_t *prev_idx_match, Idx cur_node,
65 Idx cur_idx, Idx nmatch) internal_function; 59 Idx cur_idx, Idx nmatch);
66static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, 60static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
67 Idx str_idx, Idx dest_node, Idx nregs, 61 Idx str_idx, Idx dest_node, Idx nregs,
68 regmatch_t *regs, 62 regmatch_t *regs, regmatch_t *prevregs,
69 re_node_set *eps_via_nodes) 63 re_node_set *eps_via_nodes);
70 internal_function;
71static reg_errcode_t set_regs (const regex_t *preg, 64static reg_errcode_t set_regs (const regex_t *preg,
72 const re_match_context_t *mctx, 65 const re_match_context_t *mctx,
73 size_t nmatch, regmatch_t *pmatch, 66 size_t nmatch, regmatch_t *pmatch,
74 bool fl_backtrack) internal_function; 67 bool fl_backtrack);
75static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) 68static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
76 internal_function;
77 69
78#ifdef RE_ENABLE_I18N
79static int sift_states_iter_mb (const re_match_context_t *mctx, 70static int sift_states_iter_mb (const re_match_context_t *mctx,
80 re_sift_context_t *sctx, 71 re_sift_context_t *sctx,
81 Idx node_idx, Idx str_idx, Idx max_str_idx) 72 Idx node_idx, Idx str_idx, Idx max_str_idx);
82 internal_function;
83#endif /* RE_ENABLE_I18N */
84static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, 73static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
85 re_sift_context_t *sctx) 74 re_sift_context_t *sctx);
86 internal_function;
87static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, 75static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
88 re_sift_context_t *sctx, Idx str_idx, 76 re_sift_context_t *sctx, Idx str_idx,
89 re_node_set *cur_dest) 77 re_node_set *cur_dest);
90 internal_function;
91static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, 78static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
92 re_sift_context_t *sctx, 79 re_sift_context_t *sctx,
93 Idx str_idx, 80 Idx str_idx,
94 re_node_set *dest_nodes) 81 re_node_set *dest_nodes);
95 internal_function;
96static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, 82static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
97 re_node_set *dest_nodes, 83 re_node_set *dest_nodes,
98 const re_node_set *candidates) 84 const re_node_set *candidates);
99 internal_function;
100static bool check_dst_limits (const re_match_context_t *mctx, 85static bool check_dst_limits (const re_match_context_t *mctx,
101 const re_node_set *limits, 86 const re_node_set *limits,
102 Idx dst_node, Idx dst_idx, Idx src_node, 87 Idx dst_node, Idx dst_idx, Idx src_node,
103 Idx src_idx) internal_function; 88 Idx src_idx);
104static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, 89static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
105 int boundaries, Idx subexp_idx, 90 int boundaries, Idx subexp_idx,
106 Idx from_node, Idx bkref_idx) 91 Idx from_node, Idx bkref_idx);
107 internal_function;
108static int check_dst_limits_calc_pos (const re_match_context_t *mctx, 92static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
109 Idx limit, Idx subexp_idx, 93 Idx limit, Idx subexp_idx,
110 Idx node, Idx str_idx, 94 Idx node, Idx str_idx,
111 Idx bkref_idx) internal_function; 95 Idx bkref_idx);
112static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, 96static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
113 re_node_set *dest_nodes, 97 re_node_set *dest_nodes,
114 const re_node_set *candidates, 98 const re_node_set *candidates,
115 re_node_set *limits, 99 re_node_set *limits,
116 struct re_backref_cache_entry *bkref_ents, 100 struct re_backref_cache_entry *bkref_ents,
117 Idx str_idx) internal_function; 101 Idx str_idx);
118static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, 102static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
119 re_sift_context_t *sctx, 103 re_sift_context_t *sctx,
120 Idx str_idx, const re_node_set *candidates) 104 Idx str_idx, const re_node_set *candidates);
121 internal_function;
122static reg_errcode_t merge_state_array (const re_dfa_t *dfa, 105static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
123 re_dfastate_t **dst, 106 re_dfastate_t **dst,
124 re_dfastate_t **src, Idx num) 107 re_dfastate_t **src, Idx num);
125 internal_function;
126static re_dfastate_t *find_recover_state (reg_errcode_t *err, 108static re_dfastate_t *find_recover_state (reg_errcode_t *err,
127 re_match_context_t *mctx) internal_function; 109 re_match_context_t *mctx);
128static re_dfastate_t *transit_state (reg_errcode_t *err, 110static re_dfastate_t *transit_state (reg_errcode_t *err,
129 re_match_context_t *mctx, 111 re_match_context_t *mctx,
130 re_dfastate_t *state) internal_function; 112 re_dfastate_t *state);
131static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, 113static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
132 re_match_context_t *mctx, 114 re_match_context_t *mctx,
133 re_dfastate_t *next_state) 115 re_dfastate_t *next_state);
134 internal_function;
135static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, 116static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
136 re_node_set *cur_nodes, 117 re_node_set *cur_nodes,
137 Idx str_idx) internal_function; 118 Idx str_idx);
138#if 0 119#if 0
139static re_dfastate_t *transit_state_sb (reg_errcode_t *err, 120static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
140 re_match_context_t *mctx, 121 re_match_context_t *mctx,
141 re_dfastate_t *pstate) 122 re_dfastate_t *pstate);
142 internal_function;
143#endif 123#endif
144#ifdef RE_ENABLE_I18N
145static reg_errcode_t transit_state_mb (re_match_context_t *mctx, 124static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
146 re_dfastate_t *pstate) 125 re_dfastate_t *pstate);
147 internal_function;
148#endif /* RE_ENABLE_I18N */
149static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, 126static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
150 const re_node_set *nodes) 127 const re_node_set *nodes);
151 internal_function;
152static reg_errcode_t get_subexp (re_match_context_t *mctx, 128static reg_errcode_t get_subexp (re_match_context_t *mctx,
153 Idx bkref_node, Idx bkref_str_idx) 129 Idx bkref_node, Idx bkref_str_idx);
154 internal_function;
155static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, 130static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
156 const re_sub_match_top_t *sub_top, 131 const re_sub_match_top_t *sub_top,
157 re_sub_match_last_t *sub_last, 132 re_sub_match_last_t *sub_last,
158 Idx bkref_node, Idx bkref_str) 133 Idx bkref_node, Idx bkref_str);
159 internal_function;
160static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 134static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
161 Idx subexp_idx, int type) internal_function; 135 Idx subexp_idx, int type);
162static reg_errcode_t check_arrival (re_match_context_t *mctx, 136static reg_errcode_t check_arrival (re_match_context_t *mctx,
163 state_array_t *path, Idx top_node, 137 state_array_t *path, Idx top_node,
164 Idx top_str, Idx last_node, Idx last_str, 138 Idx top_str, Idx last_node, Idx last_str,
165 int type) internal_function; 139 int type);
166static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, 140static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
167 Idx str_idx, 141 Idx str_idx,
168 re_node_set *cur_nodes, 142 re_node_set *cur_nodes,
169 re_node_set *next_nodes) 143 re_node_set *next_nodes);
170 internal_function;
171static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, 144static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
172 re_node_set *cur_nodes, 145 re_node_set *cur_nodes,
173 Idx ex_subexp, int type) 146 Idx ex_subexp, int type);
174 internal_function;
175static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, 147static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
176 re_node_set *dst_nodes, 148 re_node_set *dst_nodes,
177 Idx target, Idx ex_subexp, 149 Idx target, Idx ex_subexp,
178 int type) internal_function; 150 int type);
179static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, 151static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
180 re_node_set *cur_nodes, Idx cur_str, 152 re_node_set *cur_nodes, Idx cur_str,
181 Idx subexp_num, int type) 153 Idx subexp_num, int type);
182 internal_function; 154static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
183static bool build_trtable (const re_dfa_t *dfa,
184 re_dfastate_t *state) internal_function;
185#ifdef RE_ENABLE_I18N
186static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, 155static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
187 const re_string_t *input, Idx idx) 156 const re_string_t *input, Idx idx);
188 internal_function; 157#ifdef _LIBC
189# ifdef _LIBC
190static unsigned int find_collation_sequence_value (const unsigned char *mbs, 158static unsigned int find_collation_sequence_value (const unsigned char *mbs,
191 size_t name_len) 159 size_t name_len);
192 internal_function; 160#endif
193# endif /* _LIBC */
194#endif /* RE_ENABLE_I18N */
195static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, 161static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
196 const re_dfastate_t *state, 162 const re_dfastate_t *state,
197 re_node_set *states_node, 163 re_node_set *states_node,
198 bitset_t *states_ch) internal_function; 164 bitset_t *states_ch);
199static bool check_node_accept (const re_match_context_t *mctx, 165static bool check_node_accept (const re_match_context_t *mctx,
200 const re_token_t *node, Idx idx) 166 const re_token_t *node, Idx idx);
201 internal_function; 167static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
202static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
203 internal_function;
204 168
205/* Entry point for POSIX code. */ 169/* Entry point for POSIX code. */
206 170
@@ -216,15 +180,12 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
216 REG_NOTBOL is set, then ^ does not match at the beginning of the 180 REG_NOTBOL is set, then ^ does not match at the beginning of the
217 string; if REG_NOTEOL is set, then $ does not match at the end. 181 string; if REG_NOTEOL is set, then $ does not match at the end.
218 182
219 We return 0 if we find a match and REG_NOMATCH if not. */ 183 Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
184 EFLAGS is invalid. */
220 185
221int 186int
222regexec (preg, string, nmatch, pmatch, eflags) 187regexec (const regex_t *__restrict preg, const char *__restrict string,
223 const regex_t *_Restrict_ preg; 188 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
224 const char *_Restrict_ string;
225 size_t nmatch;
226 regmatch_t pmatch[_Restrict_arr_];
227 int eflags;
228{ 189{
229 reg_errcode_t err; 190 reg_errcode_t err;
230 Idx start, length; 191 Idx start, length;
@@ -256,6 +217,8 @@ regexec (preg, string, nmatch, pmatch, eflags)
256} 217}
257 218
258#ifdef _LIBC 219#ifdef _LIBC
220libc_hidden_def (__regexec)
221
259# include <shlib-compat.h> 222# include <shlib-compat.h>
260versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); 223versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
261 224
@@ -264,9 +227,9 @@ __typeof__ (__regexec) __compat_regexec;
264 227
265int 228int
266attribute_compat_text_section 229attribute_compat_text_section
267__compat_regexec (const regex_t *_Restrict_ preg, 230__compat_regexec (const regex_t *__restrict preg,
268 const char *_Restrict_ string, size_t nmatch, 231 const char *__restrict string, size_t nmatch,
269 regmatch_t pmatch[], int eflags) 232 regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
270{ 233{
271 return regexec (preg, string, nmatch, pmatch, 234 return regexec (preg, string, nmatch, pmatch,
272 eflags & (REG_NOTBOL | REG_NOTEOL)); 235 eflags & (REG_NOTBOL | REG_NOTEOL));
@@ -301,15 +264,12 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
301 strings.) 264 strings.)
302 265
303 On success, re_match* functions return the length of the match, re_search* 266 On success, re_match* functions return the length of the match, re_search*
304 return the position of the start of the match. Return value -1 means no 267 return the position of the start of the match. They return -1 on
305 match was found and -2 indicates an internal error. */ 268 match failure, -2 on error. */
306 269
307regoff_t 270regoff_t
308re_match (bufp, string, length, start, regs) 271re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
309 struct re_pattern_buffer *bufp; 272 Idx start, struct re_registers *regs)
310 const char *string;
311 Idx length, start;
312 struct re_registers *regs;
313{ 273{
314 return re_search_stub (bufp, string, length, start, 0, length, regs, true); 274 return re_search_stub (bufp, string, length, start, 0, length, regs, true);
315} 275}
@@ -318,12 +278,8 @@ weak_alias (__re_match, re_match)
318#endif 278#endif
319 279
320regoff_t 280regoff_t
321re_search (bufp, string, length, start, range, regs) 281re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
322 struct re_pattern_buffer *bufp; 282 Idx start, regoff_t range, struct re_registers *regs)
323 const char *string;
324 Idx length, start;
325 regoff_t range;
326 struct re_registers *regs;
327{ 283{
328 return re_search_stub (bufp, string, length, start, range, length, regs, 284 return re_search_stub (bufp, string, length, start, range, length, regs,
329 false); 285 false);
@@ -333,11 +289,9 @@ weak_alias (__re_search, re_search)
333#endif 289#endif
334 290
335regoff_t 291regoff_t
336re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) 292re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
337 struct re_pattern_buffer *bufp; 293 const char *string2, Idx length2, Idx start,
338 const char *string1, *string2; 294 struct re_registers *regs, Idx stop)
339 Idx length1, length2, start, stop;
340 struct re_registers *regs;
341{ 295{
342 return re_search_2_stub (bufp, string1, length1, string2, length2, 296 return re_search_2_stub (bufp, string1, length1, string2, length2,
343 start, 0, regs, stop, true); 297 start, 0, regs, stop, true);
@@ -347,12 +301,9 @@ weak_alias (__re_match_2, re_match_2)
347#endif 301#endif
348 302
349regoff_t 303regoff_t
350re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) 304re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
351 struct re_pattern_buffer *bufp; 305 const char *string2, Idx length2, Idx start, regoff_t range,
352 const char *string1, *string2; 306 struct re_registers *regs, Idx stop)
353 Idx length1, length2, start, stop;
354 regoff_t range;
355 struct re_registers *regs;
356{ 307{
357 return re_search_2_stub (bufp, string1, length1, string2, length2, 308 return re_search_2_stub (bufp, string1, length1, string2, length2,
358 start, range, regs, stop, false); 309 start, range, regs, stop, false);
@@ -362,18 +313,18 @@ weak_alias (__re_search_2, re_search_2)
362#endif 313#endif
363 314
364static regoff_t 315static regoff_t
365re_search_2_stub (struct re_pattern_buffer *bufp, 316re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
366 const char *string1, Idx length1, 317 Idx length1, const char *string2, Idx length2, Idx start,
367 const char *string2, Idx length2, 318 regoff_t range, struct re_registers *regs,
368 Idx start, regoff_t range, struct re_registers *regs,
369 Idx stop, bool ret_len) 319 Idx stop, bool ret_len)
370{ 320{
371 const char *str; 321 const char *str;
372 regoff_t rval; 322 regoff_t rval;
373 Idx len = length1 + length2; 323 Idx len;
374 char *s = NULL; 324 char *s = NULL;
375 325
376 if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) 326 if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
327 || INT_ADD_WRAPV (length1, length2, &len))))
377 return -2; 328 return -2;
378 329
379 /* Concatenate the strings. */ 330 /* Concatenate the strings. */
@@ -382,7 +333,7 @@ re_search_2_stub (struct re_pattern_buffer *bufp,
382 { 333 {
383 s = re_malloc (char, len); 334 s = re_malloc (char, len);
384 335
385 if (BE (s == NULL, 0)) 336 if (__glibc_unlikely (s == NULL))
386 return -2; 337 return -2;
387#ifdef _LIBC 338#ifdef _LIBC
388 memcpy (__mempcpy (s, string1, length1), string2, length2); 339 memcpy (__mempcpy (s, string1, length1), string2, length2);
@@ -409,8 +360,7 @@ re_search_2_stub (struct re_pattern_buffer *bufp,
409 otherwise the position of the match is returned. */ 360 otherwise the position of the match is returned. */
410 361
411static regoff_t 362static regoff_t
412re_search_stub (struct re_pattern_buffer *bufp, 363re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
413 const char *string, Idx length,
414 Idx start, regoff_t range, Idx stop, struct re_registers *regs, 364 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
415 bool ret_len) 365 bool ret_len)
416{ 366{
@@ -423,11 +373,13 @@ re_search_stub (struct re_pattern_buffer *bufp,
423 Idx last_start = start + range; 373 Idx last_start = start + range;
424 374
425 /* Check for out-of-range. */ 375 /* Check for out-of-range. */
426 if (BE (start < 0 || start > length, 0)) 376 if (__glibc_unlikely (start < 0 || start > length))
427 return -1; 377 return -1;
428 if (BE (length < last_start || (0 <= range && last_start < start), 0)) 378 if (__glibc_unlikely (length < last_start
379 || (0 <= range && last_start < start)))
429 last_start = length; 380 last_start = length;
430 else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) 381 else if (__glibc_unlikely (last_start < 0
382 || (range < 0 && start <= last_start)))
431 last_start = 0; 383 last_start = 0;
432 384
433 lock_lock (dfa->lock); 385 lock_lock (dfa->lock);
@@ -439,17 +391,17 @@ re_search_stub (struct re_pattern_buffer *bufp,
439 if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) 391 if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
440 re_compile_fastmap (bufp); 392 re_compile_fastmap (bufp);
441 393
442 if (BE (bufp->no_sub, 0)) 394 if (__glibc_unlikely (bufp->no_sub))
443 regs = NULL; 395 regs = NULL;
444 396
445 /* We need at least 1 register. */ 397 /* We need at least 1 register. */
446 if (regs == NULL) 398 if (regs == NULL)
447 nregs = 1; 399 nregs = 1;
448 else if (BE (bufp->regs_allocated == REGS_FIXED 400 else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
449 && regs->num_regs <= bufp->re_nsub, 0)) 401 && regs->num_regs <= bufp->re_nsub))
450 { 402 {
451 nregs = regs->num_regs; 403 nregs = regs->num_regs;
452 if (BE (nregs < 1, 0)) 404 if (__glibc_unlikely (nregs < 1))
453 { 405 {
454 /* Nothing can be copied to regs. */ 406 /* Nothing can be copied to regs. */
455 regs = NULL; 407 regs = NULL;
@@ -459,7 +411,7 @@ re_search_stub (struct re_pattern_buffer *bufp,
459 else 411 else
460 nregs = bufp->re_nsub + 1; 412 nregs = bufp->re_nsub + 1;
461 pmatch = re_malloc (regmatch_t, nregs); 413 pmatch = re_malloc (regmatch_t, nregs);
462 if (BE (pmatch == NULL, 0)) 414 if (__glibc_unlikely (pmatch == NULL))
463 { 415 {
464 rval = -2; 416 rval = -2;
465 goto out; 417 goto out;
@@ -478,15 +430,15 @@ re_search_stub (struct re_pattern_buffer *bufp,
478 /* If caller wants register contents data back, copy them. */ 430 /* If caller wants register contents data back, copy them. */
479 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, 431 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
480 bufp->regs_allocated); 432 bufp->regs_allocated);
481 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) 433 if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
482 rval = -2; 434 rval = -2;
483 } 435 }
484 436
485 if (BE (rval == 0, 1)) 437 if (__glibc_likely (rval == 0))
486 { 438 {
487 if (ret_len) 439 if (ret_len)
488 { 440 {
489 assert (pmatch[0].rm_so == start); 441 DEBUG_ASSERT (pmatch[0].rm_so == start);
490 rval = pmatch[0].rm_eo - start; 442 rval = pmatch[0].rm_eo - start;
491 } 443 }
492 else 444 else
@@ -512,10 +464,10 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
512 if (regs_allocated == REGS_UNALLOCATED) 464 if (regs_allocated == REGS_UNALLOCATED)
513 { /* No. So allocate them with malloc. */ 465 { /* No. So allocate them with malloc. */
514 regs->start = re_malloc (regoff_t, need_regs); 466 regs->start = re_malloc (regoff_t, need_regs);
515 if (BE (regs->start == NULL, 0)) 467 if (__glibc_unlikely (regs->start == NULL))
516 return REGS_UNALLOCATED; 468 return REGS_UNALLOCATED;
517 regs->end = re_malloc (regoff_t, need_regs); 469 regs->end = re_malloc (regoff_t, need_regs);
518 if (BE (regs->end == NULL, 0)) 470 if (__glibc_unlikely (regs->end == NULL))
519 { 471 {
520 re_free (regs->start); 472 re_free (regs->start);
521 return REGS_UNALLOCATED; 473 return REGS_UNALLOCATED;
@@ -526,14 +478,14 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
526 { /* Yes. If we need more elements than were already 478 { /* Yes. If we need more elements than were already
527 allocated, reallocate them. If we need fewer, just 479 allocated, reallocate them. If we need fewer, just
528 leave it alone. */ 480 leave it alone. */
529 if (BE (need_regs > regs->num_regs, 0)) 481 if (__glibc_unlikely (need_regs > regs->num_regs))
530 { 482 {
531 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); 483 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
532 regoff_t *new_end; 484 regoff_t *new_end;
533 if (BE (new_start == NULL, 0)) 485 if (__glibc_unlikely (new_start == NULL))
534 return REGS_UNALLOCATED; 486 return REGS_UNALLOCATED;
535 new_end = re_realloc (regs->end, regoff_t, need_regs); 487 new_end = re_realloc (regs->end, regoff_t, need_regs);
536 if (BE (new_end == NULL, 0)) 488 if (__glibc_unlikely (new_end == NULL))
537 { 489 {
538 re_free (new_start); 490 re_free (new_start);
539 return REGS_UNALLOCATED; 491 return REGS_UNALLOCATED;
@@ -545,9 +497,9 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
545 } 497 }
546 else 498 else
547 { 499 {
548 assert (regs_allocated == REGS_FIXED); 500 DEBUG_ASSERT (regs_allocated == REGS_FIXED);
549 /* This function may not be called with REGS_FIXED and nregs too big. */ 501 /* This function may not be called with REGS_FIXED and nregs too big. */
550 assert (regs->num_regs >= nregs); 502 DEBUG_ASSERT (nregs <= regs->num_regs);
551 rval = REGS_FIXED; 503 rval = REGS_FIXED;
552 } 504 }
553 505
@@ -577,11 +529,8 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
577 freeing the old data. */ 529 freeing the old data. */
578 530
579void 531void
580re_set_registers (bufp, regs, num_regs, starts, ends) 532re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
581 struct re_pattern_buffer *bufp; 533 __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
582 struct re_registers *regs;
583 __re_size_t num_regs;
584 regoff_t *starts, *ends;
585{ 534{
586 if (num_regs) 535 if (num_regs)
587 { 536 {
@@ -609,8 +558,7 @@ int
609# ifdef _LIBC 558# ifdef _LIBC
610weak_function 559weak_function
611# endif 560# endif
612re_exec (s) 561re_exec (const char *s)
613 const char *s;
614{ 562{
615 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); 563 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
616} 564}
@@ -629,11 +577,9 @@ re_exec (s)
629 577
630static reg_errcode_t 578static reg_errcode_t
631__attribute_warn_unused_result__ 579__attribute_warn_unused_result__
632re_search_internal (const regex_t *preg, 580re_search_internal (const regex_t *preg, const char *string, Idx length,
633 const char *string, Idx length, 581 Idx start, Idx last_start, Idx stop, size_t nmatch,
634 Idx start, Idx last_start, Idx stop, 582 regmatch_t pmatch[], int eflags)
635 size_t nmatch, regmatch_t pmatch[],
636 int eflags)
637{ 583{
638 reg_errcode_t err; 584 reg_errcode_t err;
639 const re_dfa_t *dfa = preg->buffer; 585 const re_dfa_t *dfa = preg->buffer;
@@ -642,38 +588,28 @@ re_search_internal (const regex_t *preg,
642 bool fl_longest_match; 588 bool fl_longest_match;
643 int match_kind; 589 int match_kind;
644 Idx match_first; 590 Idx match_first;
645 Idx match_last = REG_MISSING; 591 Idx match_last = -1;
646 Idx extra_nmatch; 592 Idx extra_nmatch;
647 bool sb; 593 bool sb;
648 int ch; 594 int ch;
649#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
650 re_match_context_t mctx = { .dfa = dfa }; 595 re_match_context_t mctx = { .dfa = dfa };
651#else
652 re_match_context_t mctx;
653#endif
654 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate 596 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
655 && start != last_start && !preg->can_be_null) 597 && start != last_start && !preg->can_be_null)
656 ? preg->fastmap : NULL); 598 ? preg->fastmap : NULL);
657 RE_TRANSLATE_TYPE t = preg->translate; 599 RE_TRANSLATE_TYPE t = preg->translate;
658 600
659#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
660 memset (&mctx, '\0', sizeof (re_match_context_t));
661 mctx.dfa = dfa;
662#endif
663
664 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; 601 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
665 nmatch -= extra_nmatch; 602 nmatch -= extra_nmatch;
666 603
667 /* Check if the DFA haven't been compiled. */ 604 /* Check if the DFA haven't been compiled. */
668 if (BE (preg->used == 0 || dfa->init_state == NULL 605 if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
669 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL 606 || dfa->init_state_word == NULL
670 || dfa->init_state_begbuf == NULL, 0)) 607 || dfa->init_state_nl == NULL
608 || dfa->init_state_begbuf == NULL))
671 return REG_NOMATCH; 609 return REG_NOMATCH;
672 610
673#ifdef DEBUG
674 /* We assume front-end functions already check them. */ 611 /* We assume front-end functions already check them. */
675 assert (0 <= last_start && last_start <= length); 612 DEBUG_ASSERT (0 <= last_start && last_start <= length);
676#endif
677 613
678 /* If initial states with non-begbuf contexts have no elements, 614 /* If initial states with non-begbuf contexts have no elements,
679 the regex must be anchored. If preg->newline_anchor is set, 615 the regex must be anchored. If preg->newline_anchor is set,
@@ -694,14 +630,14 @@ re_search_internal (const regex_t *preg,
694 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, 630 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
695 preg->translate, (preg->syntax & RE_ICASE) != 0, 631 preg->translate, (preg->syntax & RE_ICASE) != 0,
696 dfa); 632 dfa);
697 if (BE (err != REG_NOERROR, 0)) 633 if (__glibc_unlikely (err != REG_NOERROR))
698 goto free_return; 634 goto free_return;
699 mctx.input.stop = stop; 635 mctx.input.stop = stop;
700 mctx.input.raw_stop = stop; 636 mctx.input.raw_stop = stop;
701 mctx.input.newline_anchor = preg->newline_anchor; 637 mctx.input.newline_anchor = preg->newline_anchor;
702 638
703 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); 639 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
704 if (BE (err != REG_NOERROR, 0)) 640 if (__glibc_unlikely (err != REG_NOERROR))
705 goto free_return; 641 goto free_return;
706 642
707 /* We will log all the DFA states through which the dfa pass, 643 /* We will log all the DFA states through which the dfa pass,
@@ -711,22 +647,20 @@ re_search_internal (const regex_t *preg,
711 if (nmatch > 1 || dfa->has_mb_node) 647 if (nmatch > 1 || dfa->has_mb_node)
712 { 648 {
713 /* Avoid overflow. */ 649 /* Avoid overflow. */
714 if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) 650 if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
715 <= mctx.input.bufs_len), 0)) 651 <= mctx.input.bufs_len)))
716 { 652 {
717 err = REG_ESPACE; 653 err = REG_ESPACE;
718 goto free_return; 654 goto free_return;
719 } 655 }
720 656
721 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); 657 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
722 if (BE (mctx.state_log == NULL, 0)) 658 if (__glibc_unlikely (mctx.state_log == NULL))
723 { 659 {
724 err = REG_ESPACE; 660 err = REG_ESPACE;
725 goto free_return; 661 goto free_return;
726 } 662 }
727 } 663 }
728 else
729 mctx.state_log = NULL;
730 664
731 match_first = start; 665 match_first = start;
732 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF 666 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
@@ -763,19 +697,19 @@ re_search_internal (const regex_t *preg,
763 697
764 case 7: 698 case 7:
765 /* Fastmap with single-byte translation, match forward. */ 699 /* Fastmap with single-byte translation, match forward. */
766 while (BE (match_first < right_lim, 1) 700 while (__glibc_likely (match_first < right_lim)
767 && !fastmap[t[(unsigned char) string[match_first]]]) 701 && !fastmap[t[(unsigned char) string[match_first]]])
768 ++match_first; 702 ++match_first;
769 goto forward_match_found_start_or_reached_end; 703 goto forward_match_found_start_or_reached_end;
770 704
771 case 6: 705 case 6:
772 /* Fastmap without translation, match forward. */ 706 /* Fastmap without translation, match forward. */
773 while (BE (match_first < right_lim, 1) 707 while (__glibc_likely (match_first < right_lim)
774 && !fastmap[(unsigned char) string[match_first]]) 708 && !fastmap[(unsigned char) string[match_first]])
775 ++match_first; 709 ++match_first;
776 710
777 forward_match_found_start_or_reached_end: 711 forward_match_found_start_or_reached_end:
778 if (BE (match_first == right_lim, 0)) 712 if (__glibc_unlikely (match_first == right_lim))
779 { 713 {
780 ch = match_first >= length 714 ch = match_first >= length
781 ? 0 : (unsigned char) string[match_first]; 715 ? 0 : (unsigned char) string[match_first];
@@ -808,19 +742,19 @@ re_search_internal (const regex_t *preg,
808 /* If MATCH_FIRST is out of the valid range, reconstruct the 742 /* If MATCH_FIRST is out of the valid range, reconstruct the
809 buffers. */ 743 buffers. */
810 __re_size_t offset = match_first - mctx.input.raw_mbs_idx; 744 __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
811 if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0)) 745 if (__glibc_unlikely (offset
746 >= (__re_size_t) mctx.input.valid_raw_len))
812 { 747 {
813 err = re_string_reconstruct (&mctx.input, match_first, 748 err = re_string_reconstruct (&mctx.input, match_first,
814 eflags); 749 eflags);
815 if (BE (err != REG_NOERROR, 0)) 750 if (__glibc_unlikely (err != REG_NOERROR))
816 goto free_return; 751 goto free_return;
817 752
818 offset = match_first - mctx.input.raw_mbs_idx; 753 offset = match_first - mctx.input.raw_mbs_idx;
819 } 754 }
820 /* If MATCH_FIRST is out of the buffer, leave it as '\0'. 755 /* Use buffer byte if OFFSET is in buffer, otherwise '\0'. */
821 Note that MATCH_FIRST must not be smaller than 0. */ 756 ch = (offset < mctx.input.valid_len
822 ch = (match_first >= length 757 ? re_string_byte_at (&mctx.input, offset) : 0);
823 ? 0 : re_string_byte_at (&mctx.input, offset));
824 if (fastmap[ch]) 758 if (fastmap[ch])
825 break; 759 break;
826 match_first += incr; 760 match_first += incr;
@@ -836,24 +770,22 @@ re_search_internal (const regex_t *preg,
836 /* Reconstruct the buffers so that the matcher can assume that 770 /* Reconstruct the buffers so that the matcher can assume that
837 the matching starts from the beginning of the buffer. */ 771 the matching starts from the beginning of the buffer. */
838 err = re_string_reconstruct (&mctx.input, match_first, eflags); 772 err = re_string_reconstruct (&mctx.input, match_first, eflags);
839 if (BE (err != REG_NOERROR, 0)) 773 if (__glibc_unlikely (err != REG_NOERROR))
840 goto free_return; 774 goto free_return;
841 775
842#ifdef RE_ENABLE_I18N 776 /* Don't consider this char as a possible match start if it part,
843 /* Don't consider this char as a possible match start if it part, 777 yet isn't the head, of a multibyte character. */
844 yet isn't the head, of a multibyte character. */
845 if (!sb && !re_string_first_byte (&mctx.input, 0)) 778 if (!sb && !re_string_first_byte (&mctx.input, 0))
846 continue; 779 continue;
847#endif
848 780
849 /* It seems to be appropriate one, then use the matcher. */ 781 /* It seems to be appropriate one, then use the matcher. */
850 /* We assume that the matching starts from 0. */ 782 /* We assume that the matching starts from 0. */
851 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; 783 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
852 match_last = check_matching (&mctx, fl_longest_match, 784 match_last = check_matching (&mctx, fl_longest_match,
853 start <= last_start ? &match_first : NULL); 785 start <= last_start ? &match_first : NULL);
854 if (match_last != REG_MISSING) 786 if (match_last != -1)
855 { 787 {
856 if (BE (match_last == REG_ERROR, 0)) 788 if (__glibc_unlikely (match_last == -2))
857 { 789 {
858 err = REG_ESPACE; 790 err = REG_ESPACE;
859 goto free_return; 791 goto free_return;
@@ -873,9 +805,9 @@ re_search_internal (const regex_t *preg,
873 err = prune_impossible_nodes (&mctx); 805 err = prune_impossible_nodes (&mctx);
874 if (err == REG_NOERROR) 806 if (err == REG_NOERROR)
875 break; 807 break;
876 if (BE (err != REG_NOMATCH, 0)) 808 if (__glibc_unlikely (err != REG_NOMATCH))
877 goto free_return; 809 goto free_return;
878 match_last = REG_MISSING; 810 match_last = -1;
879 } 811 }
880 else 812 else
881 break; /* We found a match. */ 813 break; /* We found a match. */
@@ -885,10 +817,8 @@ re_search_internal (const regex_t *preg,
885 match_ctx_clean (&mctx); 817 match_ctx_clean (&mctx);
886 } 818 }
887 819
888#ifdef DEBUG 820 DEBUG_ASSERT (match_last != -1);
889 assert (match_last != REG_MISSING); 821 DEBUG_ASSERT (err == REG_NOERROR);
890 assert (err == REG_NOERROR);
891#endif
892 822
893 /* Set pmatch[] if we need. */ 823 /* Set pmatch[] if we need. */
894 if (nmatch > 0) 824 if (nmatch > 0)
@@ -910,7 +840,7 @@ re_search_internal (const regex_t *preg,
910 { 840 {
911 err = set_regs (preg, &mctx, nmatch, pmatch, 841 err = set_regs (preg, &mctx, nmatch, pmatch,
912 dfa->has_plural_match && dfa->nbackref > 0); 842 dfa->has_plural_match && dfa->nbackref > 0);
913 if (BE (err != REG_NOERROR, 0)) 843 if (__glibc_unlikely (err != REG_NOERROR))
914 goto free_return; 844 goto free_return;
915 } 845 }
916 846
@@ -920,8 +850,7 @@ re_search_internal (const regex_t *preg,
920 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 850 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
921 if (pmatch[reg_idx].rm_so != -1) 851 if (pmatch[reg_idx].rm_so != -1)
922 { 852 {
923#ifdef RE_ENABLE_I18N 853 if (__glibc_unlikely (mctx.input.offsets_needed != 0))
924 if (BE (mctx.input.offsets_needed != 0, 0))
925 { 854 {
926 pmatch[reg_idx].rm_so = 855 pmatch[reg_idx].rm_so =
927 (pmatch[reg_idx].rm_so == mctx.input.valid_len 856 (pmatch[reg_idx].rm_so == mctx.input.valid_len
@@ -932,9 +861,6 @@ re_search_internal (const regex_t *preg,
932 ? mctx.input.valid_raw_len 861 ? mctx.input.valid_raw_len
933 : mctx.input.offsets[pmatch[reg_idx].rm_eo]); 862 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
934 } 863 }
935#else
936 assert (mctx.input.offsets_needed == 0);
937#endif
938 pmatch[reg_idx].rm_so += match_first; 864 pmatch[reg_idx].rm_so += match_first;
939 pmatch[reg_idx].rm_eo += match_first; 865 pmatch[reg_idx].rm_eo += match_first;
940 } 866 }
@@ -973,18 +899,17 @@ prune_impossible_nodes (re_match_context_t *mctx)
973 re_dfastate_t **sifted_states; 899 re_dfastate_t **sifted_states;
974 re_dfastate_t **lim_states = NULL; 900 re_dfastate_t **lim_states = NULL;
975 re_sift_context_t sctx; 901 re_sift_context_t sctx;
976#ifdef DEBUG 902 DEBUG_ASSERT (mctx->state_log != NULL);
977 assert (mctx->state_log != NULL);
978#endif
979 match_last = mctx->match_last; 903 match_last = mctx->match_last;
980 halt_node = mctx->last_node; 904 halt_node = mctx->last_node;
981 905
982 /* Avoid overflow. */ 906 /* Avoid overflow. */
983 if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0)) 907 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
908 <= match_last))
984 return REG_ESPACE; 909 return REG_ESPACE;
985 910
986 sifted_states = re_malloc (re_dfastate_t *, match_last + 1); 911 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
987 if (BE (sifted_states == NULL, 0)) 912 if (__glibc_unlikely (sifted_states == NULL))
988 { 913 {
989 ret = REG_ESPACE; 914 ret = REG_ESPACE;
990 goto free_return; 915 goto free_return;
@@ -992,7 +917,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
992 if (dfa->nbackref) 917 if (dfa->nbackref)
993 { 918 {
994 lim_states = re_malloc (re_dfastate_t *, match_last + 1); 919 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
995 if (BE (lim_states == NULL, 0)) 920 if (__glibc_unlikely (lim_states == NULL))
996 { 921 {
997 ret = REG_ESPACE; 922 ret = REG_ESPACE;
998 goto free_return; 923 goto free_return;
@@ -1005,14 +930,14 @@ prune_impossible_nodes (re_match_context_t *mctx)
1005 match_last); 930 match_last);
1006 ret = sift_states_backward (mctx, &sctx); 931 ret = sift_states_backward (mctx, &sctx);
1007 re_node_set_free (&sctx.limits); 932 re_node_set_free (&sctx.limits);
1008 if (BE (ret != REG_NOERROR, 0)) 933 if (__glibc_unlikely (ret != REG_NOERROR))
1009 goto free_return; 934 goto free_return;
1010 if (sifted_states[0] != NULL || lim_states[0] != NULL) 935 if (sifted_states[0] != NULL || lim_states[0] != NULL)
1011 break; 936 break;
1012 do 937 do
1013 { 938 {
1014 --match_last; 939 --match_last;
1015 if (! REG_VALID_INDEX (match_last)) 940 if (match_last < 0)
1016 { 941 {
1017 ret = REG_NOMATCH; 942 ret = REG_NOMATCH;
1018 goto free_return; 943 goto free_return;
@@ -1027,7 +952,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
1027 match_last + 1); 952 match_last + 1);
1028 re_free (lim_states); 953 re_free (lim_states);
1029 lim_states = NULL; 954 lim_states = NULL;
1030 if (BE (ret != REG_NOERROR, 0)) 955 if (__glibc_unlikely (ret != REG_NOERROR))
1031 goto free_return; 956 goto free_return;
1032 } 957 }
1033 else 958 else
@@ -1035,7 +960,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
1035 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); 960 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1036 ret = sift_states_backward (mctx, &sctx); 961 ret = sift_states_backward (mctx, &sctx);
1037 re_node_set_free (&sctx.limits); 962 re_node_set_free (&sctx.limits);
1038 if (BE (ret != REG_NOERROR, 0)) 963 if (__glibc_unlikely (ret != REG_NOERROR))
1039 goto free_return; 964 goto free_return;
1040 if (sifted_states[0] == NULL) 965 if (sifted_states[0] == NULL)
1041 { 966 {
@@ -1059,8 +984,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
1059 We must select appropriate initial state depending on the context, 984 We must select appropriate initial state depending on the context,
1060 since initial states may have constraints like "\<", "^", etc.. */ 985 since initial states may have constraints like "\<", "^", etc.. */
1061 986
1062static inline re_dfastate_t * 987static __always_inline re_dfastate_t *
1063__attribute__ ((always_inline)) internal_function
1064acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, 988acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1065 Idx idx) 989 Idx idx)
1066{ 990{
@@ -1093,8 +1017,8 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1093} 1017}
1094 1018
1095/* Check whether the regular expression match input string INPUT or not, 1019/* Check whether the regular expression match input string INPUT or not,
1096 and return the index where the matching end. Return REG_MISSING if 1020 and return the index where the matching end. Return -1 if
1097 there is no match, and return REG_ERROR in case of an error. 1021 there is no match, and return -2 in case of an error.
1098 FL_LONGEST_MATCH means we want the POSIX longest matching. 1022 FL_LONGEST_MATCH means we want the POSIX longest matching.
1099 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the 1023 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1100 next place where we may want to try matching. 1024 next place where we may want to try matching.
@@ -1102,14 +1026,14 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1102 index of the buffer. */ 1026 index of the buffer. */
1103 1027
1104static Idx 1028static Idx
1105internal_function __attribute_warn_unused_result__ 1029__attribute_warn_unused_result__
1106check_matching (re_match_context_t *mctx, bool fl_longest_match, 1030check_matching (re_match_context_t *mctx, bool fl_longest_match,
1107 Idx *p_match_first) 1031 Idx *p_match_first)
1108{ 1032{
1109 const re_dfa_t *const dfa = mctx->dfa; 1033 const re_dfa_t *const dfa = mctx->dfa;
1110 reg_errcode_t err; 1034 reg_errcode_t err;
1111 Idx match = 0; 1035 Idx match = 0;
1112 Idx match_last = REG_MISSING; 1036 Idx match_last = -1;
1113 Idx cur_str_idx = re_string_cur_idx (&mctx->input); 1037 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1114 re_dfastate_t *cur_state; 1038 re_dfastate_t *cur_state;
1115 bool at_init_state = p_match_first != NULL; 1039 bool at_init_state = p_match_first != NULL;
@@ -1118,10 +1042,10 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
1118 err = REG_NOERROR; 1042 err = REG_NOERROR;
1119 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); 1043 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1120 /* An initial state must not be NULL (invalid). */ 1044 /* An initial state must not be NULL (invalid). */
1121 if (BE (cur_state == NULL, 0)) 1045 if (__glibc_unlikely (cur_state == NULL))
1122 { 1046 {
1123 assert (err == REG_ESPACE); 1047 DEBUG_ASSERT (err == REG_ESPACE);
1124 return REG_ERROR; 1048 return -2;
1125 } 1049 }
1126 1050
1127 if (mctx->state_log != NULL) 1051 if (mctx->state_log != NULL)
@@ -1130,24 +1054,24 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
1130 1054
1131 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them 1055 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1132 later. E.g. Processing back references. */ 1056 later. E.g. Processing back references. */
1133 if (BE (dfa->nbackref, 0)) 1057 if (__glibc_unlikely (dfa->nbackref))
1134 { 1058 {
1135 at_init_state = false; 1059 at_init_state = false;
1136 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); 1060 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1137 if (BE (err != REG_NOERROR, 0)) 1061 if (__glibc_unlikely (err != REG_NOERROR))
1138 return err; 1062 return err;
1139 1063
1140 if (cur_state->has_backref) 1064 if (cur_state->has_backref)
1141 { 1065 {
1142 err = transit_state_bkref (mctx, &cur_state->nodes); 1066 err = transit_state_bkref (mctx, &cur_state->nodes);
1143 if (BE (err != REG_NOERROR, 0)) 1067 if (__glibc_unlikely (err != REG_NOERROR))
1144 return err; 1068 return err;
1145 } 1069 }
1146 } 1070 }
1147 } 1071 }
1148 1072
1149 /* If the RE accepts NULL string. */ 1073 /* If the RE accepts NULL string. */
1150 if (BE (cur_state->halt, 0)) 1074 if (__glibc_unlikely (cur_state->halt))
1151 { 1075 {
1152 if (!cur_state->has_constraint 1076 if (!cur_state->has_constraint
1153 || check_halt_state_context (mctx, cur_state, cur_str_idx)) 1077 || check_halt_state_context (mctx, cur_state, cur_str_idx))
@@ -1167,16 +1091,16 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
1167 re_dfastate_t *old_state = cur_state; 1091 re_dfastate_t *old_state = cur_state;
1168 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; 1092 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1169 1093
1170 if ((BE (next_char_idx >= mctx->input.bufs_len, 0) 1094 if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1171 && mctx->input.bufs_len < mctx->input.len) 1095 && mctx->input.bufs_len < mctx->input.len)
1172 || (BE (next_char_idx >= mctx->input.valid_len, 0) 1096 || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1173 && mctx->input.valid_len < mctx->input.len)) 1097 && mctx->input.valid_len < mctx->input.len))
1174 { 1098 {
1175 err = extend_buffers (mctx, next_char_idx + 1); 1099 err = extend_buffers (mctx, next_char_idx + 1);
1176 if (BE (err != REG_NOERROR, 0)) 1100 if (__glibc_unlikely (err != REG_NOERROR))
1177 { 1101 {
1178 assert (err == REG_ESPACE); 1102 DEBUG_ASSERT (err == REG_ESPACE);
1179 return REG_ERROR; 1103 return -2;
1180 } 1104 }
1181 } 1105 }
1182 1106
@@ -1189,8 +1113,8 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
1189 /* Reached the invalid state or an error. Try to recover a valid 1113 /* Reached the invalid state or an error. Try to recover a valid
1190 state using the state log, if available and if we have not 1114 state using the state log, if available and if we have not
1191 already found a valid (even if not the longest) match. */ 1115 already found a valid (even if not the longest) match. */
1192 if (BE (err != REG_NOERROR, 0)) 1116 if (__glibc_unlikely (err != REG_NOERROR))
1193 return REG_ERROR; 1117 return -2;
1194 1118
1195 if (mctx->state_log == NULL 1119 if (mctx->state_log == NULL
1196 || (match && !fl_longest_match) 1120 || (match && !fl_longest_match)
@@ -1198,7 +1122,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
1198 break; 1122 break;
1199 } 1123 }
1200 1124
1201 if (BE (at_init_state, 0)) 1125 if (__glibc_unlikely (at_init_state))
1202 { 1126 {
1203 if (old_state == cur_state) 1127 if (old_state == cur_state)
1204 next_start_idx = next_char_idx; 1128 next_start_idx = next_char_idx;
@@ -1235,7 +1159,6 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
1235/* Check NODE match the current context. */ 1159/* Check NODE match the current context. */
1236 1160
1237static bool 1161static bool
1238internal_function
1239check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) 1162check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1240{ 1163{
1241 re_token_type_t type = dfa->nodes[node].type; 1164 re_token_type_t type = dfa->nodes[node].type;
@@ -1254,15 +1177,12 @@ check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1254 match the context, return the node. */ 1177 match the context, return the node. */
1255 1178
1256static Idx 1179static Idx
1257internal_function
1258check_halt_state_context (const re_match_context_t *mctx, 1180check_halt_state_context (const re_match_context_t *mctx,
1259 const re_dfastate_t *state, Idx idx) 1181 const re_dfastate_t *state, Idx idx)
1260{ 1182{
1261 Idx i; 1183 Idx i;
1262 unsigned int context; 1184 unsigned int context;
1263#ifdef DEBUG 1185 DEBUG_ASSERT (state->halt);
1264 assert (state->halt);
1265#endif
1266 context = re_string_context_at (&mctx->input, idx, mctx->eflags); 1186 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1267 for (i = 0; i < state->nodes.nelem; ++i) 1187 for (i = 0; i < state->nodes.nelem; ++i)
1268 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) 1188 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
@@ -1273,33 +1193,35 @@ check_halt_state_context (const re_match_context_t *mctx,
1273/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA 1193/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1274 corresponding to the DFA). 1194 corresponding to the DFA).
1275 Return the destination node, and update EPS_VIA_NODES; 1195 Return the destination node, and update EPS_VIA_NODES;
1276 return REG_MISSING in case of errors. */ 1196 return -1 on match failure, -2 on error. */
1277 1197
1278static Idx 1198static Idx
1279internal_function
1280proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, 1199proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1200 regmatch_t *prevregs,
1281 Idx *pidx, Idx node, re_node_set *eps_via_nodes, 1201 Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1282 struct re_fail_stack_t *fs) 1202 struct re_fail_stack_t *fs)
1283{ 1203{
1284 const re_dfa_t *const dfa = mctx->dfa; 1204 const re_dfa_t *const dfa = mctx->dfa;
1285 Idx i;
1286 bool ok;
1287 if (IS_EPSILON_NODE (dfa->nodes[node].type)) 1205 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1288 { 1206 {
1289 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; 1207 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1290 re_node_set *edests = &dfa->edests[node]; 1208 re_node_set *edests = &dfa->edests[node];
1291 Idx dest_node; 1209
1292 ok = re_node_set_insert (eps_via_nodes, node); 1210 if (! re_node_set_contains (eps_via_nodes, node))
1293 if (BE (! ok, 0)) 1211 {
1294 return REG_ERROR; 1212 bool ok = re_node_set_insert (eps_via_nodes, node);
1295 /* Pick up a valid destination, or return REG_MISSING if none 1213 if (__glibc_unlikely (! ok))
1296 is found. */ 1214 return -2;
1297 for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i) 1215 }
1216
1217 /* Pick a valid destination, or return -1 if none is found. */
1218 Idx dest_node = -1;
1219 for (Idx i = 0; i < edests->nelem; i++)
1298 { 1220 {
1299 Idx candidate = edests->elems[i]; 1221 Idx candidate = edests->elems[i];
1300 if (!re_node_set_contains (cur_nodes, candidate)) 1222 if (!re_node_set_contains (cur_nodes, candidate))
1301 continue; 1223 continue;
1302 if (dest_node == REG_MISSING) 1224 if (dest_node == -1)
1303 dest_node = candidate; 1225 dest_node = candidate;
1304 1226
1305 else 1227 else
@@ -1312,8 +1234,8 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1312 /* Otherwise, push the second epsilon-transition on the fail stack. */ 1234 /* Otherwise, push the second epsilon-transition on the fail stack. */
1313 else if (fs != NULL 1235 else if (fs != NULL
1314 && push_fail_stack (fs, *pidx, candidate, nregs, regs, 1236 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1315 eps_via_nodes)) 1237 prevregs, eps_via_nodes))
1316 return REG_ERROR; 1238 return -2;
1317 1239
1318 /* We know we are going to exit. */ 1240 /* We know we are going to exit. */
1319 break; 1241 break;
@@ -1326,34 +1248,36 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1326 Idx naccepted = 0; 1248 Idx naccepted = 0;
1327 re_token_type_t type = dfa->nodes[node].type; 1249 re_token_type_t type = dfa->nodes[node].type;
1328 1250
1329#ifdef RE_ENABLE_I18N
1330 if (dfa->nodes[node].accept_mb) 1251 if (dfa->nodes[node].accept_mb)
1331 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); 1252 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1332 else 1253 else if (type == OP_BACK_REF)
1333#endif /* RE_ENABLE_I18N */
1334 if (type == OP_BACK_REF)
1335 { 1254 {
1336 Idx subexp_idx = dfa->nodes[node].opr.idx + 1; 1255 Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1337 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; 1256 if (subexp_idx < nregs)
1257 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1338 if (fs != NULL) 1258 if (fs != NULL)
1339 { 1259 {
1340 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) 1260 if (subexp_idx >= nregs
1341 return REG_MISSING; 1261 || regs[subexp_idx].rm_so == -1
1262 || regs[subexp_idx].rm_eo == -1)
1263 return -1;
1342 else if (naccepted) 1264 else if (naccepted)
1343 { 1265 {
1344 char *buf = (char *) re_string_get_buffer (&mctx->input); 1266 char *buf = (char *) re_string_get_buffer (&mctx->input);
1345 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, 1267 if (mctx->input.valid_len - *pidx < naccepted
1346 naccepted) != 0) 1268 || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1347 return REG_MISSING; 1269 naccepted)
1270 != 0))
1271 return -1;
1348 } 1272 }
1349 } 1273 }
1350 1274
1351 if (naccepted == 0) 1275 if (naccepted == 0)
1352 { 1276 {
1353 Idx dest_node; 1277 Idx dest_node;
1354 ok = re_node_set_insert (eps_via_nodes, node); 1278 bool ok = re_node_set_insert (eps_via_nodes, node);
1355 if (BE (! ok, 0)) 1279 if (__glibc_unlikely (! ok))
1356 return REG_ERROR; 1280 return -2;
1357 dest_node = dfa->edests[node].elems[0]; 1281 dest_node = dfa->edests[node].elems[0];
1358 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, 1282 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1359 dest_node)) 1283 dest_node))
@@ -1369,26 +1293,27 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1369 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL 1293 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1370 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, 1294 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1371 dest_node))) 1295 dest_node)))
1372 return REG_MISSING; 1296 return -1;
1373 re_node_set_empty (eps_via_nodes); 1297 re_node_set_empty (eps_via_nodes);
1374 return dest_node; 1298 return dest_node;
1375 } 1299 }
1376 } 1300 }
1377 return REG_MISSING; 1301 return -1;
1378} 1302}
1379 1303
1380static reg_errcode_t 1304static reg_errcode_t
1381internal_function __attribute_warn_unused_result__ 1305__attribute_warn_unused_result__
1382push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, 1306push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1383 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) 1307 Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
1308 re_node_set *eps_via_nodes)
1384{ 1309{
1385 reg_errcode_t err; 1310 reg_errcode_t err;
1386 Idx num = fs->num++; 1311 Idx num = fs->num;
1387 if (fs->num == fs->alloc) 1312 if (num == fs->alloc)
1388 { 1313 {
1389 struct re_fail_stack_ent_t *new_array; 1314 struct re_fail_stack_ent_t *new_array;
1390 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) 1315 new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1391 * fs->alloc * 2)); 1316 fs->alloc * 2);
1392 if (new_array == NULL) 1317 if (new_array == NULL)
1393 return REG_ESPACE; 1318 return REG_ESPACE;
1394 fs->alloc *= 2; 1319 fs->alloc *= 2;
@@ -1396,36 +1321,47 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1396 } 1321 }
1397 fs->stack[num].idx = str_idx; 1322 fs->stack[num].idx = str_idx;
1398 fs->stack[num].node = dest_node; 1323 fs->stack[num].node = dest_node;
1399 fs->stack[num].regs = re_malloc (regmatch_t, nregs); 1324 fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
1400 if (fs->stack[num].regs == NULL) 1325 if (fs->stack[num].regs == NULL)
1401 return REG_ESPACE; 1326 return REG_ESPACE;
1327 fs->num = num + 1;
1402 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); 1328 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1329 memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
1403 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); 1330 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1404 return err; 1331 return err;
1405} 1332}
1406 1333
1407static Idx 1334static Idx
1408internal_function
1409pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, 1335pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1410 regmatch_t *regs, re_node_set *eps_via_nodes) 1336 regmatch_t *regs, regmatch_t *prevregs,
1337 re_node_set *eps_via_nodes)
1411{ 1338{
1339 if (fs == NULL || fs->num == 0)
1340 return -1;
1412 Idx num = --fs->num; 1341 Idx num = --fs->num;
1413 assert (REG_VALID_INDEX (num));
1414 *pidx = fs->stack[num].idx; 1342 *pidx = fs->stack[num].idx;
1415 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); 1343 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1344 memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
1416 re_node_set_free (eps_via_nodes); 1345 re_node_set_free (eps_via_nodes);
1417 re_free (fs->stack[num].regs); 1346 re_free (fs->stack[num].regs);
1418 *eps_via_nodes = fs->stack[num].eps_via_nodes; 1347 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1348 DEBUG_ASSERT (0 <= fs->stack[num].node);
1419 return fs->stack[num].node; 1349 return fs->stack[num].node;
1420} 1350}
1421 1351
1352
1353#define DYNARRAY_STRUCT regmatch_list
1354#define DYNARRAY_ELEMENT regmatch_t
1355#define DYNARRAY_PREFIX regmatch_list_
1356#include <malloc/dynarray-skeleton.c>
1357
1422/* Set the positions where the subexpressions are starts/ends to registers 1358/* Set the positions where the subexpressions are starts/ends to registers
1423 PMATCH. 1359 PMATCH.
1424 Note: We assume that pmatch[0] is already set, and 1360 Note: We assume that pmatch[0] is already set, and
1425 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ 1361 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1426 1362
1427static reg_errcode_t 1363static reg_errcode_t
1428internal_function __attribute_warn_unused_result__ 1364__attribute_warn_unused_result__
1429set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, 1365set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1430 regmatch_t *pmatch, bool fl_backtrack) 1366 regmatch_t *pmatch, bool fl_backtrack)
1431{ 1367{
@@ -1434,13 +1370,11 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1434 re_node_set eps_via_nodes; 1370 re_node_set eps_via_nodes;
1435 struct re_fail_stack_t *fs; 1371 struct re_fail_stack_t *fs;
1436 struct re_fail_stack_t fs_body = { 0, 2, NULL }; 1372 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1437 regmatch_t *prev_idx_match; 1373 struct regmatch_list prev_match;
1438 bool prev_idx_match_malloced = false; 1374 regmatch_list_init (&prev_match);
1439 1375
1440#ifdef DEBUG 1376 DEBUG_ASSERT (nmatch > 1);
1441 assert (nmatch > 1); 1377 DEBUG_ASSERT (mctx->state_log != NULL);
1442 assert (mctx->state_log != NULL);
1443#endif
1444 if (fl_backtrack) 1378 if (fl_backtrack)
1445 { 1379 {
1446 fs = &fs_body; 1380 fs = &fs_body;
@@ -1454,85 +1388,73 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1454 cur_node = dfa->init_node; 1388 cur_node = dfa->init_node;
1455 re_node_set_init_empty (&eps_via_nodes); 1389 re_node_set_init_empty (&eps_via_nodes);
1456 1390
1457 if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) 1391 if (!regmatch_list_resize (&prev_match, nmatch))
1458 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1459 else
1460 { 1392 {
1461 prev_idx_match = re_malloc (regmatch_t, nmatch); 1393 regmatch_list_free (&prev_match);
1462 if (prev_idx_match == NULL) 1394 free_fail_stack_return (fs);
1463 { 1395 return REG_ESPACE;
1464 free_fail_stack_return (fs);
1465 return REG_ESPACE;
1466 }
1467 prev_idx_match_malloced = true;
1468 } 1396 }
1397 regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
1469 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); 1398 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1470 1399
1471 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) 1400 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1472 { 1401 {
1473 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); 1402 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1474 1403
1475 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) 1404 if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1405 || (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
1476 { 1406 {
1477 Idx reg_idx; 1407 Idx reg_idx;
1408 cur_node = -1;
1478 if (fs) 1409 if (fs)
1479 { 1410 {
1480 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 1411 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1481 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) 1412 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1482 break; 1413 {
1483 if (reg_idx == nmatch) 1414 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1484 { 1415 prev_idx_match, &eps_via_nodes);
1485 re_node_set_free (&eps_via_nodes); 1416 break;
1486 if (prev_idx_match_malloced) 1417 }
1487 re_free (prev_idx_match);
1488 return free_fail_stack_return (fs);
1489 }
1490 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1491 &eps_via_nodes);
1492 } 1418 }
1493 else 1419 if (cur_node < 0)
1494 { 1420 {
1495 re_node_set_free (&eps_via_nodes); 1421 re_node_set_free (&eps_via_nodes);
1496 if (prev_idx_match_malloced) 1422 regmatch_list_free (&prev_match);
1497 re_free (prev_idx_match); 1423 return free_fail_stack_return (fs);
1498 return REG_NOERROR;
1499 } 1424 }
1500 } 1425 }
1501 1426
1502 /* Proceed to next node. */ 1427 /* Proceed to next node. */
1503 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, 1428 cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
1429 &idx, cur_node,
1504 &eps_via_nodes, fs); 1430 &eps_via_nodes, fs);
1505 1431
1506 if (BE (! REG_VALID_INDEX (cur_node), 0)) 1432 if (__glibc_unlikely (cur_node < 0))
1507 { 1433 {
1508 if (BE (cur_node == REG_ERROR, 0)) 1434 if (__glibc_unlikely (cur_node == -2))
1509 { 1435 {
1510 re_node_set_free (&eps_via_nodes); 1436 re_node_set_free (&eps_via_nodes);
1511 if (prev_idx_match_malloced) 1437 regmatch_list_free (&prev_match);
1512 re_free (prev_idx_match);
1513 free_fail_stack_return (fs); 1438 free_fail_stack_return (fs);
1514 return REG_ESPACE; 1439 return REG_ESPACE;
1515 } 1440 }
1516 if (fs) 1441 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1517 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1442 prev_idx_match, &eps_via_nodes);
1518 &eps_via_nodes); 1443 if (cur_node < 0)
1519 else
1520 { 1444 {
1521 re_node_set_free (&eps_via_nodes); 1445 re_node_set_free (&eps_via_nodes);
1522 if (prev_idx_match_malloced) 1446 regmatch_list_free (&prev_match);
1523 re_free (prev_idx_match); 1447 free_fail_stack_return (fs);
1524 return REG_NOMATCH; 1448 return REG_NOMATCH;
1525 } 1449 }
1526 } 1450 }
1527 } 1451 }
1528 re_node_set_free (&eps_via_nodes); 1452 re_node_set_free (&eps_via_nodes);
1529 if (prev_idx_match_malloced) 1453 regmatch_list_free (&prev_match);
1530 re_free (prev_idx_match);
1531 return free_fail_stack_return (fs); 1454 return free_fail_stack_return (fs);
1532} 1455}
1533 1456
1534static reg_errcode_t 1457static reg_errcode_t
1535internal_function
1536free_fail_stack_return (struct re_fail_stack_t *fs) 1458free_fail_stack_return (struct re_fail_stack_t *fs)
1537{ 1459{
1538 if (fs) 1460 if (fs)
@@ -1549,7 +1471,6 @@ free_fail_stack_return (struct re_fail_stack_t *fs)
1549} 1471}
1550 1472
1551static void 1473static void
1552internal_function
1553update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, 1474update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1554 regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) 1475 regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1555{ 1476{
@@ -1567,10 +1488,10 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1567 } 1488 }
1568 else if (type == OP_CLOSE_SUBEXP) 1489 else if (type == OP_CLOSE_SUBEXP)
1569 { 1490 {
1491 /* We are at the last node of this sub expression. */
1570 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; 1492 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1571 if (reg_num < nmatch) 1493 if (reg_num < nmatch)
1572 { 1494 {
1573 /* We are at the last node of this sub expression. */
1574 if (pmatch[reg_num].rm_so < cur_idx) 1495 if (pmatch[reg_num].rm_so < cur_idx)
1575 { 1496 {
1576 pmatch[reg_num].rm_eo = cur_idx; 1497 pmatch[reg_num].rm_eo = cur_idx;
@@ -1621,7 +1542,6 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1621 ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) 1542 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1622 1543
1623static reg_errcode_t 1544static reg_errcode_t
1624internal_function
1625sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) 1545sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1626{ 1546{
1627 reg_errcode_t err; 1547 reg_errcode_t err;
@@ -1629,17 +1549,15 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1629 Idx str_idx = sctx->last_str_idx; 1549 Idx str_idx = sctx->last_str_idx;
1630 re_node_set cur_dest; 1550 re_node_set cur_dest;
1631 1551
1632#ifdef DEBUG 1552 DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1633 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1634#endif
1635 1553
1636 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon 1554 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1637 transit to the last_node and the last_node itself. */ 1555 transit to the last_node and the last_node itself. */
1638 err = re_node_set_init_1 (&cur_dest, sctx->last_node); 1556 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1639 if (BE (err != REG_NOERROR, 0)) 1557 if (__glibc_unlikely (err != REG_NOERROR))
1640 return err; 1558 return err;
1641 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1559 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1642 if (BE (err != REG_NOERROR, 0)) 1560 if (__glibc_unlikely (err != REG_NOERROR))
1643 goto free_return; 1561 goto free_return;
1644 1562
1645 /* Then check each states in the state_log. */ 1563 /* Then check each states in the state_log. */
@@ -1660,7 +1578,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1660 if (mctx->state_log[str_idx]) 1578 if (mctx->state_log[str_idx])
1661 { 1579 {
1662 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); 1580 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1663 if (BE (err != REG_NOERROR, 0)) 1581 if (__glibc_unlikely (err != REG_NOERROR))
1664 goto free_return; 1582 goto free_return;
1665 } 1583 }
1666 1584
@@ -1669,7 +1587,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1669 - It is in CUR_SRC. 1587 - It is in CUR_SRC.
1670 And update state_log. */ 1588 And update state_log. */
1671 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1589 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1672 if (BE (err != REG_NOERROR, 0)) 1590 if (__glibc_unlikely (err != REG_NOERROR))
1673 goto free_return; 1591 goto free_return;
1674 } 1592 }
1675 err = REG_NOERROR; 1593 err = REG_NOERROR;
@@ -1679,7 +1597,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1679} 1597}
1680 1598
1681static reg_errcode_t 1599static reg_errcode_t
1682internal_function __attribute_warn_unused_result__ 1600__attribute_warn_unused_result__
1683build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, 1601build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1684 Idx str_idx, re_node_set *cur_dest) 1602 Idx str_idx, re_node_set *cur_dest)
1685{ 1603{
@@ -1699,17 +1617,12 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1699 Idx prev_node = cur_src->elems[i]; 1617 Idx prev_node = cur_src->elems[i];
1700 int naccepted = 0; 1618 int naccepted = 0;
1701 bool ok; 1619 bool ok;
1620 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
1702 1621
1703#ifdef DEBUG
1704 re_token_type_t type = dfa->nodes[prev_node].type;
1705 assert (!IS_EPSILON_NODE (type));
1706#endif
1707#ifdef RE_ENABLE_I18N
1708 /* If the node may accept "multi byte". */ 1622 /* If the node may accept "multi byte". */
1709 if (dfa->nodes[prev_node].accept_mb) 1623 if (dfa->nodes[prev_node].accept_mb)
1710 naccepted = sift_states_iter_mb (mctx, sctx, prev_node, 1624 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1711 str_idx, sctx->last_str_idx); 1625 str_idx, sctx->last_str_idx);
1712#endif /* RE_ENABLE_I18N */
1713 1626
1714 /* We don't check backreferences here. 1627 /* We don't check backreferences here.
1715 See update_cur_sifted_state(). */ 1628 See update_cur_sifted_state(). */
@@ -1731,7 +1644,7 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1731 continue; 1644 continue;
1732 } 1645 }
1733 ok = re_node_set_insert (cur_dest, prev_node); 1646 ok = re_node_set_insert (cur_dest, prev_node);
1734 if (BE (! ok, 0)) 1647 if (__glibc_unlikely (! ok))
1735 return REG_ESPACE; 1648 return REG_ESPACE;
1736 } 1649 }
1737 1650
@@ -1741,7 +1654,6 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1741/* Helper functions. */ 1654/* Helper functions. */
1742 1655
1743static reg_errcode_t 1656static reg_errcode_t
1744internal_function
1745clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) 1657clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1746{ 1658{
1747 Idx top = mctx->state_log_top; 1659 Idx top = mctx->state_log_top;
@@ -1753,12 +1665,13 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1753 { 1665 {
1754 reg_errcode_t err; 1666 reg_errcode_t err;
1755 err = extend_buffers (mctx, next_state_log_idx + 1); 1667 err = extend_buffers (mctx, next_state_log_idx + 1);
1756 if (BE (err != REG_NOERROR, 0)) 1668 if (__glibc_unlikely (err != REG_NOERROR))
1757 return err; 1669 return err;
1758 } 1670 }
1759 1671
1760 if (top < next_state_log_idx) 1672 if (top < next_state_log_idx)
1761 { 1673 {
1674 DEBUG_ASSERT (mctx->state_log != NULL);
1762 memset (mctx->state_log + top + 1, '\0', 1675 memset (mctx->state_log + top + 1, '\0',
1763 sizeof (re_dfastate_t *) * (next_state_log_idx - top)); 1676 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1764 mctx->state_log_top = next_state_log_idx; 1677 mctx->state_log_top = next_state_log_idx;
@@ -1767,7 +1680,6 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1767} 1680}
1768 1681
1769static reg_errcode_t 1682static reg_errcode_t
1770internal_function
1771merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, 1683merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1772 re_dfastate_t **src, Idx num) 1684 re_dfastate_t **src, Idx num)
1773{ 1685{
@@ -1782,11 +1694,11 @@ merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1782 re_node_set merged_set; 1694 re_node_set merged_set;
1783 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, 1695 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1784 &src[st_idx]->nodes); 1696 &src[st_idx]->nodes);
1785 if (BE (err != REG_NOERROR, 0)) 1697 if (__glibc_unlikely (err != REG_NOERROR))
1786 return err; 1698 return err;
1787 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); 1699 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1788 re_node_set_free (&merged_set); 1700 re_node_set_free (&merged_set);
1789 if (BE (err != REG_NOERROR, 0)) 1701 if (__glibc_unlikely (err != REG_NOERROR))
1790 return err; 1702 return err;
1791 } 1703 }
1792 } 1704 }
@@ -1794,7 +1706,6 @@ merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1794} 1706}
1795 1707
1796static reg_errcode_t 1708static reg_errcode_t
1797internal_function
1798update_cur_sifted_state (const re_match_context_t *mctx, 1709update_cur_sifted_state (const re_match_context_t *mctx,
1799 re_sift_context_t *sctx, Idx str_idx, 1710 re_sift_context_t *sctx, Idx str_idx,
1800 re_node_set *dest_nodes) 1711 re_node_set *dest_nodes)
@@ -1814,7 +1725,7 @@ update_cur_sifted_state (const re_match_context_t *mctx,
1814 /* At first, add the nodes which can epsilon transit to a node in 1725 /* At first, add the nodes which can epsilon transit to a node in
1815 DEST_NODE. */ 1726 DEST_NODE. */
1816 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); 1727 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1817 if (BE (err != REG_NOERROR, 0)) 1728 if (__glibc_unlikely (err != REG_NOERROR))
1818 return err; 1729 return err;
1819 1730
1820 /* Then, check the limitations in the current sift_context. */ 1731 /* Then, check the limitations in the current sift_context. */
@@ -1822,27 +1733,27 @@ update_cur_sifted_state (const re_match_context_t *mctx,
1822 { 1733 {
1823 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, 1734 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1824 mctx->bkref_ents, str_idx); 1735 mctx->bkref_ents, str_idx);
1825 if (BE (err != REG_NOERROR, 0)) 1736 if (__glibc_unlikely (err != REG_NOERROR))
1826 return err; 1737 return err;
1827 } 1738 }
1828 } 1739 }
1829 1740
1830 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); 1741 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1831 if (BE (err != REG_NOERROR, 0)) 1742 if (__glibc_unlikely (err != REG_NOERROR))
1832 return err; 1743 return err;
1833 } 1744 }
1834 1745
1835 if (candidates && mctx->state_log[str_idx]->has_backref) 1746 if (candidates && mctx->state_log[str_idx]->has_backref)
1836 { 1747 {
1837 err = sift_states_bkref (mctx, sctx, str_idx, candidates); 1748 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1838 if (BE (err != REG_NOERROR, 0)) 1749 if (__glibc_unlikely (err != REG_NOERROR))
1839 return err; 1750 return err;
1840 } 1751 }
1841 return REG_NOERROR; 1752 return REG_NOERROR;
1842} 1753}
1843 1754
1844static reg_errcode_t 1755static reg_errcode_t
1845internal_function __attribute_warn_unused_result__ 1756__attribute_warn_unused_result__
1846add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, 1757add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1847 const re_node_set *candidates) 1758 const re_node_set *candidates)
1848{ 1759{
@@ -1850,19 +1761,19 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1850 Idx i; 1761 Idx i;
1851 1762
1852 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); 1763 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1853 if (BE (err != REG_NOERROR, 0)) 1764 if (__glibc_unlikely (err != REG_NOERROR))
1854 return err; 1765 return err;
1855 1766
1856 if (!state->inveclosure.alloc) 1767 if (!state->inveclosure.alloc)
1857 { 1768 {
1858 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); 1769 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1859 if (BE (err != REG_NOERROR, 0)) 1770 if (__glibc_unlikely (err != REG_NOERROR))
1860 return REG_ESPACE; 1771 return REG_ESPACE;
1861 for (i = 0; i < dest_nodes->nelem; i++) 1772 for (i = 0; i < dest_nodes->nelem; i++)
1862 { 1773 {
1863 err = re_node_set_merge (&state->inveclosure, 1774 err = re_node_set_merge (&state->inveclosure,
1864 dfa->inveclosures + dest_nodes->elems[i]); 1775 dfa->inveclosures + dest_nodes->elems[i]);
1865 if (BE (err != REG_NOERROR, 0)) 1776 if (__glibc_unlikely (err != REG_NOERROR))
1866 return REG_ESPACE; 1777 return REG_ESPACE;
1867 } 1778 }
1868 } 1779 }
@@ -1871,7 +1782,6 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1871} 1782}
1872 1783
1873static reg_errcode_t 1784static reg_errcode_t
1874internal_function
1875sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, 1785sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1876 const re_node_set *candidates) 1786 const re_node_set *candidates)
1877{ 1787{
@@ -1889,16 +1799,16 @@ sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1889 { 1799 {
1890 Idx edst1 = dfa->edests[cur_node].elems[0]; 1800 Idx edst1 = dfa->edests[cur_node].elems[0];
1891 Idx edst2 = ((dfa->edests[cur_node].nelem > 1) 1801 Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1892 ? dfa->edests[cur_node].elems[1] : REG_MISSING); 1802 ? dfa->edests[cur_node].elems[1] : -1);
1893 if ((!re_node_set_contains (inv_eclosure, edst1) 1803 if ((!re_node_set_contains (inv_eclosure, edst1)
1894 && re_node_set_contains (dest_nodes, edst1)) 1804 && re_node_set_contains (dest_nodes, edst1))
1895 || (REG_VALID_NONZERO_INDEX (edst2) 1805 || (edst2 > 0
1896 && !re_node_set_contains (inv_eclosure, edst2) 1806 && !re_node_set_contains (inv_eclosure, edst2)
1897 && re_node_set_contains (dest_nodes, edst2))) 1807 && re_node_set_contains (dest_nodes, edst2)))
1898 { 1808 {
1899 err = re_node_set_add_intersect (&except_nodes, candidates, 1809 err = re_node_set_add_intersect (&except_nodes, candidates,
1900 dfa->inveclosures + cur_node); 1810 dfa->inveclosures + cur_node);
1901 if (BE (err != REG_NOERROR, 0)) 1811 if (__glibc_unlikely (err != REG_NOERROR))
1902 { 1812 {
1903 re_node_set_free (&except_nodes); 1813 re_node_set_free (&except_nodes);
1904 return err; 1814 return err;
@@ -1920,7 +1830,6 @@ sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1920} 1830}
1921 1831
1922static bool 1832static bool
1923internal_function
1924check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, 1833check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1925 Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) 1834 Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1926{ 1835{
@@ -1956,7 +1865,6 @@ check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1956} 1865}
1957 1866
1958static int 1867static int
1959internal_function
1960check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, 1868check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1961 Idx subexp_idx, Idx from_node, Idx bkref_idx) 1869 Idx subexp_idx, Idx from_node, Idx bkref_idx)
1962{ 1870{
@@ -1972,7 +1880,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1972 switch (dfa->nodes[node].type) 1880 switch (dfa->nodes[node].type)
1973 { 1881 {
1974 case OP_BACK_REF: 1882 case OP_BACK_REF:
1975 if (bkref_idx != REG_MISSING) 1883 if (bkref_idx != -1)
1976 { 1884 {
1977 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; 1885 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1978 do 1886 do
@@ -2038,7 +1946,6 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
2038} 1946}
2039 1947
2040static int 1948static int
2041internal_function
2042check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, 1949check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2043 Idx subexp_idx, Idx from_node, Idx str_idx, 1950 Idx subexp_idx, Idx from_node, Idx str_idx,
2044 Idx bkref_idx) 1951 Idx bkref_idx)
@@ -2068,7 +1975,6 @@ check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2068 which are against limitations from DEST_NODES. */ 1975 which are against limitations from DEST_NODES. */
2069 1976
2070static reg_errcode_t 1977static reg_errcode_t
2071internal_function
2072check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, 1978check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2073 const re_node_set *candidates, re_node_set *limits, 1979 const re_node_set *candidates, re_node_set *limits,
2074 struct re_backref_cache_entry *bkref_ents, Idx str_idx) 1980 struct re_backref_cache_entry *bkref_ents, Idx str_idx)
@@ -2088,8 +1994,8 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2088 subexp_idx = dfa->nodes[ent->node].opr.idx; 1994 subexp_idx = dfa->nodes[ent->node].opr.idx;
2089 if (ent->subexp_to == str_idx) 1995 if (ent->subexp_to == str_idx)
2090 { 1996 {
2091 Idx ops_node = REG_MISSING; 1997 Idx ops_node = -1;
2092 Idx cls_node = REG_MISSING; 1998 Idx cls_node = -1;
2093 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 1999 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2094 { 2000 {
2095 Idx node = dest_nodes->elems[node_idx]; 2001 Idx node = dest_nodes->elems[node_idx];
@@ -2104,16 +2010,16 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2104 2010
2105 /* Check the limitation of the open subexpression. */ 2011 /* Check the limitation of the open subexpression. */
2106 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ 2012 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2107 if (REG_VALID_INDEX (ops_node)) 2013 if (ops_node >= 0)
2108 { 2014 {
2109 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, 2015 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2110 candidates); 2016 candidates);
2111 if (BE (err != REG_NOERROR, 0)) 2017 if (__glibc_unlikely (err != REG_NOERROR))
2112 return err; 2018 return err;
2113 } 2019 }
2114 2020
2115 /* Check the limitation of the close subexpression. */ 2021 /* Check the limitation of the close subexpression. */
2116 if (REG_VALID_INDEX (cls_node)) 2022 if (cls_node >= 0)
2117 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2023 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2118 { 2024 {
2119 Idx node = dest_nodes->elems[node_idx]; 2025 Idx node = dest_nodes->elems[node_idx];
@@ -2126,7 +2032,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2126 Remove it form the current sifted state. */ 2032 Remove it form the current sifted state. */
2127 err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2033 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2128 candidates); 2034 candidates);
2129 if (BE (err != REG_NOERROR, 0)) 2035 if (__glibc_unlikely (err != REG_NOERROR))
2130 return err; 2036 return err;
2131 --node_idx; 2037 --node_idx;
2132 } 2038 }
@@ -2146,7 +2052,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2146 Remove it form the current sifted state. */ 2052 Remove it form the current sifted state. */
2147 err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2053 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2148 candidates); 2054 candidates);
2149 if (BE (err != REG_NOERROR, 0)) 2055 if (__glibc_unlikely (err != REG_NOERROR))
2150 return err; 2056 return err;
2151 } 2057 }
2152 } 2058 }
@@ -2156,7 +2062,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2156} 2062}
2157 2063
2158static reg_errcode_t 2064static reg_errcode_t
2159internal_function __attribute_warn_unused_result__ 2065__attribute_warn_unused_result__
2160sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, 2066sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2161 Idx str_idx, const re_node_set *candidates) 2067 Idx str_idx, const re_node_set *candidates)
2162{ 2068{
@@ -2166,7 +2072,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2166 re_sift_context_t local_sctx; 2072 re_sift_context_t local_sctx;
2167 Idx first_idx = search_cur_bkref_entry (mctx, str_idx); 2073 Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2168 2074
2169 if (first_idx == REG_MISSING) 2075 if (first_idx == -1)
2170 return REG_NOERROR; 2076 return REG_NOERROR;
2171 2077
2172 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ 2078 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
@@ -2212,27 +2118,27 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2212 { 2118 {
2213 local_sctx = *sctx; 2119 local_sctx = *sctx;
2214 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); 2120 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2215 if (BE (err != REG_NOERROR, 0)) 2121 if (__glibc_unlikely (err != REG_NOERROR))
2216 goto free_return; 2122 goto free_return;
2217 } 2123 }
2218 local_sctx.last_node = node; 2124 local_sctx.last_node = node;
2219 local_sctx.last_str_idx = str_idx; 2125 local_sctx.last_str_idx = str_idx;
2220 ok = re_node_set_insert (&local_sctx.limits, enabled_idx); 2126 ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2221 if (BE (! ok, 0)) 2127 if (__glibc_unlikely (! ok))
2222 { 2128 {
2223 err = REG_ESPACE; 2129 err = REG_ESPACE;
2224 goto free_return; 2130 goto free_return;
2225 } 2131 }
2226 cur_state = local_sctx.sifted_states[str_idx]; 2132 cur_state = local_sctx.sifted_states[str_idx];
2227 err = sift_states_backward (mctx, &local_sctx); 2133 err = sift_states_backward (mctx, &local_sctx);
2228 if (BE (err != REG_NOERROR, 0)) 2134 if (__glibc_unlikely (err != REG_NOERROR))
2229 goto free_return; 2135 goto free_return;
2230 if (sctx->limited_states != NULL) 2136 if (sctx->limited_states != NULL)
2231 { 2137 {
2232 err = merge_state_array (dfa, sctx->limited_states, 2138 err = merge_state_array (dfa, sctx->limited_states,
2233 local_sctx.sifted_states, 2139 local_sctx.sifted_states,
2234 str_idx + 1); 2140 str_idx + 1);
2235 if (BE (err != REG_NOERROR, 0)) 2141 if (__glibc_unlikely (err != REG_NOERROR))
2236 goto free_return; 2142 goto free_return;
2237 } 2143 }
2238 local_sctx.sifted_states[str_idx] = cur_state; 2144 local_sctx.sifted_states[str_idx] = cur_state;
@@ -2254,9 +2160,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2254} 2160}
2255 2161
2256 2162
2257#ifdef RE_ENABLE_I18N
2258static int 2163static int
2259internal_function
2260sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, 2164sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2261 Idx node_idx, Idx str_idx, Idx max_str_idx) 2165 Idx node_idx, Idx str_idx, Idx max_str_idx)
2262{ 2166{
@@ -2264,44 +2168,41 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2264 int naccepted; 2168 int naccepted;
2265 /* Check the node can accept "multi byte". */ 2169 /* Check the node can accept "multi byte". */
2266 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); 2170 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2267 if (naccepted > 0 && str_idx + naccepted <= max_str_idx && 2171 if (naccepted > 0 && str_idx + naccepted <= max_str_idx
2268 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], 2172 && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2269 dfa->nexts[node_idx])) 2173 dfa->nexts[node_idx]))
2270 /* The node can't accept the "multi byte", or the 2174 /* The node can't accept the "multi byte", or the
2271 destination was already thrown away, then the node 2175 destination was already thrown away, then the node
2272 could't accept the current input "multi byte". */ 2176 couldn't accept the current input "multi byte". */
2273 naccepted = 0; 2177 naccepted = 0;
2274 /* Otherwise, it is sure that the node could accept 2178 /* Otherwise, it is sure that the node could accept
2275 'naccepted' bytes input. */ 2179 'naccepted' bytes input. */
2276 return naccepted; 2180 return naccepted;
2277} 2181}
2278#endif /* RE_ENABLE_I18N */
2279
2280 2182
2281/* Functions for state transition. */ 2183/* Functions for state transition. */
2282 2184
2283/* Return the next state to which the current state STATE will transit by 2185/* Return the next state to which the current state STATE will transit by
2284 accepting the current input byte, and update STATE_LOG if necessary. 2186 accepting the current input byte, and update STATE_LOG if necessary.
2187 Return NULL on failure.
2285 If STATE can accept a multibyte char/collating element/back reference 2188 If STATE can accept a multibyte char/collating element/back reference
2286 update the destination of STATE_LOG. */ 2189 update the destination of STATE_LOG. */
2287 2190
2288static re_dfastate_t * 2191static re_dfastate_t *
2289internal_function __attribute_warn_unused_result__ 2192__attribute_warn_unused_result__
2290transit_state (reg_errcode_t *err, re_match_context_t *mctx, 2193transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2291 re_dfastate_t *state) 2194 re_dfastate_t *state)
2292{ 2195{
2293 re_dfastate_t **trtable; 2196 re_dfastate_t **trtable;
2294 unsigned char ch; 2197 unsigned char ch;
2295 2198
2296#ifdef RE_ENABLE_I18N
2297 /* If the current state can accept multibyte. */ 2199 /* If the current state can accept multibyte. */
2298 if (BE (state->accept_mb, 0)) 2200 if (__glibc_unlikely (state->accept_mb))
2299 { 2201 {
2300 *err = transit_state_mb (mctx, state); 2202 *err = transit_state_mb (mctx, state);
2301 if (BE (*err != REG_NOERROR, 0)) 2203 if (__glibc_unlikely (*err != REG_NOERROR))
2302 return NULL; 2204 return NULL;
2303 } 2205 }
2304#endif /* RE_ENABLE_I18N */
2305 2206
2306 /* Then decide the next state with the single byte. */ 2207 /* Then decide the next state with the single byte. */
2307#if 0 2208#if 0
@@ -2315,11 +2216,11 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2315 for (;;) 2216 for (;;)
2316 { 2217 {
2317 trtable = state->trtable; 2218 trtable = state->trtable;
2318 if (BE (trtable != NULL, 1)) 2219 if (__glibc_likely (trtable != NULL))
2319 return trtable[ch]; 2220 return trtable[ch];
2320 2221
2321 trtable = state->word_trtable; 2222 trtable = state->word_trtable;
2322 if (BE (trtable != NULL, 1)) 2223 if (__glibc_likely (trtable != NULL))
2323 { 2224 {
2324 unsigned int context; 2225 unsigned int context;
2325 context 2226 context
@@ -2344,7 +2245,6 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2344 2245
2345/* Update the state_log if we need */ 2246/* Update the state_log if we need */
2346static re_dfastate_t * 2247static re_dfastate_t *
2347internal_function
2348merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, 2248merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2349 re_dfastate_t *next_state) 2249 re_dfastate_t *next_state)
2350{ 2250{
@@ -2376,7 +2276,7 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2376 table_nodes = next_state->entrance_nodes; 2276 table_nodes = next_state->entrance_nodes;
2377 *err = re_node_set_init_union (&next_nodes, table_nodes, 2277 *err = re_node_set_init_union (&next_nodes, table_nodes,
2378 log_nodes); 2278 log_nodes);
2379 if (BE (*err != REG_NOERROR, 0)) 2279 if (__glibc_unlikely (*err != REG_NOERROR))
2380 return NULL; 2280 return NULL;
2381 } 2281 }
2382 else 2282 else
@@ -2396,21 +2296,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2396 re_node_set_free (&next_nodes); 2296 re_node_set_free (&next_nodes);
2397 } 2297 }
2398 2298
2399 if (BE (dfa->nbackref, 0) && next_state != NULL) 2299 if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2400 { 2300 {
2401 /* Check OP_OPEN_SUBEXP in the current state in case that we use them 2301 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2402 later. We must check them here, since the back references in the 2302 later. We must check them here, since the back references in the
2403 next state might use them. */ 2303 next state might use them. */
2404 *err = check_subexp_matching_top (mctx, &next_state->nodes, 2304 *err = check_subexp_matching_top (mctx, &next_state->nodes,
2405 cur_idx); 2305 cur_idx);
2406 if (BE (*err != REG_NOERROR, 0)) 2306 if (__glibc_unlikely (*err != REG_NOERROR))
2407 return NULL; 2307 return NULL;
2408 2308
2409 /* If the next state has back references. */ 2309 /* If the next state has back references. */
2410 if (next_state->has_backref) 2310 if (next_state->has_backref)
2411 { 2311 {
2412 *err = transit_state_bkref (mctx, &next_state->nodes); 2312 *err = transit_state_bkref (mctx, &next_state->nodes);
2413 if (BE (*err != REG_NOERROR, 0)) 2313 if (__glibc_unlikely (*err != REG_NOERROR))
2414 return NULL; 2314 return NULL;
2415 next_state = mctx->state_log[cur_idx]; 2315 next_state = mctx->state_log[cur_idx];
2416 } 2316 }
@@ -2423,7 +2323,6 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2423 multi-byte match, then look in the log for a state 2323 multi-byte match, then look in the log for a state
2424 from which to restart matching. */ 2324 from which to restart matching. */
2425static re_dfastate_t * 2325static re_dfastate_t *
2426internal_function
2427find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) 2326find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2428{ 2327{
2429 re_dfastate_t *cur_state; 2328 re_dfastate_t *cur_state;
@@ -2454,7 +2353,6 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2454 corresponding back references. */ 2353 corresponding back references. */
2455 2354
2456static reg_errcode_t 2355static reg_errcode_t
2457internal_function
2458check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, 2356check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2459 Idx str_idx) 2357 Idx str_idx)
2460{ 2358{
@@ -2476,7 +2374,7 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2476 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) 2374 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2477 { 2375 {
2478 err = match_ctx_add_subtop (mctx, node, str_idx); 2376 err = match_ctx_add_subtop (mctx, node, str_idx);
2479 if (BE (err != REG_NOERROR, 0)) 2377 if (__glibc_unlikely (err != REG_NOERROR))
2480 return err; 2378 return err;
2481 } 2379 }
2482 } 2380 }
@@ -2485,7 +2383,7 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2485 2383
2486#if 0 2384#if 0
2487/* Return the next state to which the current state STATE will transit by 2385/* Return the next state to which the current state STATE will transit by
2488 accepting the current input byte. */ 2386 accepting the current input byte. Return NULL on failure. */
2489 2387
2490static re_dfastate_t * 2388static re_dfastate_t *
2491transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, 2389transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
@@ -2498,7 +2396,7 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2498 unsigned int context; 2396 unsigned int context;
2499 2397
2500 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); 2398 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2501 if (BE (*err != REG_NOERROR, 0)) 2399 if (__glibc_unlikely (*err != REG_NOERROR))
2502 return NULL; 2400 return NULL;
2503 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) 2401 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2504 { 2402 {
@@ -2507,7 +2405,7 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2507 { 2405 {
2508 *err = re_node_set_merge (&next_nodes, 2406 *err = re_node_set_merge (&next_nodes,
2509 dfa->eclosures + dfa->nexts[cur_node]); 2407 dfa->eclosures + dfa->nexts[cur_node]);
2510 if (BE (*err != REG_NOERROR, 0)) 2408 if (__glibc_unlikely (*err != REG_NOERROR))
2511 { 2409 {
2512 re_node_set_free (&next_nodes); 2410 re_node_set_free (&next_nodes);
2513 return NULL; 2411 return NULL;
@@ -2525,9 +2423,7 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2525} 2423}
2526#endif 2424#endif
2527 2425
2528#ifdef RE_ENABLE_I18N
2529static reg_errcode_t 2426static reg_errcode_t
2530internal_function
2531transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) 2427transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2532{ 2428{
2533 const re_dfa_t *const dfa = mctx->dfa; 2429 const re_dfa_t *const dfa = mctx->dfa;
@@ -2567,11 +2463,9 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2567 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted 2463 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2568 : mctx->max_mb_elem_len); 2464 : mctx->max_mb_elem_len);
2569 err = clean_state_log_if_needed (mctx, dest_idx); 2465 err = clean_state_log_if_needed (mctx, dest_idx);
2570 if (BE (err != REG_NOERROR, 0)) 2466 if (__glibc_unlikely (err != REG_NOERROR))
2571 return err; 2467 return err;
2572#ifdef DEBUG 2468 DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
2573 assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2574#endif
2575 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; 2469 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2576 2470
2577 dest_state = mctx->state_log[dest_idx]; 2471 dest_state = mctx->state_log[dest_idx];
@@ -2581,7 +2475,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2581 { 2475 {
2582 err = re_node_set_init_union (&dest_nodes, 2476 err = re_node_set_init_union (&dest_nodes,
2583 dest_state->entrance_nodes, new_nodes); 2477 dest_state->entrance_nodes, new_nodes);
2584 if (BE (err != REG_NOERROR, 0)) 2478 if (__glibc_unlikely (err != REG_NOERROR))
2585 return err; 2479 return err;
2586 } 2480 }
2587 context = re_string_context_at (&mctx->input, dest_idx - 1, 2481 context = re_string_context_at (&mctx->input, dest_idx - 1,
@@ -2590,15 +2484,14 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2590 = re_acquire_state_context (&err, dfa, &dest_nodes, context); 2484 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2591 if (dest_state != NULL) 2485 if (dest_state != NULL)
2592 re_node_set_free (&dest_nodes); 2486 re_node_set_free (&dest_nodes);
2593 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) 2487 if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2488 && err != REG_NOERROR))
2594 return err; 2489 return err;
2595 } 2490 }
2596 return REG_NOERROR; 2491 return REG_NOERROR;
2597} 2492}
2598#endif /* RE_ENABLE_I18N */
2599 2493
2600static reg_errcode_t 2494static reg_errcode_t
2601internal_function
2602transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) 2495transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2603{ 2496{
2604 const re_dfa_t *const dfa = mctx->dfa; 2497 const re_dfa_t *const dfa = mctx->dfa;
@@ -2630,14 +2523,12 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2630 Check the substring which the substring matched. */ 2523 Check the substring which the substring matched. */
2631 bkc_idx = mctx->nbkref_ents; 2524 bkc_idx = mctx->nbkref_ents;
2632 err = get_subexp (mctx, node_idx, cur_str_idx); 2525 err = get_subexp (mctx, node_idx, cur_str_idx);
2633 if (BE (err != REG_NOERROR, 0)) 2526 if (__glibc_unlikely (err != REG_NOERROR))
2634 goto free_return; 2527 goto free_return;
2635 2528
2636 /* And add the epsilon closures (which is 'new_dest_nodes') of 2529 /* And add the epsilon closures (which is 'new_dest_nodes') of
2637 the backreference to appropriate state_log. */ 2530 the backreference to appropriate state_log. */
2638#ifdef DEBUG 2531 DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
2639 assert (dfa->nexts[node_idx] != REG_MISSING);
2640#endif
2641 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) 2532 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2642 { 2533 {
2643 Idx subexp_len; 2534 Idx subexp_len;
@@ -2663,8 +2554,8 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2663 mctx->state_log[dest_str_idx] 2554 mctx->state_log[dest_str_idx]
2664 = re_acquire_state_context (&err, dfa, new_dest_nodes, 2555 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2665 context); 2556 context);
2666 if (BE (mctx->state_log[dest_str_idx] == NULL 2557 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2667 && err != REG_NOERROR, 0)) 2558 && err != REG_NOERROR))
2668 goto free_return; 2559 goto free_return;
2669 } 2560 }
2670 else 2561 else
@@ -2673,7 +2564,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2673 err = re_node_set_init_union (&dest_nodes, 2564 err = re_node_set_init_union (&dest_nodes,
2674 dest_state->entrance_nodes, 2565 dest_state->entrance_nodes,
2675 new_dest_nodes); 2566 new_dest_nodes);
2676 if (BE (err != REG_NOERROR, 0)) 2567 if (__glibc_unlikely (err != REG_NOERROR))
2677 { 2568 {
2678 re_node_set_free (&dest_nodes); 2569 re_node_set_free (&dest_nodes);
2679 goto free_return; 2570 goto free_return;
@@ -2681,8 +2572,8 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2681 mctx->state_log[dest_str_idx] 2572 mctx->state_log[dest_str_idx]
2682 = re_acquire_state_context (&err, dfa, &dest_nodes, context); 2573 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2683 re_node_set_free (&dest_nodes); 2574 re_node_set_free (&dest_nodes);
2684 if (BE (mctx->state_log[dest_str_idx] == NULL 2575 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2685 && err != REG_NOERROR, 0)) 2576 && err != REG_NOERROR))
2686 goto free_return; 2577 goto free_return;
2687 } 2578 }
2688 /* We need to check recursively if the backreference can epsilon 2579 /* We need to check recursively if the backreference can epsilon
@@ -2692,10 +2583,10 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2692 { 2583 {
2693 err = check_subexp_matching_top (mctx, new_dest_nodes, 2584 err = check_subexp_matching_top (mctx, new_dest_nodes,
2694 cur_str_idx); 2585 cur_str_idx);
2695 if (BE (err != REG_NOERROR, 0)) 2586 if (__glibc_unlikely (err != REG_NOERROR))
2696 goto free_return; 2587 goto free_return;
2697 err = transit_state_bkref (mctx, new_dest_nodes); 2588 err = transit_state_bkref (mctx, new_dest_nodes);
2698 if (BE (err != REG_NOERROR, 0)) 2589 if (__glibc_unlikely (err != REG_NOERROR))
2699 goto free_return; 2590 goto free_return;
2700 } 2591 }
2701 } 2592 }
@@ -2712,7 +2603,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2712 delay these checking for prune_impossible_nodes(). */ 2603 delay these checking for prune_impossible_nodes(). */
2713 2604
2714static reg_errcode_t 2605static reg_errcode_t
2715internal_function __attribute_warn_unused_result__ 2606__attribute_warn_unused_result__
2716get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) 2607get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2717{ 2608{
2718 const re_dfa_t *const dfa = mctx->dfa; 2609 const re_dfa_t *const dfa = mctx->dfa;
@@ -2720,7 +2611,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2720 const char *buf = (const char *) re_string_get_buffer (&mctx->input); 2611 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2721 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ 2612 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2722 Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); 2613 Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2723 if (cache_idx != REG_MISSING) 2614 if (cache_idx != -1)
2724 { 2615 {
2725 const struct re_backref_cache_entry *entry 2616 const struct re_backref_cache_entry *entry
2726 = mctx->bkref_ents + cache_idx; 2617 = mctx->bkref_ents + cache_idx;
@@ -2756,7 +2647,8 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2756 at the back reference? */ 2647 at the back reference? */
2757 if (sl_str_diff > 0) 2648 if (sl_str_diff > 0)
2758 { 2649 {
2759 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) 2650 if (__glibc_unlikely (bkref_str_off + sl_str_diff
2651 > mctx->input.valid_len))
2760 { 2652 {
2761 /* Not enough chars for a successful match. */ 2653 /* Not enough chars for a successful match. */
2762 if (bkref_str_off + sl_str_diff > mctx->input.len) 2654 if (bkref_str_off + sl_str_diff > mctx->input.len)
@@ -2765,7 +2657,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2765 err = clean_state_log_if_needed (mctx, 2657 err = clean_state_log_if_needed (mctx,
2766 bkref_str_off 2658 bkref_str_off
2767 + sl_str_diff); 2659 + sl_str_diff);
2768 if (BE (err != REG_NOERROR, 0)) 2660 if (__glibc_unlikely (err != REG_NOERROR))
2769 return err; 2661 return err;
2770 buf = (const char *) re_string_get_buffer (&mctx->input); 2662 buf = (const char *) re_string_get_buffer (&mctx->input);
2771 } 2663 }
@@ -2784,7 +2676,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2784 2676
2785 if (err == REG_NOMATCH) 2677 if (err == REG_NOMATCH)
2786 continue; 2678 continue;
2787 if (BE (err != REG_NOERROR, 0)) 2679 if (__glibc_unlikely (err != REG_NOERROR))
2788 return err; 2680 return err;
2789 } 2681 }
2790 2682
@@ -2803,14 +2695,14 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2803 at the back reference? */ 2695 at the back reference? */
2804 if (sl_str_off > 0) 2696 if (sl_str_off > 0)
2805 { 2697 {
2806 if (BE (bkref_str_off >= mctx->input.valid_len, 0)) 2698 if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2807 { 2699 {
2808 /* If we are at the end of the input, we cannot match. */ 2700 /* If we are at the end of the input, we cannot match. */
2809 if (bkref_str_off >= mctx->input.len) 2701 if (bkref_str_off >= mctx->input.len)
2810 break; 2702 break;
2811 2703
2812 err = extend_buffers (mctx, bkref_str_off + 1); 2704 err = extend_buffers (mctx, bkref_str_off + 1);
2813 if (BE (err != REG_NOERROR, 0)) 2705 if (__glibc_unlikely (err != REG_NOERROR))
2814 return err; 2706 return err;
2815 2707
2816 buf = (const char *) re_string_get_buffer (&mctx->input); 2708 buf = (const char *) re_string_get_buffer (&mctx->input);
@@ -2825,7 +2717,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2825 nodes = &mctx->state_log[sl_str]->nodes; 2717 nodes = &mctx->state_log[sl_str]->nodes;
2826 cls_node = find_subexp_node (dfa, nodes, subexp_num, 2718 cls_node = find_subexp_node (dfa, nodes, subexp_num,
2827 OP_CLOSE_SUBEXP); 2719 OP_CLOSE_SUBEXP);
2828 if (cls_node == REG_MISSING) 2720 if (cls_node == -1)
2829 continue; /* No. */ 2721 continue; /* No. */
2830 if (sub_top->path == NULL) 2722 if (sub_top->path == NULL)
2831 { 2723 {
@@ -2841,15 +2733,18 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2841 OP_CLOSE_SUBEXP); 2733 OP_CLOSE_SUBEXP);
2842 if (err == REG_NOMATCH) 2734 if (err == REG_NOMATCH)
2843 continue; 2735 continue;
2844 if (BE (err != REG_NOERROR, 0)) 2736 if (__glibc_unlikely (err != REG_NOERROR))
2845 return err; 2737 return err;
2846 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); 2738 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2847 if (BE (sub_last == NULL, 0)) 2739 if (__glibc_unlikely (sub_last == NULL))
2848 return REG_ESPACE; 2740 return REG_ESPACE;
2849 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, 2741 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2850 bkref_str_idx); 2742 bkref_str_idx);
2743 buf = (const char *) re_string_get_buffer (&mctx->input);
2851 if (err == REG_NOMATCH) 2744 if (err == REG_NOMATCH)
2852 continue; 2745 continue;
2746 if (__glibc_unlikely (err != REG_NOERROR))
2747 return err;
2853 } 2748 }
2854 } 2749 }
2855 return REG_NOERROR; 2750 return REG_NOERROR;
@@ -2862,7 +2757,6 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2862 and SUB_LAST. */ 2757 and SUB_LAST. */
2863 2758
2864static reg_errcode_t 2759static reg_errcode_t
2865internal_function
2866get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, 2760get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2867 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str) 2761 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2868{ 2762{
@@ -2876,7 +2770,7 @@ get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2876 return err; 2770 return err;
2877 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, 2771 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2878 sub_last->str_idx); 2772 sub_last->str_idx);
2879 if (BE (err != REG_NOERROR, 0)) 2773 if (__glibc_unlikely (err != REG_NOERROR))
2880 return err; 2774 return err;
2881 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; 2775 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2882 return clean_state_log_if_needed (mctx, to_idx); 2776 return clean_state_log_if_needed (mctx, to_idx);
@@ -2891,7 +2785,6 @@ get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2891 E.g. RE: (a){2} */ 2785 E.g. RE: (a){2} */
2892 2786
2893static Idx 2787static Idx
2894internal_function
2895find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 2788find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2896 Idx subexp_idx, int type) 2789 Idx subexp_idx, int type)
2897{ 2790{
@@ -2904,16 +2797,17 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2904 && node->opr.idx == subexp_idx) 2797 && node->opr.idx == subexp_idx)
2905 return cls_node; 2798 return cls_node;
2906 } 2799 }
2907 return REG_MISSING; 2800 return -1;
2908} 2801}
2909 2802
2910/* Check whether the node TOP_NODE at TOP_STR can arrive to the node 2803/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2911 LAST_NODE at LAST_STR. We record the path onto PATH since it will be 2804 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2912 heavily reused. 2805 heavily reused.
2913 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ 2806 Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot,
2807 REG_ESPACE if memory is exhausted. */
2914 2808
2915static reg_errcode_t 2809static reg_errcode_t
2916internal_function __attribute_warn_unused_result__ 2810__attribute_warn_unused_result__
2917check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, 2811check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2918 Idx top_str, Idx last_node, Idx last_str, int type) 2812 Idx top_str, Idx last_node, Idx last_str, int type)
2919{ 2813{
@@ -2927,19 +2821,19 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2927 2821
2928 subexp_num = dfa->nodes[top_node].opr.idx; 2822 subexp_num = dfa->nodes[top_node].opr.idx;
2929 /* Extend the buffer if we need. */ 2823 /* Extend the buffer if we need. */
2930 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) 2824 if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
2931 { 2825 {
2932 re_dfastate_t **new_array; 2826 re_dfastate_t **new_array;
2933 Idx old_alloc = path->alloc; 2827 Idx old_alloc = path->alloc;
2934 Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; 2828 Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2935 Idx new_alloc; 2829 Idx new_alloc;
2936 if (BE (IDX_MAX - old_alloc < incr_alloc, 0)) 2830 if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2937 return REG_ESPACE; 2831 return REG_ESPACE;
2938 new_alloc = old_alloc + incr_alloc; 2832 new_alloc = old_alloc + incr_alloc;
2939 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) 2833 if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2940 return REG_ESPACE; 2834 return REG_ESPACE;
2941 new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); 2835 new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2942 if (BE (new_array == NULL, 0)) 2836 if (__glibc_unlikely (new_array == NULL))
2943 return REG_ESPACE; 2837 return REG_ESPACE;
2944 path->array = new_array; 2838 path->array = new_array;
2945 path->alloc = new_alloc; 2839 path->alloc = new_alloc;
@@ -2960,10 +2854,10 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2960 if (str_idx == top_str) 2854 if (str_idx == top_str)
2961 { 2855 {
2962 err = re_node_set_init_1 (&next_nodes, top_node); 2856 err = re_node_set_init_1 (&next_nodes, top_node);
2963 if (BE (err != REG_NOERROR, 0)) 2857 if (__glibc_unlikely (err != REG_NOERROR))
2964 return err; 2858 return err;
2965 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2859 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2966 if (BE (err != REG_NOERROR, 0)) 2860 if (__glibc_unlikely (err != REG_NOERROR))
2967 { 2861 {
2968 re_node_set_free (&next_nodes); 2862 re_node_set_free (&next_nodes);
2969 return err; 2863 return err;
@@ -2975,7 +2869,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2975 if (cur_state && cur_state->has_backref) 2869 if (cur_state && cur_state->has_backref)
2976 { 2870 {
2977 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); 2871 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2978 if (BE (err != REG_NOERROR, 0)) 2872 if (__glibc_unlikely (err != REG_NOERROR))
2979 return err; 2873 return err;
2980 } 2874 }
2981 else 2875 else
@@ -2987,14 +2881,14 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2987 { 2881 {
2988 err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2882 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2989 subexp_num, type); 2883 subexp_num, type);
2990 if (BE (err != REG_NOERROR, 0)) 2884 if (__glibc_unlikely (err != REG_NOERROR))
2991 { 2885 {
2992 re_node_set_free (&next_nodes); 2886 re_node_set_free (&next_nodes);
2993 return err; 2887 return err;
2994 } 2888 }
2995 } 2889 }
2996 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 2890 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2997 if (BE (cur_state == NULL && err != REG_NOERROR, 0)) 2891 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2998 { 2892 {
2999 re_node_set_free (&next_nodes); 2893 re_node_set_free (&next_nodes);
3000 return err; 2894 return err;
@@ -3009,7 +2903,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
3009 { 2903 {
3010 err = re_node_set_merge (&next_nodes, 2904 err = re_node_set_merge (&next_nodes,
3011 &mctx->state_log[str_idx + 1]->nodes); 2905 &mctx->state_log[str_idx + 1]->nodes);
3012 if (BE (err != REG_NOERROR, 0)) 2906 if (__glibc_unlikely (err != REG_NOERROR))
3013 { 2907 {
3014 re_node_set_free (&next_nodes); 2908 re_node_set_free (&next_nodes);
3015 return err; 2909 return err;
@@ -3020,7 +2914,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
3020 err = check_arrival_add_next_nodes (mctx, str_idx, 2914 err = check_arrival_add_next_nodes (mctx, str_idx,
3021 &cur_state->non_eps_nodes, 2915 &cur_state->non_eps_nodes,
3022 &next_nodes); 2916 &next_nodes);
3023 if (BE (err != REG_NOERROR, 0)) 2917 if (__glibc_unlikely (err != REG_NOERROR))
3024 { 2918 {
3025 re_node_set_free (&next_nodes); 2919 re_node_set_free (&next_nodes);
3026 return err; 2920 return err;
@@ -3030,14 +2924,14 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
3030 if (next_nodes.nelem) 2924 if (next_nodes.nelem)
3031 { 2925 {
3032 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2926 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3033 if (BE (err != REG_NOERROR, 0)) 2927 if (__glibc_unlikely (err != REG_NOERROR))
3034 { 2928 {
3035 re_node_set_free (&next_nodes); 2929 re_node_set_free (&next_nodes);
3036 return err; 2930 return err;
3037 } 2931 }
3038 err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2932 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3039 subexp_num, type); 2933 subexp_num, type);
3040 if (BE (err != REG_NOERROR, 0)) 2934 if (__glibc_unlikely (err != REG_NOERROR))
3041 { 2935 {
3042 re_node_set_free (&next_nodes); 2936 re_node_set_free (&next_nodes);
3043 return err; 2937 return err;
@@ -3045,7 +2939,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
3045 } 2939 }
3046 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); 2940 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3047 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 2941 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3048 if (BE (cur_state == NULL && err != REG_NOERROR, 0)) 2942 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
3049 { 2943 {
3050 re_node_set_free (&next_nodes); 2944 re_node_set_free (&next_nodes);
3051 return err; 2945 return err;
@@ -3078,27 +2972,22 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
3078 Can't we unify them? */ 2972 Can't we unify them? */
3079 2973
3080static reg_errcode_t 2974static reg_errcode_t
3081internal_function __attribute_warn_unused_result__ 2975__attribute_warn_unused_result__
3082check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, 2976check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3083 re_node_set *cur_nodes, re_node_set *next_nodes) 2977 re_node_set *cur_nodes, re_node_set *next_nodes)
3084{ 2978{
3085 const re_dfa_t *const dfa = mctx->dfa; 2979 const re_dfa_t *const dfa = mctx->dfa;
3086 bool ok; 2980 bool ok;
3087 Idx cur_idx; 2981 Idx cur_idx;
3088#ifdef RE_ENABLE_I18N
3089 reg_errcode_t err = REG_NOERROR; 2982 reg_errcode_t err = REG_NOERROR;
3090#endif
3091 re_node_set union_set; 2983 re_node_set union_set;
3092 re_node_set_init_empty (&union_set); 2984 re_node_set_init_empty (&union_set);
3093 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) 2985 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3094 { 2986 {
3095 int naccepted = 0; 2987 int naccepted = 0;
3096 Idx cur_node = cur_nodes->elems[cur_idx]; 2988 Idx cur_node = cur_nodes->elems[cur_idx];
3097#ifdef DEBUG 2989 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
3098 re_token_type_t type = dfa->nodes[cur_node].type; 2990
3099 assert (!IS_EPSILON_NODE (type));
3100#endif
3101#ifdef RE_ENABLE_I18N
3102 /* If the node may accept "multi byte". */ 2991 /* If the node may accept "multi byte". */
3103 if (dfa->nodes[cur_node].accept_mb) 2992 if (dfa->nodes[cur_node].accept_mb)
3104 { 2993 {
@@ -3114,34 +3003,34 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3114 if (dest_state) 3003 if (dest_state)
3115 { 3004 {
3116 err = re_node_set_merge (&union_set, &dest_state->nodes); 3005 err = re_node_set_merge (&union_set, &dest_state->nodes);
3117 if (BE (err != REG_NOERROR, 0)) 3006 if (__glibc_unlikely (err != REG_NOERROR))
3118 { 3007 {
3119 re_node_set_free (&union_set); 3008 re_node_set_free (&union_set);
3120 return err; 3009 return err;
3121 } 3010 }
3122 } 3011 }
3123 ok = re_node_set_insert (&union_set, next_node); 3012 ok = re_node_set_insert (&union_set, next_node);
3124 if (BE (! ok, 0)) 3013 if (__glibc_unlikely (! ok))
3125 { 3014 {
3126 re_node_set_free (&union_set); 3015 re_node_set_free (&union_set);
3127 return REG_ESPACE; 3016 return REG_ESPACE;
3128 } 3017 }
3129 mctx->state_log[next_idx] = re_acquire_state (&err, dfa, 3018 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3130 &union_set); 3019 &union_set);
3131 if (BE (mctx->state_log[next_idx] == NULL 3020 if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3132 && err != REG_NOERROR, 0)) 3021 && err != REG_NOERROR))
3133 { 3022 {
3134 re_node_set_free (&union_set); 3023 re_node_set_free (&union_set);
3135 return err; 3024 return err;
3136 } 3025 }
3137 } 3026 }
3138 } 3027 }
3139#endif /* RE_ENABLE_I18N */ 3028
3140 if (naccepted 3029 if (naccepted
3141 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) 3030 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3142 { 3031 {
3143 ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); 3032 ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3144 if (BE (! ok, 0)) 3033 if (__glibc_unlikely (! ok))
3145 { 3034 {
3146 re_node_set_free (&union_set); 3035 re_node_set_free (&union_set);
3147 return REG_ESPACE; 3036 return REG_ESPACE;
@@ -3159,18 +3048,15 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3159*/ 3048*/
3160 3049
3161static reg_errcode_t 3050static reg_errcode_t
3162internal_function
3163check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, 3051check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3164 Idx ex_subexp, int type) 3052 Idx ex_subexp, int type)
3165{ 3053{
3166 reg_errcode_t err; 3054 reg_errcode_t err;
3167 Idx idx, outside_node; 3055 Idx idx, outside_node;
3168 re_node_set new_nodes; 3056 re_node_set new_nodes;
3169#ifdef DEBUG 3057 DEBUG_ASSERT (cur_nodes->nelem);
3170 assert (cur_nodes->nelem);
3171#endif
3172 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); 3058 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3173 if (BE (err != REG_NOERROR, 0)) 3059 if (__glibc_unlikely (err != REG_NOERROR))
3174 return err; 3060 return err;
3175 /* Create a new node set NEW_NODES with the nodes which are epsilon 3061 /* Create a new node set NEW_NODES with the nodes which are epsilon
3176 closures of the node in CUR_NODES. */ 3062 closures of the node in CUR_NODES. */
@@ -3180,11 +3066,11 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3180 Idx cur_node = cur_nodes->elems[idx]; 3066 Idx cur_node = cur_nodes->elems[idx];
3181 const re_node_set *eclosure = dfa->eclosures + cur_node; 3067 const re_node_set *eclosure = dfa->eclosures + cur_node;
3182 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); 3068 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3183 if (outside_node == REG_MISSING) 3069 if (outside_node == -1)
3184 { 3070 {
3185 /* There are no problematic nodes, just merge them. */ 3071 /* There are no problematic nodes, just merge them. */
3186 err = re_node_set_merge (&new_nodes, eclosure); 3072 err = re_node_set_merge (&new_nodes, eclosure);
3187 if (BE (err != REG_NOERROR, 0)) 3073 if (__glibc_unlikely (err != REG_NOERROR))
3188 { 3074 {
3189 re_node_set_free (&new_nodes); 3075 re_node_set_free (&new_nodes);
3190 return err; 3076 return err;
@@ -3195,7 +3081,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3195 /* There are problematic nodes, re-calculate incrementally. */ 3081 /* There are problematic nodes, re-calculate incrementally. */
3196 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, 3082 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3197 ex_subexp, type); 3083 ex_subexp, type);
3198 if (BE (err != REG_NOERROR, 0)) 3084 if (__glibc_unlikely (err != REG_NOERROR))
3199 { 3085 {
3200 re_node_set_free (&new_nodes); 3086 re_node_set_free (&new_nodes);
3201 return err; 3087 return err;
@@ -3212,7 +3098,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3212 problematic append it to DST_NODES. */ 3098 problematic append it to DST_NODES. */
3213 3099
3214static reg_errcode_t 3100static reg_errcode_t
3215internal_function __attribute_warn_unused_result__ 3101__attribute_warn_unused_result__
3216check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, 3102check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3217 Idx target, Idx ex_subexp, int type) 3103 Idx target, Idx ex_subexp, int type)
3218{ 3104{
@@ -3227,13 +3113,13 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3227 if (type == OP_CLOSE_SUBEXP) 3113 if (type == OP_CLOSE_SUBEXP)
3228 { 3114 {
3229 ok = re_node_set_insert (dst_nodes, cur_node); 3115 ok = re_node_set_insert (dst_nodes, cur_node);
3230 if (BE (! ok, 0)) 3116 if (__glibc_unlikely (! ok))
3231 return REG_ESPACE; 3117 return REG_ESPACE;
3232 } 3118 }
3233 break; 3119 break;
3234 } 3120 }
3235 ok = re_node_set_insert (dst_nodes, cur_node); 3121 ok = re_node_set_insert (dst_nodes, cur_node);
3236 if (BE (! ok, 0)) 3122 if (__glibc_unlikely (! ok))
3237 return REG_ESPACE; 3123 return REG_ESPACE;
3238 if (dfa->edests[cur_node].nelem == 0) 3124 if (dfa->edests[cur_node].nelem == 0)
3239 break; 3125 break;
@@ -3243,7 +3129,7 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3243 err = check_arrival_expand_ecl_sub (dfa, dst_nodes, 3129 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3244 dfa->edests[cur_node].elems[1], 3130 dfa->edests[cur_node].elems[1],
3245 ex_subexp, type); 3131 ex_subexp, type);
3246 if (BE (err != REG_NOERROR, 0)) 3132 if (__glibc_unlikely (err != REG_NOERROR))
3247 return err; 3133 return err;
3248 } 3134 }
3249 cur_node = dfa->edests[cur_node].elems[0]; 3135 cur_node = dfa->edests[cur_node].elems[0];
@@ -3257,7 +3143,7 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3257 in MCTX->BKREF_ENTS. */ 3143 in MCTX->BKREF_ENTS. */
3258 3144
3259static reg_errcode_t 3145static reg_errcode_t
3260internal_function __attribute_warn_unused_result__ 3146__attribute_warn_unused_result__
3261expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, 3147expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3262 Idx cur_str, Idx subexp_num, int type) 3148 Idx cur_str, Idx subexp_num, int type)
3263{ 3149{
@@ -3266,7 +3152,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3266 Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); 3152 Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3267 struct re_backref_cache_entry *ent; 3153 struct re_backref_cache_entry *ent;
3268 3154
3269 if (cache_idx_start == REG_MISSING) 3155 if (cache_idx_start == -1)
3270 return REG_NOERROR; 3156 return REG_NOERROR;
3271 3157
3272 restart: 3158 restart:
@@ -3295,8 +3181,8 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3295 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); 3181 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3296 err3 = re_node_set_merge (cur_nodes, &new_dests); 3182 err3 = re_node_set_merge (cur_nodes, &new_dests);
3297 re_node_set_free (&new_dests); 3183 re_node_set_free (&new_dests);
3298 if (BE (err != REG_NOERROR || err2 != REG_NOERROR 3184 if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3299 || err3 != REG_NOERROR, 0)) 3185 || err3 != REG_NOERROR))
3300 { 3186 {
3301 err = (err != REG_NOERROR ? err 3187 err = (err != REG_NOERROR ? err
3302 : (err2 != REG_NOERROR ? err2 : err3)); 3188 : (err2 != REG_NOERROR ? err2 : err3));
@@ -3318,7 +3204,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3318 err = re_node_set_init_copy (&union_set, 3204 err = re_node_set_init_copy (&union_set,
3319 &mctx->state_log[to_idx]->nodes); 3205 &mctx->state_log[to_idx]->nodes);
3320 ok = re_node_set_insert (&union_set, next_node); 3206 ok = re_node_set_insert (&union_set, next_node);
3321 if (BE (err != REG_NOERROR || ! ok, 0)) 3207 if (__glibc_unlikely (err != REG_NOERROR || ! ok))
3322 { 3208 {
3323 re_node_set_free (&union_set); 3209 re_node_set_free (&union_set);
3324 err = err != REG_NOERROR ? err : REG_ESPACE; 3210 err = err != REG_NOERROR ? err : REG_ESPACE;
@@ -3328,13 +3214,13 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3328 else 3214 else
3329 { 3215 {
3330 err = re_node_set_init_1 (&union_set, next_node); 3216 err = re_node_set_init_1 (&union_set, next_node);
3331 if (BE (err != REG_NOERROR, 0)) 3217 if (__glibc_unlikely (err != REG_NOERROR))
3332 return err; 3218 return err;
3333 } 3219 }
3334 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); 3220 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3335 re_node_set_free (&union_set); 3221 re_node_set_free (&union_set);
3336 if (BE (mctx->state_log[to_idx] == NULL 3222 if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3337 && err != REG_NOERROR, 0)) 3223 && err != REG_NOERROR))
3338 return err; 3224 return err;
3339 } 3225 }
3340 } 3226 }
@@ -3345,8 +3231,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3345/* Build transition table for the state. 3231/* Build transition table for the state.
3346 Return true if successful. */ 3232 Return true if successful. */
3347 3233
3348static bool 3234static bool __attribute_noinline__
3349internal_function
3350build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) 3235build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3351{ 3236{
3352 reg_errcode_t err; 3237 reg_errcode_t err;
@@ -3354,36 +3239,20 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3354 int ch; 3239 int ch;
3355 bool need_word_trtable = false; 3240 bool need_word_trtable = false;
3356 bitset_word_t elem, mask; 3241 bitset_word_t elem, mask;
3357 bool dests_node_malloced = false;
3358 bool dest_states_malloced = false;
3359 Idx ndests; /* Number of the destination states from 'state'. */ 3242 Idx ndests; /* Number of the destination states from 'state'. */
3360 re_dfastate_t **trtable; 3243 re_dfastate_t **trtable;
3361 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; 3244 re_dfastate_t *dest_states[SBC_MAX];
3362 re_node_set follows, *dests_node; 3245 re_dfastate_t *dest_states_word[SBC_MAX];
3363 bitset_t *dests_ch; 3246 re_dfastate_t *dest_states_nl[SBC_MAX];
3247 re_node_set follows;
3364 bitset_t acceptable; 3248 bitset_t acceptable;
3365 3249
3366 struct dests_alloc
3367 {
3368 re_node_set dests_node[SBC_MAX];
3369 bitset_t dests_ch[SBC_MAX];
3370 } *dests_alloc;
3371
3372 /* We build DFA states which corresponds to the destination nodes 3250 /* We build DFA states which corresponds to the destination nodes
3373 from 'state'. 'dests_node[i]' represents the nodes which i-th 3251 from 'state'. 'dests_node[i]' represents the nodes which i-th
3374 destination state contains, and 'dests_ch[i]' represents the 3252 destination state contains, and 'dests_ch[i]' represents the
3375 characters which i-th destination state accepts. */ 3253 characters which i-th destination state accepts. */
3376 if (__libc_use_alloca (sizeof (struct dests_alloc))) 3254 re_node_set dests_node[SBC_MAX];
3377 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); 3255 bitset_t dests_ch[SBC_MAX];
3378 else
3379 {
3380 dests_alloc = re_malloc (struct dests_alloc, 1);
3381 if (BE (dests_alloc == NULL, 0))
3382 return false;
3383 dests_node_malloced = true;
3384 }
3385 dests_node = dests_alloc->dests_node;
3386 dests_ch = dests_alloc->dests_ch;
3387 3256
3388 /* Initialize transition table. */ 3257 /* Initialize transition table. */
3389 state->word_trtable = state->trtable = NULL; 3258 state->word_trtable = state->trtable = NULL;
@@ -3391,16 +3260,14 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3391 /* At first, group all nodes belonging to 'state' into several 3260 /* At first, group all nodes belonging to 'state' into several
3392 destinations. */ 3261 destinations. */
3393 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); 3262 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3394 if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) 3263 if (__glibc_unlikely (ndests <= 0))
3395 { 3264 {
3396 if (dests_node_malloced)
3397 free (dests_alloc);
3398 /* Return false in case of an error, true otherwise. */ 3265 /* Return false in case of an error, true otherwise. */
3399 if (ndests == 0) 3266 if (ndests == 0)
3400 { 3267 {
3401 state->trtable = (re_dfastate_t **) 3268 state->trtable = (re_dfastate_t **)
3402 calloc (sizeof (re_dfastate_t *), SBC_MAX); 3269 calloc (sizeof (re_dfastate_t *), SBC_MAX);
3403 if (BE (state->trtable == NULL, 0)) 3270 if (__glibc_unlikely (state->trtable == NULL))
3404 return false; 3271 return false;
3405 return true; 3272 return true;
3406 } 3273 }
@@ -3408,40 +3275,15 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3408 } 3275 }
3409 3276
3410 err = re_node_set_alloc (&follows, ndests + 1); 3277 err = re_node_set_alloc (&follows, ndests + 1);
3411 if (BE (err != REG_NOERROR, 0)) 3278 if (__glibc_unlikely (err != REG_NOERROR))
3412 goto out_free;
3413
3414 /* Avoid arithmetic overflow in size calculation. */
3415 if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3416 / (3 * sizeof (re_dfastate_t *)))
3417 < ndests),
3418 0))
3419 goto out_free;
3420
3421 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3422 + ndests * 3 * sizeof (re_dfastate_t *)))
3423 dest_states = (re_dfastate_t **)
3424 alloca (ndests * 3 * sizeof (re_dfastate_t *));
3425 else
3426 { 3279 {
3427 dest_states = (re_dfastate_t **) 3280 out_free:
3428 malloc (ndests * 3 * sizeof (re_dfastate_t *)); 3281 re_node_set_free (&follows);
3429 if (BE (dest_states == NULL, 0)) 3282 for (i = 0; i < ndests; ++i)
3430 { 3283 re_node_set_free (dests_node + i);
3431out_free: 3284 return false;
3432 if (dest_states_malloced)
3433 free (dest_states);
3434 re_node_set_free (&follows);
3435 for (i = 0; i < ndests; ++i)
3436 re_node_set_free (dests_node + i);
3437 if (dests_node_malloced)
3438 free (dests_alloc);
3439 return false;
3440 }
3441 dest_states_malloced = true;
3442 } 3285 }
3443 dest_states_word = dest_states + ndests; 3286
3444 dest_states_nl = dest_states_word + ndests;
3445 bitset_empty (acceptable); 3287 bitset_empty (acceptable);
3446 3288
3447 /* Then build the states for all destinations. */ 3289 /* Then build the states for all destinations. */
@@ -3453,15 +3295,15 @@ out_free:
3453 for (j = 0; j < dests_node[i].nelem; ++j) 3295 for (j = 0; j < dests_node[i].nelem; ++j)
3454 { 3296 {
3455 next_node = dfa->nexts[dests_node[i].elems[j]]; 3297 next_node = dfa->nexts[dests_node[i].elems[j]];
3456 if (next_node != REG_MISSING) 3298 if (next_node != -1)
3457 { 3299 {
3458 err = re_node_set_merge (&follows, dfa->eclosures + next_node); 3300 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3459 if (BE (err != REG_NOERROR, 0)) 3301 if (__glibc_unlikely (err != REG_NOERROR))
3460 goto out_free; 3302 goto out_free;
3461 } 3303 }
3462 } 3304 }
3463 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); 3305 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3464 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) 3306 if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3465 goto out_free; 3307 goto out_free;
3466 /* If the new state has context constraint, 3308 /* If the new state has context constraint,
3467 build appropriate states for these contexts. */ 3309 build appropriate states for these contexts. */
@@ -3469,7 +3311,8 @@ out_free:
3469 { 3311 {
3470 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, 3312 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3471 CONTEXT_WORD); 3313 CONTEXT_WORD);
3472 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) 3314 if (__glibc_unlikely (dest_states_word[i] == NULL
3315 && err != REG_NOERROR))
3473 goto out_free; 3316 goto out_free;
3474 3317
3475 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) 3318 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
@@ -3477,7 +3320,7 @@ out_free:
3477 3320
3478 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, 3321 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3479 CONTEXT_NEWLINE); 3322 CONTEXT_NEWLINE);
3480 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) 3323 if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3481 goto out_free; 3324 goto out_free;
3482 } 3325 }
3483 else 3326 else
@@ -3488,7 +3331,7 @@ out_free:
3488 bitset_merge (acceptable, dests_ch[i]); 3331 bitset_merge (acceptable, dests_ch[i]);
3489 } 3332 }
3490 3333
3491 if (!BE (need_word_trtable, 0)) 3334 if (!__glibc_unlikely (need_word_trtable))
3492 { 3335 {
3493 /* We don't care about whether the following character is a word 3336 /* We don't care about whether the following character is a word
3494 character, or we are in a single-byte character set so we can 3337 character, or we are in a single-byte character set so we can
@@ -3496,7 +3339,7 @@ out_free:
3496 256-entry transition table. */ 3339 256-entry transition table. */
3497 trtable = state->trtable = 3340 trtable = state->trtable =
3498 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); 3341 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3499 if (BE (trtable == NULL, 0)) 3342 if (__glibc_unlikely (trtable == NULL))
3500 goto out_free; 3343 goto out_free;
3501 3344
3502 /* For all characters ch...: */ 3345 /* For all characters ch...: */
@@ -3504,7 +3347,7 @@ out_free:
3504 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; 3347 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3505 elem; 3348 elem;
3506 mask <<= 1, elem >>= 1, ++ch) 3349 mask <<= 1, elem >>= 1, ++ch)
3507 if (BE (elem & 1, 0)) 3350 if (__glibc_unlikely (elem & 1))
3508 { 3351 {
3509 /* There must be exactly one destination which accepts 3352 /* There must be exactly one destination which accepts
3510 character ch. See group_nodes_into_DFAstates. */ 3353 character ch. See group_nodes_into_DFAstates. */
@@ -3527,7 +3370,7 @@ out_free:
3527 starting at trtable[SBC_MAX]. */ 3370 starting at trtable[SBC_MAX]. */
3528 trtable = state->word_trtable = 3371 trtable = state->word_trtable =
3529 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); 3372 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3530 if (BE (trtable == NULL, 0)) 3373 if (__glibc_unlikely (trtable == NULL))
3531 goto out_free; 3374 goto out_free;
3532 3375
3533 /* For all characters ch...: */ 3376 /* For all characters ch...: */
@@ -3535,7 +3378,7 @@ out_free:
3535 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; 3378 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3536 elem; 3379 elem;
3537 mask <<= 1, elem >>= 1, ++ch) 3380 mask <<= 1, elem >>= 1, ++ch)
3538 if (BE (elem & 1, 0)) 3381 if (__glibc_unlikely (elem & 1))
3539 { 3382 {
3540 /* There must be exactly one destination which accepts 3383 /* There must be exactly one destination which accepts
3541 character ch. See group_nodes_into_DFAstates. */ 3384 character ch. See group_nodes_into_DFAstates. */
@@ -3565,26 +3408,19 @@ out_free:
3565 } 3408 }
3566 } 3409 }
3567 3410
3568 if (dest_states_malloced)
3569 free (dest_states);
3570
3571 re_node_set_free (&follows); 3411 re_node_set_free (&follows);
3572 for (i = 0; i < ndests; ++i) 3412 for (i = 0; i < ndests; ++i)
3573 re_node_set_free (dests_node + i); 3413 re_node_set_free (dests_node + i);
3574
3575 if (dests_node_malloced)
3576 free (dests_alloc);
3577
3578 return true; 3414 return true;
3579} 3415}
3580 3416
3581/* Group all nodes belonging to STATE into several destinations. 3417/* Group all nodes belonging to STATE into several destinations.
3582 Then for all destinations, set the nodes belonging to the destination 3418 Then for all destinations, set the nodes belonging to the destination
3583 to DESTS_NODE[i] and set the characters accepted by the destination 3419 to DESTS_NODE[i] and set the characters accepted by the destination
3584 to DEST_CH[i]. This function return the number of destinations. */ 3420 to DEST_CH[i]. Return the number of destinations if successful,
3421 -1 on internal error. */
3585 3422
3586static Idx 3423static Idx
3587internal_function
3588group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, 3424group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3589 re_node_set *dests_node, bitset_t *dests_ch) 3425 re_node_set *dests_node, bitset_t *dests_ch)
3590{ 3426{
@@ -3613,18 +3449,15 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3613 } 3449 }
3614 else if (type == OP_PERIOD) 3450 else if (type == OP_PERIOD)
3615 { 3451 {
3616#ifdef RE_ENABLE_I18N
3617 if (dfa->mb_cur_max > 1) 3452 if (dfa->mb_cur_max > 1)
3618 bitset_merge (accepts, dfa->sb_char); 3453 bitset_merge (accepts, dfa->sb_char);
3619 else 3454 else
3620#endif
3621 bitset_set_all (accepts); 3455 bitset_set_all (accepts);
3622 if (!(dfa->syntax & RE_DOT_NEWLINE)) 3456 if (!(dfa->syntax & RE_DOT_NEWLINE))
3623 bitset_clear (accepts, '\n'); 3457 bitset_clear (accepts, '\n');
3624 if (dfa->syntax & RE_DOT_NOT_NULL) 3458 if (dfa->syntax & RE_DOT_NOT_NULL)
3625 bitset_clear (accepts, '\0'); 3459 bitset_clear (accepts, '\0');
3626 } 3460 }
3627#ifdef RE_ENABLE_I18N
3628 else if (type == OP_UTF8_PERIOD) 3461 else if (type == OP_UTF8_PERIOD)
3629 { 3462 {
3630 if (ASCII_CHARS % BITSET_WORD_BITS == 0) 3463 if (ASCII_CHARS % BITSET_WORD_BITS == 0)
@@ -3636,7 +3469,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3636 if (dfa->syntax & RE_DOT_NOT_NULL) 3469 if (dfa->syntax & RE_DOT_NOT_NULL)
3637 bitset_clear (accepts, '\0'); 3470 bitset_clear (accepts, '\0');
3638 } 3471 }
3639#endif
3640 else 3472 else
3641 continue; 3473 continue;
3642 3474
@@ -3667,12 +3499,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3667 bitset_empty (accepts); 3499 bitset_empty (accepts);
3668 continue; 3500 continue;
3669 } 3501 }
3670#ifdef RE_ENABLE_I18N
3671 if (dfa->mb_cur_max > 1) 3502 if (dfa->mb_cur_max > 1)
3672 for (j = 0; j < BITSET_WORDS; ++j) 3503 for (j = 0; j < BITSET_WORDS; ++j)
3673 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); 3504 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3674 else 3505 else
3675#endif
3676 for (j = 0; j < BITSET_WORDS; ++j) 3506 for (j = 0; j < BITSET_WORDS; ++j)
3677 any_set |= (accepts[j] &= dfa->word_char[j]); 3507 any_set |= (accepts[j] &= dfa->word_char[j]);
3678 if (!any_set) 3508 if (!any_set)
@@ -3686,12 +3516,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3686 bitset_empty (accepts); 3516 bitset_empty (accepts);
3687 continue; 3517 continue;
3688 } 3518 }
3689#ifdef RE_ENABLE_I18N
3690 if (dfa->mb_cur_max > 1) 3519 if (dfa->mb_cur_max > 1)
3691 for (j = 0; j < BITSET_WORDS; ++j) 3520 for (j = 0; j < BITSET_WORDS; ++j)
3692 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); 3521 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3693 else 3522 else
3694#endif
3695 for (j = 0; j < BITSET_WORDS; ++j) 3523 for (j = 0; j < BITSET_WORDS; ++j)
3696 any_set |= (accepts[j] &= ~dfa->word_char[j]); 3524 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3697 if (!any_set) 3525 if (!any_set)
@@ -3735,14 +3563,14 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3735 bitset_copy (dests_ch[ndests], remains); 3563 bitset_copy (dests_ch[ndests], remains);
3736 bitset_copy (dests_ch[j], intersec); 3564 bitset_copy (dests_ch[j], intersec);
3737 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); 3565 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3738 if (BE (err != REG_NOERROR, 0)) 3566 if (__glibc_unlikely (err != REG_NOERROR))
3739 goto error_return; 3567 goto error_return;
3740 ++ndests; 3568 ++ndests;
3741 } 3569 }
3742 3570
3743 /* Put the position in the current group. */ 3571 /* Put the position in the current group. */
3744 ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); 3572 ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3745 if (BE (! ok, 0)) 3573 if (__glibc_unlikely (! ok))
3746 goto error_return; 3574 goto error_return;
3747 3575
3748 /* If all characters are consumed, go to next node. */ 3576 /* If all characters are consumed, go to next node. */
@@ -3754,20 +3582,20 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3754 { 3582 {
3755 bitset_copy (dests_ch[ndests], accepts); 3583 bitset_copy (dests_ch[ndests], accepts);
3756 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); 3584 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3757 if (BE (err != REG_NOERROR, 0)) 3585 if (__glibc_unlikely (err != REG_NOERROR))
3758 goto error_return; 3586 goto error_return;
3759 ++ndests; 3587 ++ndests;
3760 bitset_empty (accepts); 3588 bitset_empty (accepts);
3761 } 3589 }
3762 } 3590 }
3591 assume (ndests <= SBC_MAX);
3763 return ndests; 3592 return ndests;
3764 error_return: 3593 error_return:
3765 for (j = 0; j < ndests; ++j) 3594 for (j = 0; j < ndests; ++j)
3766 re_node_set_free (dests_node + j); 3595 re_node_set_free (dests_node + j);
3767 return REG_MISSING; 3596 return -1;
3768} 3597}
3769 3598
3770#ifdef RE_ENABLE_I18N
3771/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. 3599/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3772 Return the number of the bytes the node accepts. 3600 Return the number of the bytes the node accepts.
3773 STR_IDX is the current index of the input string. 3601 STR_IDX is the current index of the input string.
@@ -3776,8 +3604,11 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3776 one collating element like '.', '[a-z]', opposite to the other nodes 3604 one collating element like '.', '[a-z]', opposite to the other nodes
3777 can only accept one byte. */ 3605 can only accept one byte. */
3778 3606
3607#ifdef _LIBC
3608# include <locale/weight.h>
3609#endif
3610
3779static int 3611static int
3780internal_function
3781check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, 3612check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3782 const re_string_t *input, Idx str_idx) 3613 const re_string_t *input, Idx str_idx)
3783{ 3614{
@@ -3785,10 +3616,10 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3785 int char_len, elem_len; 3616 int char_len, elem_len;
3786 Idx i; 3617 Idx i;
3787 3618
3788 if (BE (node->type == OP_UTF8_PERIOD, 0)) 3619 if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3789 { 3620 {
3790 unsigned char c = re_string_byte_at (input, str_idx), d; 3621 unsigned char c = re_string_byte_at (input, str_idx), d;
3791 if (BE (c < 0xc2, 1)) 3622 if (__glibc_likely (c < 0xc2))
3792 return 0; 3623 return 0;
3793 3624
3794 if (str_idx + 2 > input->len) 3625 if (str_idx + 2 > input->len)
@@ -3844,10 +3675,10 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3844 /* FIXME: I don't think this if is needed, as both '\n' 3675 /* FIXME: I don't think this if is needed, as both '\n'
3845 and '\0' are char_len == 1. */ 3676 and '\0' are char_len == 1. */
3846 /* '.' accepts any one character except the following two cases. */ 3677 /* '.' accepts any one character except the following two cases. */
3847 if ((!(dfa->syntax & RE_DOT_NEWLINE) && 3678 if ((!(dfa->syntax & RE_DOT_NEWLINE)
3848 re_string_byte_at (input, str_idx) == '\n') || 3679 && re_string_byte_at (input, str_idx) == '\n')
3849 ((dfa->syntax & RE_DOT_NOT_NULL) && 3680 || ((dfa->syntax & RE_DOT_NOT_NULL)
3850 re_string_byte_at (input, str_idx) == '\0')) 3681 && re_string_byte_at (input, str_idx) == '\0'))
3851 return 0; 3682 return 0;
3852 return char_len; 3683 return char_len;
3853 } 3684 }
@@ -3859,12 +3690,12 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3859 if (node->type == COMPLEX_BRACKET) 3690 if (node->type == COMPLEX_BRACKET)
3860 { 3691 {
3861 const re_charset_t *cset = node->opr.mbcset; 3692 const re_charset_t *cset = node->opr.mbcset;
3862# ifdef _LIBC 3693#ifdef _LIBC
3863 const unsigned char *pin 3694 const unsigned char *pin
3864 = ((const unsigned char *) re_string_get_buffer (input) + str_idx); 3695 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3865 Idx j; 3696 Idx j;
3866 uint32_t nrules; 3697 uint32_t nrules;
3867# endif /* _LIBC */ 3698#endif
3868 int match_len = 0; 3699 int match_len = 0;
3869 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) 3700 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3870 ? re_string_wchar_at (input, str_idx) : 0); 3701 ? re_string_wchar_at (input, str_idx) : 0);
@@ -3887,7 +3718,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3887 } 3718 }
3888 } 3719 }
3889 3720
3890# ifdef _LIBC 3721#ifdef _LIBC
3891 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3722 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3892 if (nrules != 0) 3723 if (nrules != 0)
3893 { 3724 {
@@ -3895,8 +3726,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3895 const int32_t *table, *indirect; 3726 const int32_t *table, *indirect;
3896 const unsigned char *weights, *extra; 3727 const unsigned char *weights, *extra;
3897 const char *collseqwc; 3728 const char *collseqwc;
3898 /* This #include defines a local function! */
3899# include <locale/weight.h>
3900 3729
3901 /* match with collating_symbol? */ 3730 /* match with collating_symbol? */
3902 if (cset->ncoll_syms) 3731 if (cset->ncoll_syms)
@@ -3953,35 +3782,32 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3953 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 3782 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3954 indirect = (const int32_t *) 3783 indirect = (const int32_t *)
3955 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3784 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3956 int32_t idx = findidx (&cp, elem_len); 3785 int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3786 int32_t rule = idx >> 24;
3787 idx &= 0xffffff;
3957 if (idx > 0) 3788 if (idx > 0)
3958 for (i = 0; i < cset->nequiv_classes; ++i) 3789 {
3959 { 3790 size_t weight_len = weights[idx];
3960 int32_t equiv_class_idx = cset->equiv_classes[i]; 3791 for (i = 0; i < cset->nequiv_classes; ++i)
3961 size_t weight_len = weights[idx & 0xffffff]; 3792 {
3962 if (weight_len == weights[equiv_class_idx & 0xffffff] 3793 int32_t equiv_class_idx = cset->equiv_classes[i];
3963 && (idx >> 24) == (equiv_class_idx >> 24)) 3794 int32_t equiv_class_rule = equiv_class_idx >> 24;
3964 { 3795 equiv_class_idx &= 0xffffff;
3965 Idx cnt = 0; 3796 if (weights[equiv_class_idx] == weight_len
3966 3797 && equiv_class_rule == rule
3967 idx &= 0xffffff; 3798 && memcmp (weights + idx + 1,
3968 equiv_class_idx &= 0xffffff; 3799 weights + equiv_class_idx + 1,
3969 3800 weight_len) == 0)
3970 while (cnt <= weight_len 3801 {
3971 && (weights[equiv_class_idx + 1 + cnt] 3802 match_len = elem_len;
3972 == weights[idx + 1 + cnt])) 3803 goto check_node_accept_bytes_match;
3973 ++cnt; 3804 }
3974 if (cnt > weight_len) 3805 }
3975 { 3806 }
3976 match_len = elem_len;
3977 goto check_node_accept_bytes_match;
3978 }
3979 }
3980 }
3981 } 3807 }
3982 } 3808 }
3983 else 3809 else
3984# endif /* _LIBC */ 3810#endif /* _LIBC */
3985 { 3811 {
3986 /* match with range expression? */ 3812 /* match with range expression? */
3987 for (i = 0; i < cset->nranges; ++i) 3813 for (i = 0; i < cset->nranges; ++i)
@@ -4007,9 +3833,8 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
4007 return 0; 3833 return 0;
4008} 3834}
4009 3835
4010# ifdef _LIBC 3836#ifdef _LIBC
4011static unsigned int 3837static unsigned int
4012internal_function
4013find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) 3838find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4014{ 3839{
4015 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3840 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -4066,14 +3891,12 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4066 return UINT_MAX; 3891 return UINT_MAX;
4067 } 3892 }
4068} 3893}
4069# endif /* _LIBC */ 3894#endif /* _LIBC */
4070#endif /* RE_ENABLE_I18N */
4071 3895
4072/* Check whether the node accepts the byte which is IDX-th 3896/* Check whether the node accepts the byte which is IDX-th
4073 byte of the INPUT. */ 3897 byte of the INPUT. */
4074 3898
4075static bool 3899static bool
4076internal_function
4077check_node_accept (const re_match_context_t *mctx, const re_token_t *node, 3900check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4078 Idx idx) 3901 Idx idx)
4079{ 3902{
@@ -4091,12 +3914,10 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4091 return false; 3914 return false;
4092 break; 3915 break;
4093 3916
4094#ifdef RE_ENABLE_I18N
4095 case OP_UTF8_PERIOD: 3917 case OP_UTF8_PERIOD:
4096 if (ch >= ASCII_CHARS) 3918 if (ch >= ASCII_CHARS)
4097 return false; 3919 return false;
4098 /* FALLTHROUGH */ 3920 FALLTHROUGH;
4099#endif
4100 case OP_PERIOD: 3921 case OP_PERIOD:
4101 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) 3922 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4102 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) 3923 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
@@ -4123,22 +3944,22 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4123/* Extend the buffers, if the buffers have run out. */ 3944/* Extend the buffers, if the buffers have run out. */
4124 3945
4125static reg_errcode_t 3946static reg_errcode_t
4126internal_function __attribute_warn_unused_result__ 3947__attribute_warn_unused_result__
4127extend_buffers (re_match_context_t *mctx, int min_len) 3948extend_buffers (re_match_context_t *mctx, int min_len)
4128{ 3949{
4129 reg_errcode_t ret; 3950 reg_errcode_t ret;
4130 re_string_t *pstr = &mctx->input; 3951 re_string_t *pstr = &mctx->input;
4131 3952
4132 /* Avoid overflow. */ 3953 /* Avoid overflow. */
4133 if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 3954 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4134 <= pstr->bufs_len, 0)) 3955 <= pstr->bufs_len))
4135 return REG_ESPACE; 3956 return REG_ESPACE;
4136 3957
4137 /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ 3958 /* Double the lengths of the buffers, but allocate at least MIN_LEN. */
4138 ret = re_string_realloc_buffers (pstr, 3959 ret = re_string_realloc_buffers (pstr,
4139 MAX (min_len, 3960 MAX (min_len,
4140 MIN (pstr->len, pstr->bufs_len * 2))); 3961 MIN (pstr->len, pstr->bufs_len * 2)));
4141 if (BE (ret != REG_NOERROR, 0)) 3962 if (__glibc_unlikely (ret != REG_NOERROR))
4142 return ret; 3963 return ret;
4143 3964
4144 if (mctx->state_log != NULL) 3965 if (mctx->state_log != NULL)
@@ -4149,7 +3970,7 @@ extend_buffers (re_match_context_t *mctx, int min_len)
4149 does not have the right size. */ 3970 does not have the right size. */
4150 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, 3971 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4151 pstr->bufs_len + 1); 3972 pstr->bufs_len + 1);
4152 if (BE (new_array == NULL, 0)) 3973 if (__glibc_unlikely (new_array == NULL))
4153 return REG_ESPACE; 3974 return REG_ESPACE;
4154 mctx->state_log = new_array; 3975 mctx->state_log = new_array;
4155 } 3976 }
@@ -4157,24 +3978,20 @@ extend_buffers (re_match_context_t *mctx, int min_len)
4157 /* Then reconstruct the buffers. */ 3978 /* Then reconstruct the buffers. */
4158 if (pstr->icase) 3979 if (pstr->icase)
4159 { 3980 {
4160#ifdef RE_ENABLE_I18N
4161 if (pstr->mb_cur_max > 1) 3981 if (pstr->mb_cur_max > 1)
4162 { 3982 {
4163 ret = build_wcs_upper_buffer (pstr); 3983 ret = build_wcs_upper_buffer (pstr);
4164 if (BE (ret != REG_NOERROR, 0)) 3984 if (__glibc_unlikely (ret != REG_NOERROR))
4165 return ret; 3985 return ret;
4166 } 3986 }
4167 else 3987 else
4168#endif /* RE_ENABLE_I18N */
4169 build_upper_buffer (pstr); 3988 build_upper_buffer (pstr);
4170 } 3989 }
4171 else 3990 else
4172 { 3991 {
4173#ifdef RE_ENABLE_I18N
4174 if (pstr->mb_cur_max > 1) 3992 if (pstr->mb_cur_max > 1)
4175 build_wcs_buffer (pstr); 3993 build_wcs_buffer (pstr);
4176 else 3994 else
4177#endif /* RE_ENABLE_I18N */
4178 { 3995 {
4179 if (pstr->trans != NULL) 3996 if (pstr->trans != NULL)
4180 re_string_translate_buffer (pstr); 3997 re_string_translate_buffer (pstr);
@@ -4189,23 +4006,23 @@ extend_buffers (re_match_context_t *mctx, int min_len)
4189/* Initialize MCTX. */ 4006/* Initialize MCTX. */
4190 4007
4191static reg_errcode_t 4008static reg_errcode_t
4192internal_function __attribute_warn_unused_result__ 4009__attribute_warn_unused_result__
4193match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) 4010match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4194{ 4011{
4195 mctx->eflags = eflags; 4012 mctx->eflags = eflags;
4196 mctx->match_last = REG_MISSING; 4013 mctx->match_last = -1;
4197 if (n > 0) 4014 if (n > 0)
4198 { 4015 {
4199 /* Avoid overflow. */ 4016 /* Avoid overflow. */
4200 size_t max_object_size = 4017 size_t max_object_size =
4201 MAX (sizeof (struct re_backref_cache_entry), 4018 MAX (sizeof (struct re_backref_cache_entry),
4202 sizeof (re_sub_match_top_t *)); 4019 sizeof (re_sub_match_top_t *));
4203 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0)) 4020 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4204 return REG_ESPACE; 4021 return REG_ESPACE;
4205 4022
4206 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); 4023 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4207 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); 4024 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4208 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) 4025 if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
4209 return REG_ESPACE; 4026 return REG_ESPACE;
4210 } 4027 }
4211 /* Already zero-ed by the caller. 4028 /* Already zero-ed by the caller.
@@ -4224,7 +4041,6 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4224 of the input, or changes the input string. */ 4041 of the input, or changes the input string. */
4225 4042
4226static void 4043static void
4227internal_function
4228match_ctx_clean (re_match_context_t *mctx) 4044match_ctx_clean (re_match_context_t *mctx)
4229{ 4045{
4230 Idx st_idx; 4046 Idx st_idx;
@@ -4244,7 +4060,7 @@ match_ctx_clean (re_match_context_t *mctx)
4244 re_free (top->path->array); 4060 re_free (top->path->array);
4245 re_free (top->path); 4061 re_free (top->path);
4246 } 4062 }
4247 free (top); 4063 re_free (top);
4248 } 4064 }
4249 4065
4250 mctx->nsub_tops = 0; 4066 mctx->nsub_tops = 0;
@@ -4254,7 +4070,6 @@ match_ctx_clean (re_match_context_t *mctx)
4254/* Free all the memory associated with MCTX. */ 4070/* Free all the memory associated with MCTX. */
4255 4071
4256static void 4072static void
4257internal_function
4258match_ctx_free (re_match_context_t *mctx) 4073match_ctx_free (re_match_context_t *mctx)
4259{ 4074{
4260 /* First, free all the memory associated with MCTX->SUB_TOPS. */ 4075 /* First, free all the memory associated with MCTX->SUB_TOPS. */
@@ -4269,7 +4084,7 @@ match_ctx_free (re_match_context_t *mctx)
4269*/ 4084*/
4270 4085
4271static reg_errcode_t 4086static reg_errcode_t
4272internal_function __attribute_warn_unused_result__ 4087__attribute_warn_unused_result__
4273match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, 4088match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4274 Idx to) 4089 Idx to)
4275{ 4090{
@@ -4278,7 +4093,7 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4278 struct re_backref_cache_entry* new_entry; 4093 struct re_backref_cache_entry* new_entry;
4279 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, 4094 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4280 mctx->abkref_ents * 2); 4095 mctx->abkref_ents * 2);
4281 if (BE (new_entry == NULL, 0)) 4096 if (__glibc_unlikely (new_entry == NULL))
4282 { 4097 {
4283 re_free (mctx->bkref_ents); 4098 re_free (mctx->bkref_ents);
4284 return REG_ESPACE; 4099 return REG_ESPACE;
@@ -4314,11 +4129,10 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4314 return REG_NOERROR; 4129 return REG_NOERROR;
4315} 4130}
4316 4131
4317/* Return the first entry with the same str_idx, or REG_MISSING if none is 4132/* Return the first entry with the same str_idx, or -1 if none is
4318 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ 4133 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4319 4134
4320static Idx 4135static Idx
4321internal_function
4322search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) 4136search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4323{ 4137{
4324 Idx left, right, mid, last; 4138 Idx left, right, mid, last;
@@ -4334,33 +4148,31 @@ search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4334 if (left < last && mctx->bkref_ents[left].str_idx == str_idx) 4148 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4335 return left; 4149 return left;
4336 else 4150 else
4337 return REG_MISSING; 4151 return -1;
4338} 4152}
4339 4153
4340/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches 4154/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4341 at STR_IDX. */ 4155 at STR_IDX. */
4342 4156
4343static reg_errcode_t 4157static reg_errcode_t
4344internal_function __attribute_warn_unused_result__ 4158__attribute_warn_unused_result__
4345match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) 4159match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4346{ 4160{
4347#ifdef DEBUG 4161 DEBUG_ASSERT (mctx->sub_tops != NULL);
4348 assert (mctx->sub_tops != NULL); 4162 DEBUG_ASSERT (mctx->asub_tops > 0);
4349 assert (mctx->asub_tops > 0); 4163 if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4350#endif
4351 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4352 { 4164 {
4353 Idx new_asub_tops = mctx->asub_tops * 2; 4165 Idx new_asub_tops = mctx->asub_tops * 2;
4354 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, 4166 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4355 re_sub_match_top_t *, 4167 re_sub_match_top_t *,
4356 new_asub_tops); 4168 new_asub_tops);
4357 if (BE (new_array == NULL, 0)) 4169 if (__glibc_unlikely (new_array == NULL))
4358 return REG_ESPACE; 4170 return REG_ESPACE;
4359 mctx->sub_tops = new_array; 4171 mctx->sub_tops = new_array;
4360 mctx->asub_tops = new_asub_tops; 4172 mctx->asub_tops = new_asub_tops;
4361 } 4173 }
4362 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); 4174 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4363 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) 4175 if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4364 return REG_ESPACE; 4176 return REG_ESPACE;
4365 mctx->sub_tops[mctx->nsub_tops]->node = node; 4177 mctx->sub_tops[mctx->nsub_tops]->node = node;
4366 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; 4178 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
@@ -4368,26 +4180,26 @@ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4368} 4180}
4369 4181
4370/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches 4182/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4371 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ 4183 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
4184 Return the new entry if successful, NULL if memory is exhausted. */
4372 4185
4373static re_sub_match_last_t * 4186static re_sub_match_last_t *
4374internal_function
4375match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) 4187match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4376{ 4188{
4377 re_sub_match_last_t *new_entry; 4189 re_sub_match_last_t *new_entry;
4378 if (BE (subtop->nlasts == subtop->alasts, 0)) 4190 if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4379 { 4191 {
4380 Idx new_alasts = 2 * subtop->alasts + 1; 4192 Idx new_alasts = 2 * subtop->alasts + 1;
4381 re_sub_match_last_t **new_array = re_realloc (subtop->lasts, 4193 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4382 re_sub_match_last_t *, 4194 re_sub_match_last_t *,
4383 new_alasts); 4195 new_alasts);
4384 if (BE (new_array == NULL, 0)) 4196 if (__glibc_unlikely (new_array == NULL))
4385 return NULL; 4197 return NULL;
4386 subtop->lasts = new_array; 4198 subtop->lasts = new_array;
4387 subtop->alasts = new_alasts; 4199 subtop->alasts = new_alasts;
4388 } 4200 }
4389 new_entry = calloc (1, sizeof (re_sub_match_last_t)); 4201 new_entry = calloc (1, sizeof (re_sub_match_last_t));
4390 if (BE (new_entry != NULL, 1)) 4202 if (__glibc_likely (new_entry != NULL))
4391 { 4203 {
4392 subtop->lasts[subtop->nlasts] = new_entry; 4204 subtop->lasts[subtop->nlasts] = new_entry;
4393 new_entry->node = node; 4205 new_entry->node = node;
@@ -4398,7 +4210,6 @@ match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4398} 4210}
4399 4211
4400static void 4212static void
4401internal_function
4402sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 4213sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4403 re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx) 4214 re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4404{ 4215{