summaryrefslogtreecommitdiffstats
path: root/gl/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regexec.c')
-rw-r--r--gl/regexec.c214
1 files changed, 104 insertions, 110 deletions
diff --git a/gl/regexec.c b/gl/regexec.c
index dc449ce..d29d442 100644
--- a/gl/regexec.c
+++ b/gl/regexec.c
@@ -1,22 +1,21 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free 2 Copyright (C) 2002-2013 Free Software Foundation, Inc.
3 Software Foundation, Inc.
4 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
5 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 5
7 This program is free software; you can redistribute it and/or modify 6 The GNU C Library is free software; you can redistribute it and/or
8 it under the terms of the GNU General Public License as published by 7 modify it under the terms of the GNU General Public
9 the Free Software Foundation; either version 3, or (at your option) 8 License as published by the Free Software Foundation; either
10 any later version. 9 version 3 of the License, or (at your option) any later version.
11 10
12 This program is distributed in the hope that it will be useful, 11 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 GNU General Public License for more details. 14 General Public License for more details.
16 15
17 You should have received a copy of the GNU General Public License along 16 You should have received a copy of the GNU General Public
18 with this program; if not, write to the Free Software Foundation, 17 License along with the GNU C Library; if not, see
19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 <http://www.gnu.org/licenses/>. */
20 19
21static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, 20static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22 Idx n) internal_function; 21 Idx n) internal_function;
@@ -52,9 +51,8 @@ static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
52 regoff_t range, Idx stop, 51 regoff_t range, Idx stop,
53 struct re_registers *regs, 52 struct re_registers *regs,
54 bool ret_len) internal_function; 53 bool ret_len) internal_function;
55static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, 54static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
56 Idx nregs, int regs_allocated) 55 Idx nregs, int regs_allocated) internal_function;
57 internal_function;
58static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) 56static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
59 internal_function; 57 internal_function;
60static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, 58static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
@@ -201,7 +199,7 @@ static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
201static bool check_node_accept (const re_match_context_t *mctx, 199static bool check_node_accept (const re_match_context_t *mctx,
202 const re_token_t *node, Idx idx) 200 const re_token_t *node, Idx idx)
203 internal_function; 201 internal_function;
204static reg_errcode_t extend_buffers (re_match_context_t *mctx) 202static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
205 internal_function; 203 internal_function;
206 204
207/* Entry point for POSIX code. */ 205/* Entry point for POSIX code. */
@@ -210,11 +208,11 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx)
210 string STRING. 208 string STRING.
211 209
212 If NMATCH is zero or REG_NOSUB was set in the cflags argument to 210 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
213 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 211 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
214 least NMATCH elements, and we set them to the offsets of the 212 least NMATCH elements, and we set them to the offsets of the
215 corresponding matched substrings. 213 corresponding matched substrings.
216 214
217 EFLAGS specifies `execution flags' which affect matching: if 215 EFLAGS specifies "execution flags" which affect matching: if
218 REG_NOTBOL is set, then ^ does not match at the beginning of the 216 REG_NOTBOL is set, then ^ does not match at the beginning of the
219 string; if REG_NOTEOL is set, then $ does not match at the end. 217 string; if REG_NOTEOL is set, then $ does not match at the end.
220 218
@@ -230,9 +228,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
230{ 228{
231 reg_errcode_t err; 229 reg_errcode_t err;
232 Idx start, length; 230 Idx start, length;
233#ifdef _LIBC 231 re_dfa_t *dfa = preg->buffer;
234 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
235#endif
236 232
237 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) 233 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
238 return REG_BADPAT; 234 return REG_BADPAT;
@@ -248,14 +244,14 @@ regexec (preg, string, nmatch, pmatch, eflags)
248 length = strlen (string); 244 length = strlen (string);
249 } 245 }
250 246
251 __libc_lock_lock (dfa->lock); 247 lock_lock (dfa->lock);
252 if (preg->no_sub) 248 if (preg->no_sub)
253 err = re_search_internal (preg, string, length, start, length, 249 err = re_search_internal (preg, string, length, start, length,
254 length, 0, NULL, eflags); 250 length, 0, NULL, eflags);
255 else 251 else
256 err = re_search_internal (preg, string, length, start, length, 252 err = re_search_internal (preg, string, length, start, length,
257 length, nmatch, pmatch, eflags); 253 length, nmatch, pmatch, eflags);
258 __libc_lock_unlock (dfa->lock); 254 lock_unlock (dfa->lock);
259 return err != REG_NOERROR; 255 return err != REG_NOERROR;
260} 256}
261 257
@@ -366,7 +362,6 @@ weak_alias (__re_search_2, re_search_2)
366#endif 362#endif
367 363
368static regoff_t 364static regoff_t
369internal_function
370re_search_2_stub (struct re_pattern_buffer *bufp, 365re_search_2_stub (struct re_pattern_buffer *bufp,
371 const char *string1, Idx length1, 366 const char *string1, Idx length1,
372 const char *string2, Idx length2, 367 const char *string2, Idx length2,
@@ -414,7 +409,6 @@ re_search_2_stub (struct re_pattern_buffer *bufp,
414 otherwise the position of the match is returned. */ 409 otherwise the position of the match is returned. */
415 410
416static regoff_t 411static regoff_t
417internal_function
418re_search_stub (struct re_pattern_buffer *bufp, 412re_search_stub (struct re_pattern_buffer *bufp,
419 const char *string, Idx length, 413 const char *string, Idx length,
420 Idx start, regoff_t range, Idx stop, struct re_registers *regs, 414 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
@@ -425,9 +419,7 @@ re_search_stub (struct re_pattern_buffer *bufp,
425 Idx nregs; 419 Idx nregs;
426 regoff_t rval; 420 regoff_t rval;
427 int eflags = 0; 421 int eflags = 0;
428#ifdef _LIBC 422 re_dfa_t *dfa = bufp->buffer;
429 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
430#endif
431 Idx last_start = start + range; 423 Idx last_start = start + range;
432 424
433 /* Check for out-of-range. */ 425 /* Check for out-of-range. */
@@ -438,7 +430,7 @@ re_search_stub (struct re_pattern_buffer *bufp,
438 else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) 430 else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
439 last_start = 0; 431 last_start = 0;
440 432
441 __libc_lock_lock (dfa->lock); 433 lock_lock (dfa->lock);
442 434
443 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; 435 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
444 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; 436 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
@@ -478,9 +470,9 @@ re_search_stub (struct re_pattern_buffer *bufp,
478 470
479 rval = 0; 471 rval = 0;
480 472
481 /* I hope we needn't fill ther regs with -1's when no match was found. */ 473 /* I hope we needn't fill their regs with -1's when no match was found. */
482 if (result != REG_NOERROR) 474 if (result != REG_NOERROR)
483 rval = -1; 475 rval = result == REG_NOMATCH ? -1 : -2;
484 else if (regs != NULL) 476 else if (regs != NULL)
485 { 477 {
486 /* If caller wants register contents data back, copy them. */ 478 /* If caller wants register contents data back, copy them. */
@@ -502,19 +494,18 @@ re_search_stub (struct re_pattern_buffer *bufp,
502 } 494 }
503 re_free (pmatch); 495 re_free (pmatch);
504 out: 496 out:
505 __libc_lock_unlock (dfa->lock); 497 lock_unlock (dfa->lock);
506 return rval; 498 return rval;
507} 499}
508 500
509static unsigned int 501static unsigned
510internal_function
511re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, 502re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
512 int regs_allocated) 503 int regs_allocated)
513{ 504{
514 int rval = REGS_REALLOCATE; 505 int rval = REGS_REALLOCATE;
515 Idx i; 506 Idx i;
516 Idx need_regs = nregs + 1; 507 Idx need_regs = nregs + 1;
517 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code 508 /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
518 uses. */ 509 uses. */
519 510
520 /* Have the register data arrays been allocated? */ 511 /* Have the register data arrays been allocated? */
@@ -637,7 +628,7 @@ re_exec (s)
637 (0 <= LAST_START && LAST_START <= LENGTH) */ 628 (0 <= LAST_START && LAST_START <= LENGTH) */
638 629
639static reg_errcode_t 630static reg_errcode_t
640internal_function __attribute_warn_unused_result__ 631__attribute_warn_unused_result__
641re_search_internal (const regex_t *preg, 632re_search_internal (const regex_t *preg,
642 const char *string, Idx length, 633 const char *string, Idx length,
643 Idx start, Idx last_start, Idx stop, 634 Idx start, Idx last_start, Idx stop,
@@ -645,7 +636,7 @@ re_search_internal (const regex_t *preg,
645 int eflags) 636 int eflags)
646{ 637{
647 reg_errcode_t err; 638 reg_errcode_t err;
648 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; 639 const re_dfa_t *dfa = preg->buffer;
649 Idx left_lim, right_lim; 640 Idx left_lim, right_lim;
650 int incr; 641 int incr;
651 bool fl_longest_match; 642 bool fl_longest_match;
@@ -720,7 +711,8 @@ re_search_internal (const regex_t *preg,
720 if (nmatch > 1 || dfa->has_mb_node) 711 if (nmatch > 1 || dfa->has_mb_node)
721 { 712 {
722 /* Avoid overflow. */ 713 /* Avoid overflow. */
723 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) 714 if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
715 <= mctx.input.bufs_len), 0))
724 { 716 {
725 err = REG_ESPACE; 717 err = REG_ESPACE;
726 goto free_return; 718 goto free_return;
@@ -740,7 +732,7 @@ re_search_internal (const regex_t *preg,
740 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF 732 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
741 : CONTEXT_NEWLINE | CONTEXT_BEGBUF; 733 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
742 734
743 /* Check incrementally whether of not the input string match. */ 735 /* Check incrementally whether the input string matches. */
744 incr = (last_start < start) ? -1 : 1; 736 incr = (last_start < start) ? -1 : 1;
745 left_lim = (last_start < start) ? last_start : start; 737 left_lim = (last_start < start) ? last_start : start;
746 right_lim = (last_start < start) ? start : last_start; 738 right_lim = (last_start < start) ? start : last_start;
@@ -922,7 +914,7 @@ re_search_internal (const regex_t *preg,
922 goto free_return; 914 goto free_return;
923 } 915 }
924 916
925 /* At last, add the offset to the each registers, since we slided 917 /* At last, add the offset to each register, since we slid
926 the buffers so that we could assume that the matching starts 918 the buffers so that we could assume that the matching starts
927 from 0. */ 919 from 0. */
928 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 920 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
@@ -972,7 +964,7 @@ re_search_internal (const regex_t *preg,
972} 964}
973 965
974static reg_errcode_t 966static reg_errcode_t
975internal_function __attribute_warn_unused_result__ 967__attribute_warn_unused_result__
976prune_impossible_nodes (re_match_context_t *mctx) 968prune_impossible_nodes (re_match_context_t *mctx)
977{ 969{
978 const re_dfa_t *const dfa = mctx->dfa; 970 const re_dfa_t *const dfa = mctx->dfa;
@@ -988,7 +980,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
988 halt_node = mctx->last_node; 980 halt_node = mctx->last_node;
989 981
990 /* Avoid overflow. */ 982 /* Avoid overflow. */
991 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) 983 if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
992 return REG_ESPACE; 984 return REG_ESPACE;
993 985
994 sifted_states = re_malloc (re_dfastate_t *, match_last + 1); 986 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
@@ -1068,7 +1060,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
1068 since initial states may have constraints like "\<", "^", etc.. */ 1060 since initial states may have constraints like "\<", "^", etc.. */
1069 1061
1070static inline re_dfastate_t * 1062static inline re_dfastate_t *
1071__attribute ((always_inline)) internal_function 1063__attribute__ ((always_inline)) internal_function
1072acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, 1064acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1073 Idx idx) 1065 Idx idx)
1074{ 1066{
@@ -1106,7 +1098,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1106 FL_LONGEST_MATCH means we want the POSIX longest matching. 1098 FL_LONGEST_MATCH means we want the POSIX longest matching.
1107 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the 1099 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1108 next place where we may want to try matching. 1100 next place where we may want to try matching.
1109 Note that the matcher assume that the maching starts from the current 1101 Note that the matcher assumes that the matching starts from the current
1110 index of the buffer. */ 1102 index of the buffer. */
1111 1103
1112static Idx 1104static Idx
@@ -1175,11 +1167,12 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
1175 re_dfastate_t *old_state = cur_state; 1167 re_dfastate_t *old_state = cur_state;
1176 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; 1168 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1177 1169
1178 if (BE (next_char_idx >= mctx->input.bufs_len, 0) 1170 if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1171 && mctx->input.bufs_len < mctx->input.len)
1179 || (BE (next_char_idx >= mctx->input.valid_len, 0) 1172 || (BE (next_char_idx >= mctx->input.valid_len, 0)
1180 && mctx->input.valid_len < mctx->input.len)) 1173 && mctx->input.valid_len < mctx->input.len))
1181 { 1174 {
1182 err = extend_buffers (mctx); 1175 err = extend_buffers (mctx, next_char_idx + 1);
1183 if (BE (err != REG_NOERROR, 0)) 1176 if (BE (err != REG_NOERROR, 0))
1184 { 1177 {
1185 assert (err == REG_ESPACE); 1178 assert (err == REG_ESPACE);
@@ -1436,7 +1429,7 @@ internal_function __attribute_warn_unused_result__
1436set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, 1429set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1437 regmatch_t *pmatch, bool fl_backtrack) 1430 regmatch_t *pmatch, bool fl_backtrack)
1438{ 1431{
1439 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; 1432 const re_dfa_t *dfa = preg->buffer;
1440 Idx idx, cur_node; 1433 Idx idx, cur_node;
1441 re_node_set eps_via_nodes; 1434 re_node_set eps_via_nodes;
1442 struct re_fail_stack_t *fs; 1435 struct re_fail_stack_t *fs;
@@ -1608,21 +1601,21 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1608 and sift the nodes in each states according to the following rules. 1601 and sift the nodes in each states according to the following rules.
1609 Updated state_log will be wrote to STATE_LOG. 1602 Updated state_log will be wrote to STATE_LOG.
1610 1603
1611 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... 1604 Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1612 1. When STR_IDX == MATCH_LAST(the last index in the state_log): 1605 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1613 If `a' isn't the LAST_NODE and `a' can't epsilon transit to 1606 If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1614 the LAST_NODE, we throw away the node `a'. 1607 the LAST_NODE, we throw away the node 'a'.
1615 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts 1608 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1616 string `s' and transit to `b': 1609 string 's' and transit to 'b':
1617 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw 1610 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1618 away the node `a'. 1611 away the node 'a'.
1619 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is 1612 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1620 thrown away, we throw away the node `a'. 1613 thrown away, we throw away the node 'a'.
1621 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': 1614 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1622 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the 1615 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1623 node `a'. 1616 node 'a'.
1624 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, 1617 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1625 we throw away the node `a'. */ 1618 we throw away the node 'a'. */
1626 1619
1627#define STATE_NODE_CONTAINS(state,node) \ 1620#define STATE_NODE_CONTAINS(state,node) \
1628 ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) 1621 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
@@ -1695,11 +1688,11 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1695 Idx i; 1688 Idx i;
1696 1689
1697 /* Then build the next sifted state. 1690 /* Then build the next sifted state.
1698 We build the next sifted state on `cur_dest', and update 1691 We build the next sifted state on 'cur_dest', and update
1699 `sifted_states[str_idx]' with `cur_dest'. 1692 'sifted_states[str_idx]' with 'cur_dest'.
1700 Note: 1693 Note:
1701 `cur_dest' is the sifted state from `state_log[str_idx + 1]'. 1694 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1702 `cur_src' points the node_set of the old `state_log[str_idx]' 1695 'cur_src' points the node_set of the old 'state_log[str_idx]'
1703 (with the epsilon nodes pre-filtered out). */ 1696 (with the epsilon nodes pre-filtered out). */
1704 for (i = 0; i < cur_src->nelem; i++) 1697 for (i = 0; i < cur_src->nelem; i++)
1705 { 1698 {
@@ -1712,7 +1705,7 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1712 assert (!IS_EPSILON_NODE (type)); 1705 assert (!IS_EPSILON_NODE (type));
1713#endif 1706#endif
1714#ifdef RE_ENABLE_I18N 1707#ifdef RE_ENABLE_I18N
1715 /* If the node may accept `multi byte'. */ 1708 /* If the node may accept "multi byte". */
1716 if (dfa->nodes[prev_node].accept_mb) 1709 if (dfa->nodes[prev_node].accept_mb)
1717 naccepted = sift_states_iter_mb (mctx, sctx, prev_node, 1710 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1718 str_idx, sctx->last_str_idx); 1711 str_idx, sctx->last_str_idx);
@@ -1753,12 +1746,13 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1753{ 1746{
1754 Idx top = mctx->state_log_top; 1747 Idx top = mctx->state_log_top;
1755 1748
1756 if (next_state_log_idx >= mctx->input.bufs_len 1749 if ((next_state_log_idx >= mctx->input.bufs_len
1750 && mctx->input.bufs_len < mctx->input.len)
1757 || (next_state_log_idx >= mctx->input.valid_len 1751 || (next_state_log_idx >= mctx->input.valid_len
1758 && mctx->input.valid_len < mctx->input.len)) 1752 && mctx->input.valid_len < mctx->input.len))
1759 { 1753 {
1760 reg_errcode_t err; 1754 reg_errcode_t err;
1761 err = extend_buffers (mctx); 1755 err = extend_buffers (mctx, next_state_log_idx + 1);
1762 if (BE (err != REG_NOERROR, 0)) 1756 if (BE (err != REG_NOERROR, 0))
1763 return err; 1757 return err;
1764 } 1758 }
@@ -2268,17 +2262,17 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2268{ 2262{
2269 const re_dfa_t *const dfa = mctx->dfa; 2263 const re_dfa_t *const dfa = mctx->dfa;
2270 int naccepted; 2264 int naccepted;
2271 /* Check the node can accept `multi byte'. */ 2265 /* Check the node can accept "multi byte". */
2272 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); 2266 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2273 if (naccepted > 0 && str_idx + naccepted <= max_str_idx && 2267 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2274 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], 2268 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2275 dfa->nexts[node_idx])) 2269 dfa->nexts[node_idx]))
2276 /* The node can't accept the `multi byte', or the 2270 /* The node can't accept the "multi byte", or the
2277 destination was already thrown away, then the node 2271 destination was already thrown away, then the node
2278 could't accept the current input `multi byte'. */ 2272 could't accept the current input "multi byte". */
2279 naccepted = 0; 2273 naccepted = 0;
2280 /* Otherwise, it is sure that the node could accept 2274 /* Otherwise, it is sure that the node could accept
2281 `naccepted' bytes input. */ 2275 'naccepted' bytes input. */
2282 return naccepted; 2276 return naccepted;
2283} 2277}
2284#endif /* RE_ENABLE_I18N */ 2278#endif /* RE_ENABLE_I18N */
@@ -2457,7 +2451,7 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2457/* From the node set CUR_NODES, pick up the nodes whose types are 2451/* From the node set CUR_NODES, pick up the nodes whose types are
2458 OP_OPEN_SUBEXP and which have corresponding back references in the regular 2452 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2459 expression. And register them to use them later for evaluating the 2453 expression. And register them to use them later for evaluating the
2460 correspoding back references. */ 2454 corresponding back references. */
2461 2455
2462static reg_errcode_t 2456static reg_errcode_t
2463internal_function 2457internal_function
@@ -2568,7 +2562,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2568 if (naccepted == 0) 2562 if (naccepted == 0)
2569 continue; 2563 continue;
2570 2564
2571 /* The node can accepts `naccepted' bytes. */ 2565 /* The node can accepts 'naccepted' bytes. */
2572 dest_idx = re_string_cur_idx (&mctx->input) + naccepted; 2566 dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2573 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted 2567 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2574 : mctx->max_mb_elem_len); 2568 : mctx->max_mb_elem_len);
@@ -2620,7 +2614,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2620 const re_token_t *node = dfa->nodes + node_idx; 2614 const re_token_t *node = dfa->nodes + node_idx;
2621 re_node_set *new_dest_nodes; 2615 re_node_set *new_dest_nodes;
2622 2616
2623 /* Check whether `node' is a backreference or not. */ 2617 /* Check whether 'node' is a backreference or not. */
2624 if (node->type != OP_BACK_REF) 2618 if (node->type != OP_BACK_REF)
2625 continue; 2619 continue;
2626 2620
@@ -2632,14 +2626,14 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2632 continue; 2626 continue;
2633 } 2627 }
2634 2628
2635 /* `node' is a backreference. 2629 /* 'node' is a backreference.
2636 Check the substring which the substring matched. */ 2630 Check the substring which the substring matched. */
2637 bkc_idx = mctx->nbkref_ents; 2631 bkc_idx = mctx->nbkref_ents;
2638 err = get_subexp (mctx, node_idx, cur_str_idx); 2632 err = get_subexp (mctx, node_idx, cur_str_idx);
2639 if (BE (err != REG_NOERROR, 0)) 2633 if (BE (err != REG_NOERROR, 0))
2640 goto free_return; 2634 goto free_return;
2641 2635
2642 /* And add the epsilon closures (which is `new_dest_nodes') of 2636 /* And add the epsilon closures (which is 'new_dest_nodes') of
2643 the backreference to appropriate state_log. */ 2637 the backreference to appropriate state_log. */
2644#ifdef DEBUG 2638#ifdef DEBUG
2645 assert (dfa->nexts[node_idx] != REG_MISSING); 2639 assert (dfa->nexts[node_idx] != REG_MISSING);
@@ -2663,7 +2657,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2663 dest_state = mctx->state_log[dest_str_idx]; 2657 dest_state = mctx->state_log[dest_str_idx];
2664 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 2658 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2665 : mctx->state_log[cur_str_idx]->nodes.nelem); 2659 : mctx->state_log[cur_str_idx]->nodes.nelem);
2666 /* Add `new_dest_node' to state_log. */ 2660 /* Add 'new_dest_node' to state_log. */
2667 if (dest_state == NULL) 2661 if (dest_state == NULL)
2668 { 2662 {
2669 mctx->state_log[dest_str_idx] 2663 mctx->state_log[dest_str_idx]
@@ -2815,7 +2809,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2815 if (bkref_str_off >= mctx->input.len) 2809 if (bkref_str_off >= mctx->input.len)
2816 break; 2810 break;
2817 2811
2818 err = extend_buffers (mctx); 2812 err = extend_buffers (mctx, bkref_str_off + 1);
2819 if (BE (err != REG_NOERROR, 0)) 2813 if (BE (err != REG_NOERROR, 0))
2820 return err; 2814 return err;
2821 2815
@@ -2937,9 +2931,12 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2937 { 2931 {
2938 re_dfastate_t **new_array; 2932 re_dfastate_t **new_array;
2939 Idx old_alloc = path->alloc; 2933 Idx old_alloc = path->alloc;
2940 Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1; 2934 Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2941 if (BE (new_alloc < old_alloc, 0) 2935 Idx new_alloc;
2942 || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) 2936 if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
2937 return REG_ESPACE;
2938 new_alloc = old_alloc + incr_alloc;
2939 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2943 return REG_ESPACE; 2940 return REG_ESPACE;
2944 new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); 2941 new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2945 if (BE (new_array == NULL, 0)) 2942 if (BE (new_array == NULL, 0))
@@ -3102,7 +3099,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3102 assert (!IS_EPSILON_NODE (type)); 3099 assert (!IS_EPSILON_NODE (type));
3103#endif 3100#endif
3104#ifdef RE_ENABLE_I18N 3101#ifdef RE_ENABLE_I18N
3105 /* If the node may accept `multi byte'. */ 3102 /* If the node may accept "multi byte". */
3106 if (dfa->nodes[cur_node].accept_mb) 3103 if (dfa->nodes[cur_node].accept_mb)
3107 { 3104 {
3108 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, 3105 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
@@ -3359,7 +3356,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3359 bitset_word_t elem, mask; 3356 bitset_word_t elem, mask;
3360 bool dests_node_malloced = false; 3357 bool dests_node_malloced = false;
3361 bool dest_states_malloced = false; 3358 bool dest_states_malloced = false;
3362 Idx ndests; /* Number of the destination states from `state'. */ 3359 Idx ndests; /* Number of the destination states from 'state'. */
3363 re_dfastate_t **trtable; 3360 re_dfastate_t **trtable;
3364 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; 3361 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3365 re_node_set follows, *dests_node; 3362 re_node_set follows, *dests_node;
@@ -3373,8 +3370,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3373 } *dests_alloc; 3370 } *dests_alloc;
3374 3371
3375 /* We build DFA states which corresponds to the destination nodes 3372 /* We build DFA states which corresponds to the destination nodes
3376 from `state'. `dests_node[i]' represents the nodes which i-th 3373 from 'state'. 'dests_node[i]' represents the nodes which i-th
3377 destination state contains, and `dests_ch[i]' represents the 3374 destination state contains, and 'dests_ch[i]' represents the
3378 characters which i-th destination state accepts. */ 3375 characters which i-th destination state accepts. */
3379 if (__libc_use_alloca (sizeof (struct dests_alloc))) 3376 if (__libc_use_alloca (sizeof (struct dests_alloc)))
3380 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); 3377 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
@@ -3388,20 +3385,23 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3388 dests_node = dests_alloc->dests_node; 3385 dests_node = dests_alloc->dests_node;
3389 dests_ch = dests_alloc->dests_ch; 3386 dests_ch = dests_alloc->dests_ch;
3390 3387
3391 /* Initialize transiton table. */ 3388 /* Initialize transition table. */
3392 state->word_trtable = state->trtable = NULL; 3389 state->word_trtable = state->trtable = NULL;
3393 3390
3394 /* At first, group all nodes belonging to `state' into several 3391 /* At first, group all nodes belonging to 'state' into several
3395 destinations. */ 3392 destinations. */
3396 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); 3393 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3397 if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) 3394 if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3398 { 3395 {
3399 if (dests_node_malloced) 3396 if (dests_node_malloced)
3400 free (dests_alloc); 3397 free (dests_alloc);
3398 /* Return false in case of an error, true otherwise. */
3401 if (ndests == 0) 3399 if (ndests == 0)
3402 { 3400 {
3403 state->trtable = (re_dfastate_t **) 3401 state->trtable = (re_dfastate_t **)
3404 calloc (sizeof (re_dfastate_t *), SBC_MAX); 3402 calloc (sizeof (re_dfastate_t *), SBC_MAX);
3403 if (BE (state->trtable == NULL, 0))
3404 return false;
3405 return true; 3405 return true;
3406 } 3406 }
3407 return false; 3407 return false;
@@ -3591,13 +3591,13 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3591 reg_errcode_t err; 3591 reg_errcode_t err;
3592 bool ok; 3592 bool ok;
3593 Idx i, j, k; 3593 Idx i, j, k;
3594 Idx ndests; /* Number of the destinations from `state'. */ 3594 Idx ndests; /* Number of the destinations from 'state'. */
3595 bitset_t accepts; /* Characters a node can accept. */ 3595 bitset_t accepts; /* Characters a node can accept. */
3596 const re_node_set *cur_nodes = &state->nodes; 3596 const re_node_set *cur_nodes = &state->nodes;
3597 bitset_empty (accepts); 3597 bitset_empty (accepts);
3598 ndests = 0; 3598 ndests = 0;
3599 3599
3600 /* For all the nodes belonging to `state', */ 3600 /* For all the nodes belonging to 'state', */
3601 for (i = 0; i < cur_nodes->nelem; ++i) 3601 for (i = 0; i < cur_nodes->nelem; ++i)
3602 { 3602 {
3603 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; 3603 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
@@ -3640,7 +3640,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3640 else 3640 else
3641 continue; 3641 continue;
3642 3642
3643 /* Check the `accepts' and sift the characters which are not 3643 /* Check the 'accepts' and sift the characters which are not
3644 match it the context. */ 3644 match it the context. */
3645 if (constraint) 3645 if (constraint)
3646 { 3646 {
@@ -3699,7 +3699,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3699 } 3699 }
3700 } 3700 }
3701 3701
3702 /* Then divide `accepts' into DFA states, or create a new 3702 /* Then divide 'accepts' into DFA states, or create a new
3703 state. Above, we make sure that accepts is not empty. */ 3703 state. Above, we make sure that accepts is not empty. */
3704 for (j = 0; j < ndests; ++j) 3704 for (j = 0; j < ndests; ++j)
3705 { 3705 {
@@ -3712,7 +3712,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3712 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) 3712 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3713 continue; 3713 continue;
3714 3714
3715 /* Enumerate the intersection set of this state and `accepts'. */ 3715 /* Enumerate the intersection set of this state and 'accepts'. */
3716 has_intersec = 0; 3716 has_intersec = 0;
3717 for (k = 0; k < BITSET_WORDS; ++k) 3717 for (k = 0; k < BITSET_WORDS; ++k)
3718 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; 3718 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
@@ -3720,7 +3720,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3720 if (!has_intersec) 3720 if (!has_intersec)
3721 continue; 3721 continue;
3722 3722
3723 /* Then check if this state is a subset of `accepts'. */ 3723 /* Then check if this state is a subset of 'accepts'. */
3724 not_subset = not_consumed = 0; 3724 not_subset = not_consumed = 0;
3725 for (k = 0; k < BITSET_WORDS; ++k) 3725 for (k = 0; k < BITSET_WORDS; ++k)
3726 { 3726 {
@@ -3728,8 +3728,8 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3728 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; 3728 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3729 } 3729 }
3730 3730
3731 /* If this state isn't a subset of `accepts', create a 3731 /* If this state isn't a subset of 'accepts', create a
3732 new group state, which has the `remains'. */ 3732 new group state, which has the 'remains'. */
3733 if (not_subset) 3733 if (not_subset)
3734 { 3734 {
3735 bitset_copy (dests_ch[ndests], remains); 3735 bitset_copy (dests_ch[ndests], remains);
@@ -3768,7 +3768,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3768} 3768}
3769 3769
3770#ifdef RE_ENABLE_I18N 3770#ifdef RE_ENABLE_I18N
3771/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. 3771/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3772 Return the number of the bytes the node accepts. 3772 Return the number of the bytes the node accepts.
3773 STR_IDX is the current index of the input string. 3773 STR_IDX is the current index of the input string.
3774 3774
@@ -3895,7 +3895,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3895 const int32_t *table, *indirect; 3895 const int32_t *table, *indirect;
3896 const unsigned char *weights, *extra; 3896 const unsigned char *weights, *extra;
3897 const char *collseqwc; 3897 const char *collseqwc;
3898 int32_t idx;
3899 /* This #include defines a local function! */ 3898 /* This #include defines a local function! */
3900# include <locale/weight.h> 3899# include <locale/weight.h>
3901 3900
@@ -3933,6 +3932,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3933 in_collseq = find_collation_sequence_value (pin, elem_len); 3932 in_collseq = find_collation_sequence_value (pin, elem_len);
3934 } 3933 }
3935 /* match with range expression? */ 3934 /* match with range expression? */
3935 /* FIXME: Implement rational ranges here, too. */
3936 for (i = 0; i < cset->nranges; ++i) 3936 for (i = 0; i < cset->nranges; ++i)
3937 if (cset->range_starts[i] <= in_collseq 3937 if (cset->range_starts[i] <= in_collseq
3938 && in_collseq <= cset->range_ends[i]) 3938 && in_collseq <= cset->range_ends[i])
@@ -3953,7 +3953,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3953 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 3953 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3954 indirect = (const int32_t *) 3954 indirect = (const int32_t *)
3955 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3955 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3956 int32_t idx = findidx (&cp); 3956 int32_t idx = findidx (&cp, elem_len);
3957 if (idx > 0) 3957 if (idx > 0)
3958 for (i = 0; i < cset->nequiv_classes; ++i) 3958 for (i = 0; i < cset->nequiv_classes; ++i)
3959 { 3959 {
@@ -3984,18 +3984,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3984# endif /* _LIBC */ 3984# endif /* _LIBC */
3985 { 3985 {
3986 /* match with range expression? */ 3986 /* match with range expression? */
3987#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
3988 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3989#else
3990 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3991 cmp_buf[2] = wc;
3992#endif
3993 for (i = 0; i < cset->nranges; ++i) 3987 for (i = 0; i < cset->nranges; ++i)
3994 { 3988 {
3995 cmp_buf[0] = cset->range_starts[i]; 3989 if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3996 cmp_buf[4] = cset->range_ends[i];
3997 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3998 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3999 { 3990 {
4000 match_len = char_len; 3991 match_len = char_len;
4001 goto check_node_accept_bytes_match; 3992 goto check_node_accept_bytes_match;
@@ -4065,7 +4056,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4065 /* Skip the collation sequence value. */ 4056 /* Skip the collation sequence value. */
4066 idx += sizeof (uint32_t); 4057 idx += sizeof (uint32_t);
4067 /* Skip the wide char sequence of the collating element. */ 4058 /* Skip the wide char sequence of the collating element. */
4068 idx = idx + sizeof (uint32_t) * (extra[idx] + 1); 4059 idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
4069 /* If we found the entry, return the sequence value. */ 4060 /* If we found the entry, return the sequence value. */
4070 if (found) 4061 if (found)
4071 return *(uint32_t *) (extra + idx); 4062 return *(uint32_t *) (extra + idx);
@@ -4133,17 +4124,20 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4133 4124
4134static reg_errcode_t 4125static reg_errcode_t
4135internal_function __attribute_warn_unused_result__ 4126internal_function __attribute_warn_unused_result__
4136extend_buffers (re_match_context_t *mctx) 4127extend_buffers (re_match_context_t *mctx, int min_len)
4137{ 4128{
4138 reg_errcode_t ret; 4129 reg_errcode_t ret;
4139 re_string_t *pstr = &mctx->input; 4130 re_string_t *pstr = &mctx->input;
4140 4131
4141 /* Avoid overflow. */ 4132 /* Avoid overflow. */
4142 if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) 4133 if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4134 <= pstr->bufs_len, 0))
4143 return REG_ESPACE; 4135 return REG_ESPACE;
4144 4136
4145 /* Double the lengthes of the buffers. */ 4137 /* Double the lengths of the buffers, but allocate at least MIN_LEN. */
4146 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); 4138 ret = re_string_realloc_buffers (pstr,
4139 MAX (min_len,
4140 MIN (pstr->len, pstr->bufs_len * 2)));
4147 if (BE (ret != REG_NOERROR, 0)) 4141 if (BE (ret != REG_NOERROR, 0))
4148 return ret; 4142 return ret;
4149 4143
@@ -4206,7 +4200,7 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4206 size_t max_object_size = 4200 size_t max_object_size =
4207 MAX (sizeof (struct re_backref_cache_entry), 4201 MAX (sizeof (struct re_backref_cache_entry),
4208 sizeof (re_sub_match_top_t *)); 4202 sizeof (re_sub_match_top_t *));
4209 if (BE (SIZE_MAX / max_object_size < n, 0)) 4203 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
4210 return REG_ESPACE; 4204 return REG_ESPACE;
4211 4205
4212 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); 4206 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);