summary refs log tree commit diff stats
path: root/gl/regex_internal.c
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regex_internal.c')
-rw-r--r-- gl/regex_internal.c 99
1 files changed, 49 insertions, 50 deletions
diff --git a/gl/regex_internal.c b/gl/regex_internal.c
index 98b8d5d..899b0ae 100644
--- a/gl/regex_internal.c
+++ b/gl/regex_internal.c
@@ -1,22 +1,21 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free 2 Copyright (C) 2002-2013 Free Software Foundation, Inc.
3 Software Foundation, Inc.
4 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
5 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 5
7 This program is free software; you can redistribute it and/or modify 6 The GNU C Library is free software; you can redistribute it and/or
8 it under the terms of the GNU General Public License as published by 7 modify it under the terms of the GNU General Public
9 the Free Software Foundation; either version 3, or (at your option) 8 License as published by the Free Software Foundation; either
10 any later version. 9 version 3 of the License, or (at your option) any later version.
11 10
12 This program is distributed in the hope that it will be useful, 11 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 GNU General Public License for more details. 14 General Public License for more details.
16 15
17 You should have received a copy of the GNU General Public License along 16 You should have received a copy of the GNU General Public
18 with this program; if not, write to the Free Software Foundation, 17 License along with the GNU C Library; if not, see
19 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 <http://www.gnu.org/licenses/>. */
20 19
21static void re_string_construct_common (const char *str, Idx len, 20static void re_string_construct_common (const char *str, Idx len,
22 re_string_t *pstr, 21 re_string_t *pstr,
@@ -135,9 +134,9 @@ re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
135 { 134 {
136 wint_t *new_wcs; 135 wint_t *new_wcs;
137 136
138 /* Avoid overflow. */ 137 /* Avoid overflow in realloc. */
139 size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx)); 138 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
140 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0)) 139 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
141 return REG_ESPACE; 140 return REG_ESPACE;
142 141
143 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); 142 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
@@ -237,13 +236,8 @@ build_wcs_buffer (re_string_t *pstr)
237 else 236 else
238 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; 237 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
239 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); 238 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
240 if (BE (mbclen == (size_t) -2, 0)) 239 if (BE (mbclen == (size_t) -1 || mbclen == 0
241 { 240 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
242 /* The buffer doesn't have enough space, finish to build. */
243 pstr->cur_state = prev_st;
244 break;
245 }
246 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
247 { 241 {
248 /* We treat these cases as a singlebyte character. */ 242 /* We treat these cases as a singlebyte character. */
249 mbclen = 1; 243 mbclen = 1;
@@ -252,6 +246,12 @@ build_wcs_buffer (re_string_t *pstr)
252 wc = pstr->trans[wc]; 246 wc = pstr->trans[wc];
253 pstr->cur_state = prev_st; 247 pstr->cur_state = prev_st;
254 } 248 }
249 else if (BE (mbclen == (size_t) -2, 0))
250 {
251 /* The buffer doesn't have enough space, finish to build. */
252 pstr->cur_state = prev_st;
253 break;
254 }
255 255
256 /* Write wide character and padding. */ 256 /* Write wide character and padding. */
257 pstr->wcs[byte_idx++] = wc; 257 pstr->wcs[byte_idx++] = wc;
@@ -334,9 +334,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
334 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) 334 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
335 pstr->wcs[byte_idx++] = WEOF; 335 pstr->wcs[byte_idx++] = WEOF;
336 } 336 }
337 else if (mbclen == (size_t) -1 || mbclen == 0) 337 else if (mbclen == (size_t) -1 || mbclen == 0
338 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
338 { 339 {
339 /* It is an invalid character or '\0'. Just use the byte. */ 340 /* It is an invalid character, an incomplete character
341 at the end of the string, or '\0'. Just use the byte. */
340 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; 342 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
341 pstr->mbs[byte_idx] = ch; 343 pstr->mbs[byte_idx] = ch;
342 /* And also cast it to wide char. */ 344 /* And also cast it to wide char. */
@@ -449,7 +451,8 @@ build_wcs_upper_buffer (re_string_t *pstr)
449 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) 451 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
450 pstr->wcs[byte_idx++] = WEOF; 452 pstr->wcs[byte_idx++] = WEOF;
451 } 453 }
452 else if (mbclen == (size_t) -1 || mbclen == 0) 454 else if (mbclen == (size_t) -1 || mbclen == 0
455 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
453 { 456 {
454 /* It is an invalid character or '\0'. Just use the byte. */ 457 /* It is an invalid character or '\0'. Just use the byte. */
455 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; 458 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
@@ -496,8 +499,7 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
496 rawbuf_idx < new_raw_idx;) 499 rawbuf_idx < new_raw_idx;)
497 { 500 {
498 wchar_t wc2; 501 wchar_t wc2;
499 Idx remain_len; 502 Idx remain_len = pstr->raw_len - rawbuf_idx;
500 remain_len = pstr->len - rawbuf_idx;
501 prev_st = pstr->cur_state; 503 prev_st = pstr->cur_state;
502 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, 504 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
503 remain_len, &pstr->cur_state); 505 remain_len, &pstr->cur_state);
@@ -733,21 +735,21 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
733 mbstate_t cur_state; 735 mbstate_t cur_state;
734 wchar_t wc2; 736 wchar_t wc2;
735 Idx mlen = raw + pstr->len - p; 737 Idx mlen = raw + pstr->len - p;
738 unsigned char buf[6];
736 size_t mbclen; 739 size_t mbclen;
737 740
738#if 0 /* dead code: buf is set but never used */ 741 const unsigned char *pp = p;
739 unsigned char buf[6];
740 if (BE (pstr->trans != NULL, 0)) 742 if (BE (pstr->trans != NULL, 0))
741 { 743 {
742 int i = mlen < 6 ? mlen : 6; 744 int i = mlen < 6 ? mlen : 6;
743 while (--i >= 0) 745 while (--i >= 0)
744 buf[i] = pstr->trans[p[i]]; 746 buf[i] = pstr->trans[p[i]];
747 pp = buf;
745 } 748 }
746#endif
747 /* XXX Don't use mbrtowc, we know which conversion 749 /* XXX Don't use mbrtowc, we know which conversion
748 to use (UTF-8 -> UCS4). */ 750 to use (UTF-8 -> UCS4). */
749 memset (&cur_state, 0, sizeof (cur_state)); 751 memset (&cur_state, 0, sizeof (cur_state));
750 mbclen = __mbrtowc (&wc2, (const char *) p, mlen, 752 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
751 &cur_state); 753 &cur_state);
752 if (raw + offset - p <= mbclen 754 if (raw + offset - p <= mbclen
753 && mbclen < (size_t) -2) 755 && mbclen < (size_t) -2)
@@ -832,7 +834,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
832} 834}
833 835
834static unsigned char 836static unsigned char
835internal_function __attribute ((pure)) 837internal_function __attribute__ ((pure))
836re_string_peek_byte_case (const re_string_t *pstr, Idx idx) 838re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
837{ 839{
838 int ch; 840 int ch;
@@ -869,7 +871,7 @@ re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
869} 871}
870 872
871static unsigned char 873static unsigned char
872internal_function __attribute ((pure)) 874internal_function
873re_string_fetch_byte_case (re_string_t *pstr) 875re_string_fetch_byte_case (re_string_t *pstr)
874{ 876{
875 if (BE (!pstr->mbs_allocated, 1)) 877 if (BE (!pstr->mbs_allocated, 1))
@@ -972,7 +974,7 @@ re_node_set_alloc (re_node_set *set, Idx size)
972 set->alloc = size; 974 set->alloc = size;
973 set->nelem = 0; 975 set->nelem = 0;
974 set->elems = re_malloc (Idx, size); 976 set->elems = re_malloc (Idx, size);
975 if (BE (set->elems == NULL, 0)) 977 if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0))
976 return REG_ESPACE; 978 return REG_ESPACE;
977 return REG_NOERROR; 979 return REG_NOERROR;
978} 980}
@@ -1352,7 +1354,7 @@ re_node_set_insert_last (re_node_set *set, Idx elem)
1352 Return true if SET1 and SET2 are equivalent. */ 1354 Return true if SET1 and SET2 are equivalent. */
1353 1355
1354static bool 1356static bool
1355internal_function __attribute ((pure)) 1357internal_function __attribute__ ((pure))
1356re_node_set_compare (const re_node_set *set1, const re_node_set *set2) 1358re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1357{ 1359{
1358 Idx i; 1360 Idx i;
@@ -1367,7 +1369,7 @@ re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1367/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ 1369/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1368 1370
1369static Idx 1371static Idx
1370internal_function __attribute ((pure)) 1372internal_function __attribute__ ((pure))
1371re_node_set_contains (const re_node_set *set, Idx elem) 1373re_node_set_contains (const re_node_set *set, Idx elem)
1372{ 1374{
1373 __re_size_t idx, right, mid; 1375 __re_size_t idx, right, mid;
@@ -1413,13 +1415,12 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1413 Idx *new_nexts, *new_indices; 1415 Idx *new_nexts, *new_indices;
1414 re_node_set *new_edests, *new_eclosures; 1416 re_node_set *new_edests, *new_eclosures;
1415 re_token_t *new_nodes; 1417 re_token_t *new_nodes;
1416 size_t max_object_size =
1417 MAX (sizeof (re_token_t),
1418 MAX (sizeof (re_node_set),
1419 sizeof (Idx)));
1420 1418
1421 /* Avoid overflows. */ 1419 /* Avoid overflows in realloc. */
1422 if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0)) 1420 const size_t max_object_size = MAX (sizeof (re_token_t),
1421 MAX (sizeof (re_node_set),
1422 sizeof (Idx)));
1423 if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
1423 return REG_MISSING; 1424 return REG_MISSING;
1424 1425
1425 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); 1426 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
@@ -1442,11 +1443,9 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1442 dfa->nodes[dfa->nodes_len] = token; 1443 dfa->nodes[dfa->nodes_len] = token;
1443 dfa->nodes[dfa->nodes_len].constraint = 0; 1444 dfa->nodes[dfa->nodes_len].constraint = 0;
1444#ifdef RE_ENABLE_I18N 1445#ifdef RE_ENABLE_I18N
1445 {
1446 int type = token.type;
1447 dfa->nodes[dfa->nodes_len].accept_mb = 1446 dfa->nodes[dfa->nodes_len].accept_mb =
1448 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; 1447 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1449 } 1448 || token.type == COMPLEX_BRACKET);
1450#endif 1449#endif
1451 dfa->nexts[dfa->nodes_len] = REG_MISSING; 1450 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1452 re_node_set_init_empty (dfa->edests + dfa->nodes_len); 1451 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
@@ -1454,7 +1453,7 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1454 return dfa->nodes_len++; 1453 return dfa->nodes_len++;
1455} 1454}
1456 1455
1457static inline re_hashval_t 1456static re_hashval_t
1458internal_function 1457internal_function
1459calc_state_hash (const re_node_set *nodes, unsigned int context) 1458calc_state_hash (const re_node_set *nodes, unsigned int context)
1460{ 1459{
@@ -1551,7 +1550,7 @@ re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1551 && re_node_set_compare (state->entrance_nodes, nodes)) 1550 && re_node_set_compare (state->entrance_nodes, nodes))
1552 return state; 1551 return state;
1553 } 1552 }
1554 /* There are no appropriate state in `dfa', create the new one. */ 1553 /* There are no appropriate state in 'dfa', create the new one. */
1555 new_state = create_cd_newstate (dfa, nodes, context, hash); 1554 new_state = create_cd_newstate (dfa, nodes, context, hash);
1556 if (BE (new_state == NULL, 0)) 1555 if (BE (new_state == NULL, 0))
1557 *err = REG_ESPACE; 1556 *err = REG_ESPACE;
@@ -1580,7 +1579,7 @@ register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1580 { 1579 {
1581 Idx elem = newstate->nodes.elems[i]; 1580 Idx elem = newstate->nodes.elems[i];
1582 if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) 1581 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1583 if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0)) 1582 if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
1584 return REG_ESPACE; 1583 return REG_ESPACE;
1585 } 1584 }
1586 1585
@@ -1615,7 +1614,7 @@ free_state (re_dfastate_t *state)
1615 re_free (state); 1614 re_free (state);
1616} 1615}
1617 1616
1618/* Create the new state which is independ of contexts. 1617/* Create the new state which is independent of contexts.
1619 Return the new state if succeeded, otherwise return NULL. */ 1618 Return the new state if succeeded, otherwise return NULL. */
1620 1619
1621static re_dfastate_t * 1620static re_dfastate_t *