summary refs log tree commit diff stats
path: root/gl/regex_internal.c
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regex_internal.c')
-rw-r--r-- gl/regex_internal.c 64
1 files changed, 11 insertions, 53 deletions
diff --git a/gl/regex_internal.c b/gl/regex_internal.c
index aefcfa2..0e6919f 100644
--- a/gl/regex_internal.c
+++ b/gl/regex_internal.c
@@ -1,5 +1,5 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002-2021 Free Software Foundation, Inc. 2 Copyright (C) 2002-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 5
@@ -30,10 +30,8 @@ static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
30 re_hashval_t hash); 30 re_hashval_t hash);
31static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, 31static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
32 Idx new_buf_len); 32 Idx new_buf_len);
33#ifdef RE_ENABLE_I18N
34static void build_wcs_buffer (re_string_t *pstr); 33static void build_wcs_buffer (re_string_t *pstr);
35static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr); 34static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
36#endif /* RE_ENABLE_I18N */
37static void build_upper_buffer (re_string_t *pstr); 35static void build_upper_buffer (re_string_t *pstr);
38static void re_string_translate_buffer (re_string_t *pstr); 36static void re_string_translate_buffer (re_string_t *pstr);
39static unsigned int re_string_context_at (const re_string_t *input, Idx idx, 37static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
@@ -91,7 +89,6 @@ re_string_construct (re_string_t *pstr, const char *str, Idx len,
91 89
92 if (icase) 90 if (icase)
93 { 91 {
94#ifdef RE_ENABLE_I18N
95 if (dfa->mb_cur_max > 1) 92 if (dfa->mb_cur_max > 1)
96 { 93 {
97 while (1) 94 while (1)
@@ -109,16 +106,13 @@ re_string_construct (re_string_t *pstr, const char *str, Idx len,
109 } 106 }
110 } 107 }
111 else 108 else
112#endif /* RE_ENABLE_I18N */
113 build_upper_buffer (pstr); 109 build_upper_buffer (pstr);
114 } 110 }
115 else 111 else
116 { 112 {
117#ifdef RE_ENABLE_I18N
118 if (dfa->mb_cur_max > 1) 113 if (dfa->mb_cur_max > 1)
119 build_wcs_buffer (pstr); 114 build_wcs_buffer (pstr);
120 else 115 else
121#endif /* RE_ENABLE_I18N */
122 { 116 {
123 if (trans != NULL) 117 if (trans != NULL)
124 re_string_translate_buffer (pstr); 118 re_string_translate_buffer (pstr);
@@ -139,7 +133,6 @@ static reg_errcode_t
139__attribute_warn_unused_result__ 133__attribute_warn_unused_result__
140re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) 134re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
141{ 135{
142#ifdef RE_ENABLE_I18N
143 if (pstr->mb_cur_max > 1) 136 if (pstr->mb_cur_max > 1)
144 { 137 {
145 wint_t *new_wcs; 138 wint_t *new_wcs;
@@ -162,7 +155,6 @@ re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
162 pstr->offsets = new_offsets; 155 pstr->offsets = new_offsets;
163 } 156 }
164 } 157 }
165#endif /* RE_ENABLE_I18N */
166 if (pstr->mbs_allocated) 158 if (pstr->mbs_allocated)
167 { 159 {
168 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, 160 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
@@ -194,7 +186,6 @@ re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
194 pstr->raw_stop = pstr->stop; 186 pstr->raw_stop = pstr->stop;
195} 187}
196 188
197#ifdef RE_ENABLE_I18N
198 189
199/* Build wide character buffer PSTR->WCS. 190/* Build wide character buffer PSTR->WCS.
200 If the byte sequence of the string are: 191 If the byte sequence of the string are:
@@ -530,7 +521,6 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
530 *last_wc = wc; 521 *last_wc = wc;
531 return rawbuf_idx; 522 return rawbuf_idx;
532} 523}
533#endif /* RE_ENABLE_I18N */
534 524
535/* Build the buffer PSTR->MBS, and apply the translation if we need. 525/* Build the buffer PSTR->MBS, and apply the translation if we need.
536 This function is used in case of REG_ICASE. */ 526 This function is used in case of REG_ICASE. */
@@ -585,10 +575,8 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
585 else 575 else
586 { 576 {
587 /* Reset buffer. */ 577 /* Reset buffer. */
588#ifdef RE_ENABLE_I18N
589 if (pstr->mb_cur_max > 1) 578 if (pstr->mb_cur_max > 1)
590 memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); 579 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
591#endif /* RE_ENABLE_I18N */
592 pstr->len = pstr->raw_len; 580 pstr->len = pstr->raw_len;
593 pstr->stop = pstr->raw_stop; 581 pstr->stop = pstr->raw_stop;
594 pstr->valid_len = 0; 582 pstr->valid_len = 0;
@@ -608,7 +596,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
608 if (__glibc_likely (offset < pstr->valid_raw_len)) 596 if (__glibc_likely (offset < pstr->valid_raw_len))
609 { 597 {
610 /* Yes, move them to the front of the buffer. */ 598 /* Yes, move them to the front of the buffer. */
611#ifdef RE_ENABLE_I18N
612 if (__glibc_unlikely (pstr->offsets_needed)) 599 if (__glibc_unlikely (pstr->offsets_needed))
613 { 600 {
614 Idx low = 0, high = pstr->valid_len, mid; 601 Idx low = 0, high = pstr->valid_len, mid;
@@ -672,15 +659,12 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
672 } 659 }
673 } 660 }
674 else 661 else
675#endif
676 { 662 {
677 pstr->tip_context = re_string_context_at (pstr, offset - 1, 663 pstr->tip_context = re_string_context_at (pstr, offset - 1,
678 eflags); 664 eflags);
679#ifdef RE_ENABLE_I18N
680 if (pstr->mb_cur_max > 1) 665 if (pstr->mb_cur_max > 1)
681 memmove (pstr->wcs, pstr->wcs + offset, 666 memmove (pstr->wcs, pstr->wcs + offset,
682 (pstr->valid_len - offset) * sizeof (wint_t)); 667 (pstr->valid_len - offset) * sizeof (wint_t));
683#endif /* RE_ENABLE_I18N */
684 if (__glibc_unlikely (pstr->mbs_allocated)) 668 if (__glibc_unlikely (pstr->mbs_allocated))
685 memmove (pstr->mbs, pstr->mbs + offset, 669 memmove (pstr->mbs, pstr->mbs + offset,
686 pstr->valid_len - offset); 670 pstr->valid_len - offset);
@@ -691,7 +675,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
691 } 675 }
692 else 676 else
693 { 677 {
694#ifdef RE_ENABLE_I18N
695 /* No, skip all characters until IDX. */ 678 /* No, skip all characters until IDX. */
696 Idx prev_valid_len = pstr->valid_len; 679 Idx prev_valid_len = pstr->valid_len;
697 680
@@ -701,9 +684,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
701 pstr->stop = pstr->raw_stop - idx + offset; 684 pstr->stop = pstr->raw_stop - idx + offset;
702 pstr->offsets_needed = 0; 685 pstr->offsets_needed = 0;
703 } 686 }
704#endif
705 pstr->valid_len = 0; 687 pstr->valid_len = 0;
706#ifdef RE_ENABLE_I18N
707 if (pstr->mb_cur_max > 1) 688 if (pstr->mb_cur_max > 1)
708 { 689 {
709 Idx wcs_idx; 690 Idx wcs_idx;
@@ -787,7 +768,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
787 pstr->valid_raw_len = pstr->valid_len; 768 pstr->valid_raw_len = pstr->valid_len;
788 } 769 }
789 else 770 else
790#endif /* RE_ENABLE_I18N */
791 { 771 {
792 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; 772 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
793 pstr->valid_raw_len = 0; 773 pstr->valid_raw_len = 0;
@@ -807,7 +787,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
807 pstr->stop -= offset; 787 pstr->stop -= offset;
808 788
809 /* Then build the buffers. */ 789 /* Then build the buffers. */
810#ifdef RE_ENABLE_I18N
811 if (pstr->mb_cur_max > 1) 790 if (pstr->mb_cur_max > 1)
812 { 791 {
813 if (pstr->icase) 792 if (pstr->icase)
@@ -820,7 +799,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
820 build_wcs_buffer (pstr); 799 build_wcs_buffer (pstr);
821 } 800 }
822 else 801 else
823#endif /* RE_ENABLE_I18N */
824 if (__glibc_unlikely (pstr->mbs_allocated)) 802 if (__glibc_unlikely (pstr->mbs_allocated))
825 { 803 {
826 if (pstr->icase) 804 if (pstr->icase)
@@ -846,28 +824,22 @@ re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
846 if (__glibc_likely (!pstr->mbs_allocated)) 824 if (__glibc_likely (!pstr->mbs_allocated))
847 return re_string_peek_byte (pstr, idx); 825 return re_string_peek_byte (pstr, idx);
848 826
849#ifdef RE_ENABLE_I18N
850 if (pstr->mb_cur_max > 1 827 if (pstr->mb_cur_max > 1
851 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) 828 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
852 return re_string_peek_byte (pstr, idx); 829 return re_string_peek_byte (pstr, idx);
853#endif
854 830
855 off = pstr->cur_idx + idx; 831 off = pstr->cur_idx + idx;
856#ifdef RE_ENABLE_I18N
857 if (pstr->offsets_needed) 832 if (pstr->offsets_needed)
858 off = pstr->offsets[off]; 833 off = pstr->offsets[off];
859#endif
860 834
861 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; 835 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
862 836
863#ifdef RE_ENABLE_I18N
864 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I 837 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
865 this function returns CAPITAL LETTER I instead of first byte of 838 this function returns CAPITAL LETTER I instead of first byte of
866 DOTLESS SMALL LETTER I. The latter would confuse the parser, 839 DOTLESS SMALL LETTER I. The latter would confuse the parser,
867 since peek_byte_case doesn't advance cur_idx in any way. */ 840 since peek_byte_case doesn't advance cur_idx in any way. */
868 if (pstr->offsets_needed && !isascii (ch)) 841 if (pstr->offsets_needed && !isascii (ch))
869 return re_string_peek_byte (pstr, idx); 842 return re_string_peek_byte (pstr, idx);
870#endif
871 843
872 return ch; 844 return ch;
873} 845}
@@ -878,7 +850,6 @@ re_string_fetch_byte_case (re_string_t *pstr)
878 if (__glibc_likely (!pstr->mbs_allocated)) 850 if (__glibc_likely (!pstr->mbs_allocated))
879 return re_string_fetch_byte (pstr); 851 return re_string_fetch_byte (pstr);
880 852
881#ifdef RE_ENABLE_I18N
882 if (pstr->offsets_needed) 853 if (pstr->offsets_needed)
883 { 854 {
884 Idx off; 855 Idx off;
@@ -904,7 +875,6 @@ re_string_fetch_byte_case (re_string_t *pstr)
904 re_string_char_size_at (pstr, pstr->cur_idx)); 875 re_string_char_size_at (pstr, pstr->cur_idx));
905 return ch; 876 return ch;
906 } 877 }
907#endif
908 878
909 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; 879 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
910} 880}
@@ -912,10 +882,8 @@ re_string_fetch_byte_case (re_string_t *pstr)
912static void 882static void
913re_string_destruct (re_string_t *pstr) 883re_string_destruct (re_string_t *pstr)
914{ 884{
915#ifdef RE_ENABLE_I18N
916 re_free (pstr->wcs); 885 re_free (pstr->wcs);
917 re_free (pstr->offsets); 886 re_free (pstr->offsets);
918#endif /* RE_ENABLE_I18N */
919 if (pstr->mbs_allocated) 887 if (pstr->mbs_allocated)
920 re_free (pstr->mbs); 888 re_free (pstr->mbs);
921} 889}
@@ -933,7 +901,6 @@ re_string_context_at (const re_string_t *input, Idx idx, int eflags)
933 if (__glibc_unlikely (idx == input->len)) 901 if (__glibc_unlikely (idx == input->len))
934 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF 902 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
935 : CONTEXT_NEWLINE | CONTEXT_ENDBUF); 903 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
936#ifdef RE_ENABLE_I18N
937 if (input->mb_cur_max > 1) 904 if (input->mb_cur_max > 1)
938 { 905 {
939 wint_t wc; 906 wint_t wc;
@@ -953,7 +920,6 @@ re_string_context_at (const re_string_t *input, Idx idx, int eflags)
953 ? CONTEXT_NEWLINE : 0); 920 ? CONTEXT_NEWLINE : 0);
954 } 921 }
955 else 922 else
956#endif
957 { 923 {
958 c = re_string_byte_at (input, idx); 924 c = re_string_byte_at (input, idx);
959 if (bitset_contain (input->word_char, c)) 925 if (bitset_contain (input->word_char, c))
@@ -1430,32 +1396,28 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1430 if (__glibc_unlikely (new_nodes == NULL)) 1396 if (__glibc_unlikely (new_nodes == NULL))
1431 return -1; 1397 return -1;
1432 dfa->nodes = new_nodes; 1398 dfa->nodes = new_nodes;
1399 dfa->nodes_alloc = new_nodes_alloc;
1433 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc); 1400 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1401 if (new_nexts != NULL)
1402 dfa->nexts = new_nexts;
1434 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc); 1403 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1404 if (new_indices != NULL)
1405 dfa->org_indices = new_indices;
1435 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); 1406 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1407 if (new_edests != NULL)
1408 dfa->edests = new_edests;
1436 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); 1409 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1410 if (new_eclosures != NULL)
1411 dfa->eclosures = new_eclosures;
1437 if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL 1412 if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL
1438 || new_edests == NULL || new_eclosures == NULL)) 1413 || new_edests == NULL || new_eclosures == NULL))
1439 { 1414 return -1;
1440 re_free (new_nexts);
1441 re_free (new_indices);
1442 re_free (new_edests);
1443 re_free (new_eclosures);
1444 return -1;
1445 }
1446 dfa->nexts = new_nexts;
1447 dfa->org_indices = new_indices;
1448 dfa->edests = new_edests;
1449 dfa->eclosures = new_eclosures;
1450 dfa->nodes_alloc = new_nodes_alloc;
1451 } 1415 }
1452 dfa->nodes[dfa->nodes_len] = token; 1416 dfa->nodes[dfa->nodes_len] = token;
1453 dfa->nodes[dfa->nodes_len].constraint = 0; 1417 dfa->nodes[dfa->nodes_len].constraint = 0;
1454#ifdef RE_ENABLE_I18N
1455 dfa->nodes[dfa->nodes_len].accept_mb = 1418 dfa->nodes[dfa->nodes_len].accept_mb =
1456 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1) 1419 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1457 || token.type == COMPLEX_BRACKET); 1420 || token.type == COMPLEX_BRACKET);
1458#endif
1459 dfa->nexts[dfa->nodes_len] = -1; 1421 dfa->nexts[dfa->nodes_len] = -1;
1460 re_node_set_init_empty (dfa->edests + dfa->nodes_len); 1422 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1461 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); 1423 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
@@ -1651,9 +1613,7 @@ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1651 re_token_type_t type = node->type; 1613 re_token_type_t type = node->type;
1652 if (type == CHARACTER && !node->constraint) 1614 if (type == CHARACTER && !node->constraint)
1653 continue; 1615 continue;
1654#ifdef RE_ENABLE_I18N
1655 newstate->accept_mb |= node->accept_mb; 1616 newstate->accept_mb |= node->accept_mb;
1656#endif /* RE_ENABLE_I18N */
1657 1617
1658 /* If the state has the halt node, the state is a halt state. */ 1618 /* If the state has the halt node, the state is a halt state. */
1659 if (type == END_OF_RE) 1619 if (type == END_OF_RE)
@@ -1705,9 +1665,7 @@ create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1705 1665
1706 if (type == CHARACTER && !constraint) 1666 if (type == CHARACTER && !constraint)
1707 continue; 1667 continue;
1708#ifdef RE_ENABLE_I18N
1709 newstate->accept_mb |= node->accept_mb; 1668 newstate->accept_mb |= node->accept_mb;
1710#endif /* RE_ENABLE_I18N */
1711 1669
1712 /* If the state has the halt node, the state is a halt state. */ 1670 /* If the state has the halt node, the state is a halt state. */
1713 if (type == END_OF_RE) 1671 if (type == END_OF_RE)