diff options
Diffstat (limited to 'gl/memchr.c')
| -rw-r--r-- | gl/memchr.c | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/gl/memchr.c b/gl/memchr.c new file mode 100644 index 00000000..d44ad6de --- /dev/null +++ b/gl/memchr.c | |||
| @@ -0,0 +1,201 @@ | |||
| 1 | /* Copyright (C) 1991, 1993, 1996, 1997, 1999, 2000, 2003, 2004, 2006 Free | ||
| 2 | Software Foundation, Inc. | ||
| 3 | |||
| 4 | Based on strlen implementation by Torbjorn Granlund (tege@sics.se), | ||
| 5 | with help from Dan Sahlin (dan@sics.se) and | ||
| 6 | commentary by Jim Blandy (jimb@ai.mit.edu); | ||
| 7 | adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), | ||
| 8 | and implemented by Roland McGrath (roland@ai.mit.edu). | ||
| 9 | |||
| 10 | NOTE: The canonical source of this file is maintained with the GNU C Library. | ||
| 11 | Bugs can be reported to bug-glibc@prep.ai.mit.edu. | ||
| 12 | |||
| 13 | This program is free software; you can redistribute it and/or modify it | ||
| 14 | under the terms of the GNU General Public License as published by the | ||
| 15 | Free Software Foundation; either version 2, or (at your option) any | ||
| 16 | later version. | ||
| 17 | |||
| 18 | This program is distributed in the hope that it will be useful, | ||
| 19 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 21 | GNU General Public License for more details. | ||
| 22 | |||
| 23 | You should have received a copy of the GNU General Public License | ||
| 24 | along with this program; if not, write to the Free Software Foundation, | ||
| 25 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ | ||
| 26 | |||
| 27 | #ifndef _LIBC | ||
| 28 | # include <config.h> | ||
| 29 | #endif | ||
| 30 | |||
| 31 | #include <string.h> | ||
| 32 | |||
| 33 | #include <stddef.h> | ||
| 34 | |||
| 35 | #if defined _LIBC | ||
| 36 | # include <memcopy.h> | ||
| 37 | #else | ||
| 38 | # define reg_char char | ||
| 39 | #endif | ||
| 40 | |||
| 41 | #include <limits.h> | ||
| 42 | |||
| 43 | #if HAVE_BP_SYM_H || defined _LIBC | ||
| 44 | # include <bp-sym.h> | ||
| 45 | #else | ||
| 46 | # define BP_SYM(sym) sym | ||
| 47 | #endif | ||
| 48 | |||
| 49 | #undef memchr | ||
| 50 | #undef __memchr | ||
| 51 | |||
| 52 | /* Search no more than N bytes of S for C. */ | ||
| 53 | void * | ||
| 54 | __memchr (void const *s, int c_in, size_t n) | ||
| 55 | { | ||
| 56 | const unsigned char *char_ptr; | ||
| 57 | const unsigned long int *longword_ptr; | ||
| 58 | unsigned long int longword, magic_bits, charmask; | ||
| 59 | unsigned reg_char c; | ||
| 60 | int i; | ||
| 61 | |||
| 62 | c = (unsigned char) c_in; | ||
| 63 | |||
| 64 | /* Handle the first few characters by reading one character at a time. | ||
| 65 | Do this until CHAR_PTR is aligned on a longword boundary. */ | ||
| 66 | for (char_ptr = (const unsigned char *) s; | ||
| 67 | n > 0 && (size_t) char_ptr % sizeof longword != 0; | ||
| 68 | --n, ++char_ptr) | ||
| 69 | if (*char_ptr == c) | ||
| 70 | return (void *) char_ptr; | ||
| 71 | |||
| 72 | /* All these elucidatory comments refer to 4-byte longwords, | ||
| 73 | but the theory applies equally well to any size longwords. */ | ||
| 74 | |||
| 75 | longword_ptr = (const unsigned long int *) char_ptr; | ||
| 76 | |||
| 77 | /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits | ||
| 78 | the "holes." Note that there is a hole just to the left of | ||
| 79 | each byte, with an extra at the end: | ||
| 80 | |||
| 81 | bits: 01111110 11111110 11111110 11111111 | ||
| 82 | bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD | ||
| 83 | |||
| 84 | The 1-bits make sure that carries propagate to the next 0-bit. | ||
| 85 | The 0-bits provide holes for carries to fall into. */ | ||
| 86 | |||
| 87 | /* Set MAGIC_BITS to be this pattern of 1 and 0 bits. | ||
| 88 | Set CHARMASK to be a longword, each of whose bytes is C. */ | ||
| 89 | |||
| 90 | magic_bits = 0xfefefefe; | ||
| 91 | charmask = c | (c << 8); | ||
| 92 | charmask |= charmask << 16; | ||
| 93 | #if 0xffffffffU < ULONG_MAX | ||
| 94 | magic_bits |= magic_bits << 32; | ||
| 95 | charmask |= charmask << 32; | ||
| 96 | if (8 < sizeof longword) | ||
| 97 | for (i = 64; i < sizeof longword * 8; i *= 2) | ||
| 98 | { | ||
| 99 | magic_bits |= magic_bits << i; | ||
| 100 | charmask |= charmask << i; | ||
| 101 | } | ||
| 102 | #endif | ||
| 103 | magic_bits = (ULONG_MAX >> 1) & (magic_bits | 1); | ||
| 104 | |||
| 105 | /* Instead of the traditional loop which tests each character, | ||
| 106 | we will test a longword at a time. The tricky part is testing | ||
| 107 | if *any of the four* bytes in the longword in question are zero. */ | ||
| 108 | while (n >= sizeof longword) | ||
| 109 | { | ||
| 110 | /* We tentatively exit the loop if adding MAGIC_BITS to | ||
| 111 | LONGWORD fails to change any of the hole bits of LONGWORD. | ||
| 112 | |||
| 113 | 1) Is this safe? Will it catch all the zero bytes? | ||
| 114 | Suppose there is a byte with all zeros. Any carry bits | ||
| 115 | propagating from its left will fall into the hole at its | ||
| 116 | least significant bit and stop. Since there will be no | ||
| 117 | carry from its most significant bit, the LSB of the | ||
| 118 | byte to the left will be unchanged, and the zero will be | ||
| 119 | detected. | ||
| 120 | |||
| 121 | 2) Is this worthwhile? Will it ignore everything except | ||
| 122 | zero bytes? Suppose every byte of LONGWORD has a bit set | ||
| 123 | somewhere. There will be a carry into bit 8. If bit 8 | ||
| 124 | is set, this will carry into bit 16. If bit 8 is clear, | ||
| 125 | one of bits 9-15 must be set, so there will be a carry | ||
| 126 | into bit 16. Similarly, there will be a carry into bit | ||
| 127 | 24. If one of bits 24-30 is set, there will be a carry | ||
| 128 | into bit 31, so all of the hole bits will be changed. | ||
| 129 | |||
| 130 | The one misfire occurs when bits 24-30 are clear and bit | ||
| 131 | 31 is set; in this case, the hole at bit 31 is not | ||
| 132 | changed. If we had access to the processor carry flag, | ||
| 133 | we could close this loophole by putting the fourth hole | ||
| 134 | at bit 32! | ||
| 135 | |||
| 136 | So it ignores everything except 128's, when they're aligned | ||
| 137 | properly. | ||
| 138 | |||
| 139 | 3) But wait! Aren't we looking for C, not zero? | ||
| 140 | Good point. So what we do is XOR LONGWORD with a longword, | ||
| 141 | each of whose bytes is C. This turns each byte that is C | ||
| 142 | into a zero. */ | ||
| 143 | |||
| 144 | longword = *longword_ptr++ ^ charmask; | ||
| 145 | |||
| 146 | /* Add MAGIC_BITS to LONGWORD. */ | ||
| 147 | if ((((longword + magic_bits) | ||
| 148 | |||
| 149 | /* Set those bits that were unchanged by the addition. */ | ||
| 150 | ^ ~longword) | ||
| 151 | |||
| 152 | /* Look at only the hole bits. If any of the hole bits | ||
| 153 | are unchanged, most likely one of the bytes was a | ||
| 154 | zero. */ | ||
| 155 | & ~magic_bits) != 0) | ||
| 156 | { | ||
| 157 | /* Which of the bytes was C? If none of them were, it was | ||
| 158 | a misfire; continue the search. */ | ||
| 159 | |||
| 160 | const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); | ||
| 161 | |||
| 162 | if (cp[0] == c) | ||
| 163 | return (void *) cp; | ||
| 164 | if (cp[1] == c) | ||
| 165 | return (void *) &cp[1]; | ||
| 166 | if (cp[2] == c) | ||
| 167 | return (void *) &cp[2]; | ||
| 168 | if (cp[3] == c) | ||
| 169 | return (void *) &cp[3]; | ||
| 170 | if (4 < sizeof longword && cp[4] == c) | ||
| 171 | return (void *) &cp[4]; | ||
| 172 | if (5 < sizeof longword && cp[5] == c) | ||
| 173 | return (void *) &cp[5]; | ||
| 174 | if (6 < sizeof longword && cp[6] == c) | ||
| 175 | return (void *) &cp[6]; | ||
| 176 | if (7 < sizeof longword && cp[7] == c) | ||
| 177 | return (void *) &cp[7]; | ||
| 178 | if (8 < sizeof longword) | ||
| 179 | for (i = 8; i < sizeof longword; i++) | ||
| 180 | if (cp[i] == c) | ||
| 181 | return (void *) &cp[i]; | ||
| 182 | } | ||
| 183 | |||
| 184 | n -= sizeof longword; | ||
| 185 | } | ||
| 186 | |||
| 187 | char_ptr = (const unsigned char *) longword_ptr; | ||
| 188 | |||
| 189 | while (n-- > 0) | ||
| 190 | { | ||
| 191 | if (*char_ptr == c) | ||
| 192 | return (void *) char_ptr; | ||
| 193 | else | ||
| 194 | ++char_ptr; | ||
| 195 | } | ||
| 196 | |||
| 197 | return 0; | ||
| 198 | } | ||
| 199 | #ifdef weak_alias | ||
| 200 | weak_alias (__memchr, BP_SYM (memchr)) | ||
| 201 | #endif | ||
