diff options
Diffstat (limited to 'plugins/uriparser/UriEscape.c')
| -rw-r--r-- | plugins/uriparser/UriEscape.c | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/plugins/uriparser/UriEscape.c b/plugins/uriparser/UriEscape.c new file mode 100644 index 00000000..79ee3a68 --- /dev/null +++ b/plugins/uriparser/UriEscape.c | |||
| @@ -0,0 +1,453 @@ | |||
| 1 | /* | ||
| 2 | * uriparser - RFC 3986 URI parsing library | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007, Weijia Song <songweijia@gmail.com> | ||
| 5 | * Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org> | ||
| 6 | * All rights reserved. | ||
| 7 | * | ||
| 8 | * Redistribution and use in source and binary forms, with or without | ||
| 9 | * modification, are permitted provided that the following conditions | ||
| 10 | * are met: | ||
| 11 | * | ||
| 12 | * * Redistributions of source code must retain the above | ||
| 13 | * copyright notice, this list of conditions and the following | ||
| 14 | * disclaimer. | ||
| 15 | * | ||
| 16 | * * Redistributions in binary form must reproduce the above | ||
| 17 | * copyright notice, this list of conditions and the following | ||
| 18 | * disclaimer in the documentation and/or other materials | ||
| 19 | * provided with the distribution. | ||
| 20 | * | ||
| 21 | * * Neither the name of the <ORGANIZATION> nor the names of its | ||
| 22 | * contributors may be used to endorse or promote products | ||
| 23 | * derived from this software without specific prior written | ||
| 24 | * permission. | ||
| 25 | * | ||
| 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | ||
| 29 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | ||
| 30 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
| 31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| 32 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 33 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
| 34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
| 35 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 36 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
| 37 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | */ | ||
| 39 | |||
| 40 | /* What encodings are enabled? */ | ||
| 41 | #include <uriparser/UriDefsConfig.h> | ||
| 42 | #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) | ||
| 43 | /* Include SELF twice */ | ||
| 44 | # ifdef URI_ENABLE_ANSI | ||
| 45 | # define URI_PASS_ANSI 1 | ||
| 46 | # include "UriEscape.c" | ||
| 47 | # undef URI_PASS_ANSI | ||
| 48 | # endif | ||
| 49 | # ifdef URI_ENABLE_UNICODE | ||
| 50 | # define URI_PASS_UNICODE 1 | ||
| 51 | # include "UriEscape.c" | ||
| 52 | # undef URI_PASS_UNICODE | ||
| 53 | # endif | ||
| 54 | #else | ||
| 55 | # ifdef URI_PASS_ANSI | ||
| 56 | # include <uriparser/UriDefsAnsi.h> | ||
| 57 | # else | ||
| 58 | # include <uriparser/UriDefsUnicode.h> | ||
| 59 | # include <wchar.h> | ||
| 60 | # endif | ||
| 61 | |||
| 62 | |||
| 63 | |||
| 64 | #ifndef URI_DOXYGEN | ||
| 65 | # include <uriparser/Uri.h> | ||
| 66 | # include "UriCommon.h" | ||
| 67 | #endif | ||
| 68 | |||
| 69 | |||
| 70 | |||
| 71 | URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, | ||
| 72 | UriBool spaceToPlus, UriBool normalizeBreaks) { | ||
| 73 | return URI_FUNC(EscapeEx)(in, NULL, out, spaceToPlus, normalizeBreaks); | ||
| 74 | } | ||
| 75 | |||
| 76 | |||
| 77 | |||
| 78 | URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, | ||
| 79 | const URI_CHAR * inAfterLast, URI_CHAR * out, | ||
| 80 | UriBool spaceToPlus, UriBool normalizeBreaks) { | ||
| 81 | const URI_CHAR * read = inFirst; | ||
| 82 | URI_CHAR * write = out; | ||
| 83 | UriBool prevWasCr = URI_FALSE; | ||
| 84 | if ((out == NULL) || (inFirst == out)) { | ||
| 85 | return NULL; | ||
| 86 | } else if (inFirst == NULL) { | ||
| 87 | if (out != NULL) { | ||
| 88 | out[0] = _UT('\0'); | ||
| 89 | } | ||
| 90 | return out; | ||
| 91 | } | ||
| 92 | |||
| 93 | for (;;) { | ||
| 94 | if ((inAfterLast != NULL) && (read >= inAfterLast)) { | ||
| 95 | write[0] = _UT('\0'); | ||
| 96 | return write; | ||
| 97 | } | ||
| 98 | |||
| 99 | switch (read[0]) { | ||
| 100 | case _UT('\0'): | ||
| 101 | write[0] = _UT('\0'); | ||
| 102 | return write; | ||
| 103 | |||
| 104 | case _UT(' '): | ||
| 105 | if (spaceToPlus) { | ||
| 106 | write[0] = _UT('+'); | ||
| 107 | write++; | ||
| 108 | } else { | ||
| 109 | write[0] = _UT('%'); | ||
| 110 | write[1] = _UT('2'); | ||
| 111 | write[2] = _UT('0'); | ||
| 112 | write += 3; | ||
| 113 | } | ||
| 114 | prevWasCr = URI_FALSE; | ||
| 115 | break; | ||
| 116 | |||
| 117 | case _UT('a'): /* ALPHA */ | ||
| 118 | case _UT('A'): | ||
| 119 | case _UT('b'): | ||
| 120 | case _UT('B'): | ||
| 121 | case _UT('c'): | ||
| 122 | case _UT('C'): | ||
| 123 | case _UT('d'): | ||
| 124 | case _UT('D'): | ||
| 125 | case _UT('e'): | ||
| 126 | case _UT('E'): | ||
| 127 | case _UT('f'): | ||
| 128 | case _UT('F'): | ||
| 129 | case _UT('g'): | ||
| 130 | case _UT('G'): | ||
| 131 | case _UT('h'): | ||
| 132 | case _UT('H'): | ||
| 133 | case _UT('i'): | ||
| 134 | case _UT('I'): | ||
| 135 | case _UT('j'): | ||
| 136 | case _UT('J'): | ||
| 137 | case _UT('k'): | ||
| 138 | case _UT('K'): | ||
| 139 | case _UT('l'): | ||
| 140 | case _UT('L'): | ||
| 141 | case _UT('m'): | ||
| 142 | case _UT('M'): | ||
| 143 | case _UT('n'): | ||
| 144 | case _UT('N'): | ||
| 145 | case _UT('o'): | ||
| 146 | case _UT('O'): | ||
| 147 | case _UT('p'): | ||
| 148 | case _UT('P'): | ||
| 149 | case _UT('q'): | ||
| 150 | case _UT('Q'): | ||
| 151 | case _UT('r'): | ||
| 152 | case _UT('R'): | ||
| 153 | case _UT('s'): | ||
| 154 | case _UT('S'): | ||
| 155 | case _UT('t'): | ||
| 156 | case _UT('T'): | ||
| 157 | case _UT('u'): | ||
| 158 | case _UT('U'): | ||
| 159 | case _UT('v'): | ||
| 160 | case _UT('V'): | ||
| 161 | case _UT('w'): | ||
| 162 | case _UT('W'): | ||
| 163 | case _UT('x'): | ||
| 164 | case _UT('X'): | ||
| 165 | case _UT('y'): | ||
| 166 | case _UT('Y'): | ||
| 167 | case _UT('z'): | ||
| 168 | case _UT('Z'): | ||
| 169 | case _UT('0'): /* DIGIT */ | ||
| 170 | case _UT('1'): | ||
| 171 | case _UT('2'): | ||
| 172 | case _UT('3'): | ||
| 173 | case _UT('4'): | ||
| 174 | case _UT('5'): | ||
| 175 | case _UT('6'): | ||
| 176 | case _UT('7'): | ||
| 177 | case _UT('8'): | ||
| 178 | case _UT('9'): | ||
| 179 | case _UT('-'): /* "-" / "." / "_" / "~" */ | ||
| 180 | case _UT('.'): | ||
| 181 | case _UT('_'): | ||
| 182 | case _UT('~'): | ||
| 183 | /* Copy unmodified */ | ||
| 184 | write[0] = read[0]; | ||
| 185 | write++; | ||
| 186 | |||
| 187 | prevWasCr = URI_FALSE; | ||
| 188 | break; | ||
| 189 | |||
| 190 | case _UT('\x0a'): | ||
| 191 | if (normalizeBreaks) { | ||
| 192 | if (!prevWasCr) { | ||
| 193 | write[0] = _UT('%'); | ||
| 194 | write[1] = _UT('0'); | ||
| 195 | write[2] = _UT('D'); | ||
| 196 | write[3] = _UT('%'); | ||
| 197 | write[4] = _UT('0'); | ||
| 198 | write[5] = _UT('A'); | ||
| 199 | write += 6; | ||
| 200 | } | ||
| 201 | } else { | ||
| 202 | write[0] = _UT('%'); | ||
| 203 | write[1] = _UT('0'); | ||
| 204 | write[2] = _UT('A'); | ||
| 205 | write += 3; | ||
| 206 | } | ||
| 207 | prevWasCr = URI_FALSE; | ||
| 208 | break; | ||
| 209 | |||
| 210 | case _UT('\x0d'): | ||
| 211 | if (normalizeBreaks) { | ||
| 212 | write[0] = _UT('%'); | ||
| 213 | write[1] = _UT('0'); | ||
| 214 | write[2] = _UT('D'); | ||
| 215 | write[3] = _UT('%'); | ||
| 216 | write[4] = _UT('0'); | ||
| 217 | write[5] = _UT('A'); | ||
| 218 | write += 6; | ||
| 219 | } else { | ||
| 220 | write[0] = _UT('%'); | ||
| 221 | write[1] = _UT('0'); | ||
| 222 | write[2] = _UT('D'); | ||
| 223 | write += 3; | ||
| 224 | } | ||
| 225 | prevWasCr = URI_TRUE; | ||
| 226 | break; | ||
| 227 | |||
| 228 | default: | ||
| 229 | /* Percent encode */ | ||
| 230 | { | ||
| 231 | const unsigned char code = (unsigned char)read[0]; | ||
| 232 | write[0] = _UT('%'); | ||
| 233 | write[1] = URI_FUNC(HexToLetter)(code >> 4); | ||
| 234 | write[2] = URI_FUNC(HexToLetter)(code & 0x0f); | ||
| 235 | write += 3; | ||
| 236 | } | ||
| 237 | prevWasCr = URI_FALSE; | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | |||
| 241 | read++; | ||
| 242 | } | ||
| 243 | } | ||
| 244 | |||
| 245 | |||
| 246 | |||
| 247 | const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) { | ||
| 248 | return URI_FUNC(UnescapeInPlaceEx)(inout, URI_FALSE, URI_BR_DONT_TOUCH); | ||
| 249 | } | ||
| 250 | |||
| 251 | |||
| 252 | |||
| 253 | const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, | ||
| 254 | UriBool plusToSpace, UriBreakConversion breakConversion) { | ||
| 255 | URI_CHAR * read = inout; | ||
| 256 | URI_CHAR * write = inout; | ||
| 257 | UriBool prevWasCr = URI_FALSE; | ||
| 258 | |||
| 259 | if (inout == NULL) { | ||
| 260 | return NULL; | ||
| 261 | } | ||
| 262 | |||
| 263 | for (;;) { | ||
| 264 | switch (read[0]) { | ||
| 265 | case _UT('\0'): | ||
| 266 | if (read > write) { | ||
| 267 | write[0] = _UT('\0'); | ||
| 268 | } | ||
| 269 | return write; | ||
| 270 | |||
| 271 | case _UT('%'): | ||
| 272 | switch (read[1]) { | ||
| 273 | case _UT('0'): | ||
| 274 | case _UT('1'): | ||
| 275 | case _UT('2'): | ||
| 276 | case _UT('3'): | ||
| 277 | case _UT('4'): | ||
| 278 | case _UT('5'): | ||
| 279 | case _UT('6'): | ||
| 280 | case _UT('7'): | ||
| 281 | case _UT('8'): | ||
| 282 | case _UT('9'): | ||
| 283 | case _UT('a'): | ||
| 284 | case _UT('b'): | ||
| 285 | case _UT('c'): | ||
| 286 | case _UT('d'): | ||
| 287 | case _UT('e'): | ||
| 288 | case _UT('f'): | ||
| 289 | case _UT('A'): | ||
| 290 | case _UT('B'): | ||
| 291 | case _UT('C'): | ||
| 292 | case _UT('D'): | ||
| 293 | case _UT('E'): | ||
| 294 | case _UT('F'): | ||
| 295 | switch (read[2]) { | ||
| 296 | case _UT('0'): | ||
| 297 | case _UT('1'): | ||
| 298 | case _UT('2'): | ||
| 299 | case _UT('3'): | ||
| 300 | case _UT('4'): | ||
| 301 | case _UT('5'): | ||
| 302 | case _UT('6'): | ||
| 303 | case _UT('7'): | ||
| 304 | case _UT('8'): | ||
| 305 | case _UT('9'): | ||
| 306 | case _UT('a'): | ||
| 307 | case _UT('b'): | ||
| 308 | case _UT('c'): | ||
| 309 | case _UT('d'): | ||
| 310 | case _UT('e'): | ||
| 311 | case _UT('f'): | ||
| 312 | case _UT('A'): | ||
| 313 | case _UT('B'): | ||
| 314 | case _UT('C'): | ||
| 315 | case _UT('D'): | ||
| 316 | case _UT('E'): | ||
| 317 | case _UT('F'): | ||
| 318 | { | ||
| 319 | /* Percent group found */ | ||
| 320 | const unsigned char left = URI_FUNC(HexdigToInt)(read[1]); | ||
| 321 | const unsigned char right = URI_FUNC(HexdigToInt)(read[2]); | ||
| 322 | const int code = 16 * left + right; | ||
| 323 | switch (code) { | ||
| 324 | case 10: | ||
| 325 | switch (breakConversion) { | ||
| 326 | case URI_BR_TO_LF: | ||
| 327 | if (!prevWasCr) { | ||
| 328 | write[0] = (URI_CHAR)10; | ||
| 329 | write++; | ||
| 330 | } | ||
| 331 | break; | ||
| 332 | |||
| 333 | case URI_BR_TO_CRLF: | ||
| 334 | if (!prevWasCr) { | ||
| 335 | write[0] = (URI_CHAR)13; | ||
| 336 | write[1] = (URI_CHAR)10; | ||
| 337 | write += 2; | ||
| 338 | } | ||
| 339 | break; | ||
| 340 | |||
| 341 | case URI_BR_TO_CR: | ||
| 342 | if (!prevWasCr) { | ||
| 343 | write[0] = (URI_CHAR)13; | ||
| 344 | write++; | ||
| 345 | } | ||
| 346 | break; | ||
| 347 | |||
| 348 | case URI_BR_DONT_TOUCH: | ||
| 349 | default: | ||
| 350 | write[0] = (URI_CHAR)10; | ||
| 351 | write++; | ||
| 352 | |||
| 353 | } | ||
| 354 | prevWasCr = URI_FALSE; | ||
| 355 | break; | ||
| 356 | |||
| 357 | case 13: | ||
| 358 | switch (breakConversion) { | ||
| 359 | case URI_BR_TO_LF: | ||
| 360 | write[0] = (URI_CHAR)10; | ||
| 361 | write++; | ||
| 362 | break; | ||
| 363 | |||
| 364 | case URI_BR_TO_CRLF: | ||
| 365 | write[0] = (URI_CHAR)13; | ||
| 366 | write[1] = (URI_CHAR)10; | ||
| 367 | write += 2; | ||
| 368 | break; | ||
| 369 | |||
| 370 | case URI_BR_TO_CR: | ||
| 371 | write[0] = (URI_CHAR)13; | ||
| 372 | write++; | ||
| 373 | break; | ||
| 374 | |||
| 375 | case URI_BR_DONT_TOUCH: | ||
| 376 | default: | ||
| 377 | write[0] = (URI_CHAR)13; | ||
| 378 | write++; | ||
| 379 | |||
| 380 | } | ||
| 381 | prevWasCr = URI_TRUE; | ||
| 382 | break; | ||
| 383 | |||
| 384 | default: | ||
| 385 | write[0] = (URI_CHAR)(code); | ||
| 386 | write++; | ||
| 387 | |||
| 388 | prevWasCr = URI_FALSE; | ||
| 389 | |||
| 390 | } | ||
| 391 | read += 3; | ||
| 392 | } | ||
| 393 | break; | ||
| 394 | |||
| 395 | default: | ||
| 396 | /* Copy two chars unmodified and */ | ||
| 397 | /* look at this char again */ | ||
| 398 | if (read > write) { | ||
| 399 | write[0] = read[0]; | ||
| 400 | write[1] = read[1]; | ||
| 401 | } | ||
| 402 | read += 2; | ||
| 403 | write += 2; | ||
| 404 | |||
| 405 | prevWasCr = URI_FALSE; | ||
| 406 | } | ||
| 407 | break; | ||
| 408 | |||
| 409 | default: | ||
| 410 | /* Copy one char unmodified and */ | ||
| 411 | /* look at this char again */ | ||
| 412 | if (read > write) { | ||
| 413 | write[0] = read[0]; | ||
| 414 | } | ||
| 415 | read++; | ||
| 416 | write++; | ||
| 417 | |||
| 418 | prevWasCr = URI_FALSE; | ||
| 419 | } | ||
| 420 | break; | ||
| 421 | |||
| 422 | case _UT('+'): | ||
| 423 | if (plusToSpace) { | ||
| 424 | /* Convert '+' to ' ' */ | ||
| 425 | write[0] = _UT(' '); | ||
| 426 | } else { | ||
| 427 | /* Copy one char unmodified */ | ||
| 428 | if (read > write) { | ||
| 429 | write[0] = read[0]; | ||
| 430 | } | ||
| 431 | } | ||
| 432 | read++; | ||
| 433 | write++; | ||
| 434 | |||
| 435 | prevWasCr = URI_FALSE; | ||
| 436 | break; | ||
| 437 | |||
| 438 | default: | ||
| 439 | /* Copy one char unmodified */ | ||
| 440 | if (read > write) { | ||
| 441 | write[0] = read[0]; | ||
| 442 | } | ||
| 443 | read++; | ||
| 444 | write++; | ||
| 445 | |||
| 446 | prevWasCr = URI_FALSE; | ||
| 447 | } | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | |||
| 452 | |||
| 453 | #endif | ||
