/* * uriparser - RFC 3986 URI parsing library * * Copyright (C) 2007, Weijia Song * Copyright (C) 2007, Sebastian Pipping * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * * Neither the name of the nor the names of its * contributors may be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */ /* What encodings are enabled? */ #include #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) /* Include SELF twice */ # ifdef URI_ENABLE_ANSI # define URI_PASS_ANSI 1 # include "UriEscape.c" # undef URI_PASS_ANSI # endif # ifdef URI_ENABLE_UNICODE # define URI_PASS_UNICODE 1 # include "UriEscape.c" # undef URI_PASS_UNICODE # endif #else # ifdef URI_PASS_ANSI # include # else # include # include # endif #ifndef URI_DOXYGEN # include # include "UriCommon.h" #endif URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out, UriBool spaceToPlus, UriBool normalizeBreaks) { return URI_FUNC(EscapeEx)(in, NULL, out, spaceToPlus, normalizeBreaks); } URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, const URI_CHAR * inAfterLast, URI_CHAR * out, UriBool spaceToPlus, UriBool normalizeBreaks) { const URI_CHAR * read = inFirst; URI_CHAR * write = out; UriBool prevWasCr = URI_FALSE; if ((out == NULL) || (inFirst == out)) { return NULL; } else if (inFirst == NULL) { if (out != NULL) { out[0] = _UT('\0'); } return out; } for (;;) { if ((inAfterLast != NULL) && (read >= inAfterLast)) { write[0] = _UT('\0'); return write; } switch (read[0]) { case _UT('\0'): write[0] = _UT('\0'); return write; case _UT(' '): if (spaceToPlus) { write[0] = _UT('+'); write++; } else { write[0] = _UT('%'); write[1] = _UT('2'); write[2] = _UT('0'); write += 3; } prevWasCr = URI_FALSE; break; case _UT('a'): /* ALPHA */ case _UT('A'): case _UT('b'): case _UT('B'): case _UT('c'): case _UT('C'): case _UT('d'): case _UT('D'): case _UT('e'): case _UT('E'): case _UT('f'): case _UT('F'): case _UT('g'): case _UT('G'): case _UT('h'): case _UT('H'): case _UT('i'): case _UT('I'): case _UT('j'): case _UT('J'): case _UT('k'): case _UT('K'): case _UT('l'): case _UT('L'): case _UT('m'): case _UT('M'): case _UT('n'): case _UT('N'): case _UT('o'): case _UT('O'): case _UT('p'): case _UT('P'): case _UT('q'): case _UT('Q'): case _UT('r'): case _UT('R'): case _UT('s'): case _UT('S'): case _UT('t'): case _UT('T'): case _UT('u'): case _UT('U'): case _UT('v'): case _UT('V'): case _UT('w'): case _UT('W'): case _UT('x'): case _UT('X'): case _UT('y'): case _UT('Y'): case _UT('z'): case _UT('Z'): case _UT('0'): /* DIGIT */ case _UT('1'): case _UT('2'): case _UT('3'): case _UT('4'): case _UT('5'): case _UT('6'): case _UT('7'): case _UT('8'): case _UT('9'): case _UT('-'): /* "-" / "." / "_" / "~" */ case _UT('.'): case _UT('_'): case _UT('~'): /* Copy unmodified */ write[0] = read[0]; write++; prevWasCr = URI_FALSE; break; case _UT('\x0a'): if (normalizeBreaks) { if (!prevWasCr) { write[0] = _UT('%'); write[1] = _UT('0'); write[2] = _UT('D'); write[3] = _UT('%'); write[4] = _UT('0'); write[5] = _UT('A'); write += 6; } } else { write[0] = _UT('%'); write[1] = _UT('0'); write[2] = _UT('A'); write += 3; } prevWasCr = URI_FALSE; break; case _UT('\x0d'): if (normalizeBreaks) { write[0] = _UT('%'); write[1] = _UT('0'); write[2] = _UT('D'); write[3] = _UT('%'); write[4] = _UT('0'); write[5] = _UT('A'); write += 6; } else { write[0] = _UT('%'); write[1] = _UT('0'); write[2] = _UT('D'); write += 3; } prevWasCr = URI_TRUE; break; default: /* Percent encode */ { const unsigned char code = (unsigned char)read[0]; write[0] = _UT('%'); write[1] = URI_FUNC(HexToLetter)(code >> 4); write[2] = URI_FUNC(HexToLetter)(code & 0x0f); write += 3; } prevWasCr = URI_FALSE; break; } read++; } } const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) { return URI_FUNC(UnescapeInPlaceEx)(inout, URI_FALSE, URI_BR_DONT_TOUCH); } const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout, UriBool plusToSpace, UriBreakConversion breakConversion) { URI_CHAR * read = inout; URI_CHAR * write = inout; UriBool prevWasCr = URI_FALSE; if (inout == NULL) { return NULL; } for (;;) { switch (read[0]) { case _UT('\0'): if (read > write) { write[0] = _UT('\0'); } return write; case _UT('%'): switch (read[1]) { case _UT('0'): case _UT('1'): case _UT('2'): case _UT('3'): case _UT('4'): case _UT('5'): case _UT('6'): case _UT('7'): case _UT('8'): case _UT('9'): case _UT('a'): case _UT('b'): case _UT('c'): case _UT('d'): case _UT('e'): case _UT('f'): case _UT('A'): case _UT('B'): case _UT('C'): case _UT('D'): case _UT('E'): case _UT('F'): switch (read[2]) { case _UT('0'): case _UT('1'): case _UT('2'): case _UT('3'): case _UT('4'): case _UT('5'): case _UT('6'): case _UT('7'): case _UT('8'): case _UT('9'): case _UT('a'): case _UT('b'): case _UT('c'): case _UT('d'): case _UT('e'): case _UT('f'): case _UT('A'): case _UT('B'): case _UT('C'): case _UT('D'): case _UT('E'): case _UT('F'): { /* Percent group found */ const unsigned char left = URI_FUNC(HexdigToInt)(read[1]); const unsigned char right = URI_FUNC(HexdigToInt)(read[2]); const int code = 16 * left + right; switch (code) { case 10: switch (breakConversion) { case URI_BR_TO_LF: if (!prevWasCr) { write[0] = (URI_CHAR)10; write++; } break; case URI_BR_TO_CRLF: if (!prevWasCr) { write[0] = (URI_CHAR)13; write[1] = (URI_CHAR)10; write += 2; } break; case URI_BR_TO_CR: if (!prevWasCr) { write[0] = (URI_CHAR)13; write++; } break; case URI_BR_DONT_TOUCH: default: write[0] = (URI_CHAR)10; write++; } prevWasCr = URI_FALSE; break; case 13: switch (breakConversion) { case URI_BR_TO_LF: write[0] = (URI_CHAR)10; write++; break; case URI_BR_TO_CRLF: write[0] = (URI_CHAR)13; write[1] = (URI_CHAR)10; write += 2; break; case URI_BR_TO_CR: write[0] = (URI_CHAR)13; write++; break; case URI_BR_DONT_TOUCH: default: write[0] = (URI_CHAR)13; write++; } prevWasCr = URI_TRUE; break; default: write[0] = (URI_CHAR)(code); write++; prevWasCr = URI_FALSE; } read += 3; } break; default: /* Copy two chars unmodified and */ /* look at this char again */ if (read > write) { write[0] = read[0]; write[1] = read[1]; } read += 2; write += 2; prevWasCr = URI_FALSE; } break; default: /* Copy one char unmodified and */ /* look at this char again */ if (read > write) { write[0] = read[0]; } read++; write++; prevWasCr = URI_FALSE; } break; case _UT('+'): if (plusToSpace) { /* Convert '+' to ' ' */ write[0] = _UT(' '); } else { /* Copy one char unmodified */ if (read > write) { write[0] = read[0]; } } read++; write++; prevWasCr = URI_FALSE; break; default: /* Copy one char unmodified */ if (read > write) { write[0] = read[0]; } read++; write++; prevWasCr = URI_FALSE; } } } #endif