/* * uriparser - RFC 3986 URI parsing library * * Copyright (C) 2007, Weijia Song * Copyright (C) 2007, Sebastian Pipping * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * * Neither the name of the nor the names of its * contributors may be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */ /* What encodings are enabled? */ #include #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) /* Include SELF twice */ # ifdef URI_ENABLE_ANSI # define URI_PASS_ANSI 1 # include "UriCommon.c" # undef URI_PASS_ANSI # endif # ifdef URI_ENABLE_UNICODE # define URI_PASS_UNICODE 1 # include "UriCommon.c" # undef URI_PASS_UNICODE # endif #else # ifdef URI_PASS_ANSI # include # else # include # include # endif #ifndef URI_DOXYGEN # include # include "UriCommon.h" #endif /*extern*/ const URI_CHAR * const URI_FUNC(SafeToPointTo) = _UT("X"); /*extern*/ const URI_CHAR * const URI_FUNC(ConstPwd) = _UT("."); /*extern*/ const URI_CHAR * const URI_FUNC(ConstParent) = _UT(".."); void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri) { memset(uri, 0, sizeof(URI_TYPE(Uri))); } /* Compares two text ranges for equal text content */ int URI_FUNC(CompareRange)( const URI_TYPE(TextRange) * a, const URI_TYPE(TextRange) * b) { int diff; /* NOTE: Both NULL means equal! */ if ((a == NULL) || (b == NULL)) { return ((a == NULL) ? 0 : 1) - ((b == NULL) ? 0 : 1); } /* NOTE: Both NULL means equal! */ if ((a->first == NULL) || (b->first == NULL)) { return ((a->first == NULL) ? 0 : 1) - ((b->first == NULL) ? 0 : 1); } diff = ((int)(a->afterLast - a->first) - (int)(b->afterLast - b->first)); if (diff > 0) { return 1; } else if (diff < 0) { return -1; } diff = URI_STRNCMP(a->first, b->first, (a->afterLast - a->first)); if (diff > 0) { return 1; } else if (diff < 0) { return -1; } return diff; } /* Properly removes "." and ".." path segments */ UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri, UriBool relative) { if (uri == NULL) { return URI_TRUE; } return URI_FUNC(RemoveDotSegmentsEx)(uri, relative, uri->owner); } UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, UriBool relative, UriBool pathOwned) { URI_TYPE(PathSegment) * walker; if ((uri == NULL) || (uri->pathHead == NULL)) { return URI_TRUE; } walker = uri->pathHead; walker->reserved = NULL; /* Prev pointer */ do { UriBool removeSegment = URI_FALSE; int len = (int)(walker->text.afterLast - walker->text.first); switch (len) { case 1: if ((walker->text.first)[0] == _UT('.')) { /* "." segment -> remove if not essential */ URI_TYPE(PathSegment) * const prev = walker->reserved; URI_TYPE(PathSegment) * const nextBackup = walker->next; /* Is this dot segment essential? */ removeSegment = URI_TRUE; if (relative && (walker == uri->pathHead) && (walker->next != NULL)) { const URI_CHAR * ch = walker->next->text.first; for (; ch < walker->next->text.afterLast; ch++) { if (*ch == _UT(':')) { removeSegment = URI_FALSE; break; } } } if (removeSegment) { /* Last segment? */ if (walker->next != NULL) { /* Not last segment */ walker->next->reserved = prev; if (prev == NULL) { /* First but not last segment */ uri->pathHead = walker->next; } else { /* Middle segment */ prev->next = walker->next; } if (pathOwned && (walker->text.first != walker->text.afterLast)) { free((URI_CHAR *)walker->text.first); } free(walker); } else { /* Last segment */ if (pathOwned && (walker->text.first != walker->text.afterLast)) { free((URI_CHAR *)walker->text.first); } if (prev == NULL) { /* Last and first */ if (URI_FUNC(IsHostSet)(uri)) { /* Replace "." with empty segment to represent trailing slash */ walker->text.first = URI_FUNC(SafeToPointTo); walker->text.afterLast = URI_FUNC(SafeToPointTo); } else { free(walker); uri->pathHead = NULL; uri->pathTail = NULL; } } else { /* Last but not first, replace "." with empty segment to represent trailing slash */ walker->text.first = URI_FUNC(SafeToPointTo); walker->text.afterLast = URI_FUNC(SafeToPointTo); } } walker = nextBackup; } } break; case 2: if (((walker->text.first)[0] == _UT('.')) && ((walker->text.first)[1] == _UT('.'))) { /* Path ".." -> remove this and the previous segment */ URI_TYPE(PathSegment) * const prev = walker->reserved; URI_TYPE(PathSegment) * prevPrev; URI_TYPE(PathSegment) * const nextBackup = walker->next; removeSegment = URI_TRUE; if (relative) { if (prev == NULL) { removeSegment = URI_FALSE; } else if ((prev != NULL) && ((prev->text.afterLast - prev->text.first) == 2) && ((prev->text.first)[0] == _UT('.')) && ((prev->text.first)[1] == _UT('.'))) { removeSegment = URI_FALSE; } } if (removeSegment) { if (prev != NULL) { /* Not first segment */ prevPrev = prev->reserved; if (prevPrev != NULL) { /* Not even prev is the first one */ prevPrev->next = walker->next; if (walker->next != NULL) { walker->next->reserved = prevPrev; } else { /* Last segment -> insert "" segment to represent trailing slash, update tail */ URI_TYPE(PathSegment) * const segment = malloc(1 * sizeof(URI_TYPE(PathSegment))); if (segment == NULL) { if (pathOwned && (walker->text.first != walker->text.afterLast)) { free((URI_CHAR *)walker->text.first); } free(walker); if (pathOwned && (prev->text.first != prev->text.afterLast)) { free((URI_CHAR *)prev->text.first); } free(prev); return URI_FALSE; /* Raises malloc error */ } memset(segment, 0, sizeof(URI_TYPE(PathSegment))); segment->text.first = URI_FUNC(SafeToPointTo); segment->text.afterLast = URI_FUNC(SafeToPointTo); prevPrev->next = segment; uri->pathTail = segment; } if (pathOwned && (walker->text.first != walker->text.afterLast)) { free((URI_CHAR *)walker->text.first); } free(walker); if (pathOwned && (prev->text.first != prev->text.afterLast)) { free((URI_CHAR *)prev->text.first); } free(prev); walker = nextBackup; } else { /* Prev is the first segment */ if (walker->next != NULL) { uri->pathHead = walker->next; walker->next->reserved = NULL; if (pathOwned && (walker->text.first != walker->text.afterLast)) { free((URI_CHAR *)walker->text.first); } free(walker); } else { /* Re-use segment for "" path segment to represent trailing slash, update tail */ URI_TYPE(PathSegment) * const segment = walker; if (pathOwned && (segment->text.first != segment->text.afterLast)) { free((URI_CHAR *)segment->text.first); } segment->text.first = URI_FUNC(SafeToPointTo); segment->text.afterLast = URI_FUNC(SafeToPointTo); uri->pathHead = segment; uri->pathTail = segment; } if (pathOwned && (prev->text.first != prev->text.afterLast)) { free((URI_CHAR *)prev->text.first); } free(prev); walker = nextBackup; } } else { URI_TYPE(PathSegment) * const anotherNextBackup = walker->next; /* First segment -> update head pointer */ uri->pathHead = walker->next; if (walker->next != NULL) { walker->next->reserved = NULL; } else { /* Last segment -> update tail */ uri->pathTail = NULL; } if (pathOwned && (walker->text.first != walker->text.afterLast)) { free((URI_CHAR *)walker->text.first); } free(walker); walker = anotherNextBackup; } } } break; } if (!removeSegment) { if (walker->next != NULL) { walker->next->reserved = walker; } else { /* Last segment -> update tail */ uri->pathTail = walker; } walker = walker->next; } } while (walker != NULL); return URI_TRUE; } /* Properly removes "." and ".." path segments */ UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri) { const UriBool ABSOLUTE = URI_FALSE; return URI_FUNC(RemoveDotSegments)(uri, ABSOLUTE); } unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) { switch (hexdig) { case _UT('0'): case _UT('1'): case _UT('2'): case _UT('3'): case _UT('4'): case _UT('5'): case _UT('6'): case _UT('7'): case _UT('8'): case _UT('9'): return (unsigned char)(9 + hexdig - _UT('9')); case _UT('a'): case _UT('b'): case _UT('c'): case _UT('d'): case _UT('e'): case _UT('f'): return (unsigned char)(15 + hexdig - _UT('f')); case _UT('A'): case _UT('B'): case _UT('C'): case _UT('D'): case _UT('E'): case _UT('F'): return (unsigned char)(15 + hexdig - _UT('F')); default: return 0; } } URI_CHAR URI_FUNC(HexToLetter)(unsigned int value) { /* Uppercase recommended in section 2.1. of RFC 3986 * * http://tools.ietf.org/html/rfc3986#section-2.1 */ return URI_FUNC(HexToLetterEx)(value, URI_TRUE); } URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) { switch (value) { case 0: return _UT('0'); case 1: return _UT('1'); case 2: return _UT('2'); case 3: return _UT('3'); case 4: return _UT('4'); case 5: return _UT('5'); case 6: return _UT('6'); case 7: return _UT('7'); case 8: return _UT('8'); case 9: return _UT('9'); case 10: return (uppercase == URI_TRUE) ? _UT('A') : _UT('a'); case 11: return (uppercase == URI_TRUE) ? _UT('B') : _UT('b'); case 12: return (uppercase == URI_TRUE) ? _UT('C') : _UT('c'); case 13: return (uppercase == URI_TRUE) ? _UT('D') : _UT('d'); case 14: return (uppercase == URI_TRUE) ? _UT('E') : _UT('e'); default: return (uppercase == URI_TRUE) ? _UT('F') : _UT('f'); } } /* Checks if a URI has the host component set. */ UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) { return (uri != NULL) && ((uri->hostText.first != NULL) || (uri->hostData.ip4 != NULL) || (uri->hostData.ip6 != NULL) || (uri->hostData.ipFuture.first != NULL) ); } /* Copies the path segment list from one URI to another. */ UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source) { if (source->pathHead == NULL) { /* No path component */ dest->pathHead = NULL; dest->pathTail = NULL; } else { /* Copy list but not the text contained */ URI_TYPE(PathSegment) * sourceWalker = source->pathHead; URI_TYPE(PathSegment) * destPrev = NULL; do { URI_TYPE(PathSegment) * cur = malloc(sizeof(URI_TYPE(PathSegment))); if (cur == NULL) { /* Fix broken list */ if (destPrev != NULL) { destPrev->next = NULL; } return URI_FALSE; /* Raises malloc error */ } /* From this functions usage we know that * * the dest URI cannot be uri->owner */ cur->text = sourceWalker->text; if (destPrev == NULL) { /* First segment ever */ dest->pathHead = cur; } else { destPrev->next = cur; } destPrev = cur; sourceWalker = sourceWalker->next; } while (sourceWalker != NULL); dest->pathTail = destPrev; dest->pathTail->next = NULL; } dest->absolutePath = source->absolutePath; return URI_TRUE; } /* Copies the authority part of an URI over to another. */ UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source) { /* From this functions usage we know that * * the dest URI cannot be uri->owner */ /* Copy userInfo */ dest->userInfo = source->userInfo; /* Copy hostText */ dest->hostText = source->hostText; /* Copy hostData */ if (source->hostData.ip4 != NULL) { dest->hostData.ip4 = malloc(sizeof(UriIp4)); if (dest->hostData.ip4 == NULL) { return URI_FALSE; /* Raises malloc error */ } *(dest->hostData.ip4) = *(source->hostData.ip4); dest->hostData.ip6 = NULL; dest->hostData.ipFuture.first = NULL; dest->hostData.ipFuture.afterLast = NULL; } else if (source->hostData.ip6 != NULL) { dest->hostData.ip4 = NULL; dest->hostData.ip6 = malloc(sizeof(UriIp6)); if (dest->hostData.ip6 == NULL) { return URI_FALSE; /* Raises malloc error */ } *(dest->hostData.ip6) = *(source->hostData.ip6); dest->hostData.ipFuture.first = NULL; dest->hostData.ipFuture.afterLast = NULL; } else { dest->hostData.ip4 = NULL; dest->hostData.ip6 = NULL; dest->hostData.ipFuture = source->hostData.ipFuture; } /* Copy portText */ dest->portText = source->portText; return URI_TRUE; } UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri) { URI_TYPE(PathSegment) * segment; if ( /* Case 1: absolute path, empty first segment */ (uri->absolutePath && (uri->pathHead != NULL) && (uri->pathHead->text.afterLast == uri->pathHead->text.first)) /* Case 2: relative path, empty first and second segment */ || (!uri->absolutePath && (uri->pathHead != NULL) && (uri->pathHead->next != NULL) && (uri->pathHead->text.afterLast == uri->pathHead->text.first) && (uri->pathHead->next->text.afterLast == uri->pathHead->next->text.first))) { /* NOOP */ } else { return URI_TRUE; } segment = malloc(1 * sizeof(URI_TYPE(PathSegment))); if (segment == NULL) { return URI_FALSE; /* Raises malloc error */ } /* Insert "." segment in front */ segment->next = uri->pathHead; segment->text.first = URI_FUNC(ConstPwd); segment->text.afterLast = URI_FUNC(ConstPwd) + 1; uri->pathHead = segment; return URI_TRUE; } void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri) { /* Fix path if only one empty segment */ if (!uri->absolutePath && !URI_FUNC(IsHostSet)(uri) && (uri->pathHead != NULL) && (uri->pathHead->next == NULL) && (uri->pathHead->text.first == uri->pathHead->text.afterLast)) { free(uri->pathHead); uri->pathHead = NULL; uri->pathTail = NULL; } } #endif