/*
 * uriparser - RFC 3986 URI parsing library
 *
 * Copyright (C) 2007, Weijia Song
 * Copyright (C) 2007, Sebastian Pipping
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer.
 *
 *     * Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 *     * Neither the name of the nor the names of its
 *       contributors may be used to endorse or promote products
 *       derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file Uri.h
 * Holds the RFC 3986 %URI parser interface.
 * NOTE: This header includes itself twice.
 */

/*
 * Self-inclusion scheme: when neither URI_PASS_ANSI nor URI_PASS_UNICODE is
 * defined (the include coming from user code), this file re-includes itself
 * once per enabled encoding with the matching URI_PASS_* macro set. Each of
 * those passes defines its URI_H_* guard, pulls in UriDefsAnsi.h or
 * UriDefsUnicode.h and then emits the declarations below.
 * NOTE(review): URI_CHAR, URI_TYPE() and URI_FUNC() are not defined in this
 * file; presumably they come from the UriDefs*.h headers -- confirm there.
 */
#if (defined(URI_PASS_ANSI) && !defined(URI_H_ANSI)) \
	|| (defined(URI_PASS_UNICODE) && !defined(URI_H_UNICODE)) \
	|| (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
#  define URI_PASS_ANSI 1
#  include "Uri.h"
#  undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
#  define URI_PASS_UNICODE 1
#  include "Uri.h"
#  undef URI_PASS_UNICODE
# endif
/* Only one pass for each encoding */
#elif (defined(URI_PASS_ANSI) && !defined(URI_H_ANSI) \
	&& defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
	&& !defined(URI_H_UNICODE) && defined(URI_ENABLE_UNICODE))
# ifdef URI_PASS_ANSI
#  define URI_H_ANSI 1
#  include "UriDefsAnsi.h"
# else
#  define URI_H_UNICODE 1
#  include "UriDefsUnicode.h"
# endif



#ifdef __cplusplus
extern "C" {
#endif



#ifndef URI_DOXYGEN
# include "UriBase.h"
#endif



/**
 * Specifies a range of characters within a string.
 * The range includes all characters from first
 * to one before afterLast. So if both are
 * non-NULL the difference is the length of the text range.
 *
 * @see UriUriA
 * @see UriPathSegmentA
 * @see UriHostDataA
 * @since 0.3.0
 */
typedef struct URI_TYPE(TextRangeStruct) {
	const URI_CHAR * first; /**< Pointer to first character */
	const URI_CHAR * afterLast; /**< Pointer to character after the last one still in */
} URI_TYPE(TextRange); /**< @copydoc UriTextRangeStructA */



/**
 * Represents a path segment within a %URI path.
 * More precisely it is a node in a linked
 * list of path segments.
 *
 * @see UriUriA
 * @since 0.3.0
 */
typedef struct URI_TYPE(PathSegmentStruct) {
	URI_TYPE(TextRange) text; /**< Path segment name */
	struct URI_TYPE(PathSegmentStruct) * next; /**< Pointer to the next path segment in the list, can be NULL if last already */

	void * reserved; /**< Reserved to the parser */
} URI_TYPE(PathSegment); /**< @copydoc UriPathSegmentStructA */



/**
 * Holds structured host information.
 * This is either an IPv4, IPv6, plain
 * text for IPvFuture or all zero for
 * a registered name.
 *
 * @see UriUriA
 * @since 0.3.0
 */
typedef struct URI_TYPE(HostDataStruct) {
	UriIp4 * ip4; /**< IPv4 address */
	UriIp6 * ip6; /**< IPv6 address */
	URI_TYPE(TextRange) ipFuture; /**< IPvFuture address */
} URI_TYPE(HostData); /**< @copydoc UriHostDataStructA */



/**
 * Represents an RFC 3986 %URI.
 * Missing components can be {NULL, NULL} ranges.
 *
 * @see uriParseUriA
 * @see uriFreeUriMembersA
 * @see UriParserStateA
 * @since 0.3.0
 */
typedef struct URI_TYPE(UriStruct) {
	URI_TYPE(TextRange) scheme; /**< Scheme (e.g. "http") */
	URI_TYPE(TextRange) userInfo; /**< User info (e.g. "user:pass") */
	URI_TYPE(TextRange) hostText; /**< Host text (set for all hosts, excluding square brackets) */
	URI_TYPE(HostData) hostData; /**< Structured host type specific data */
	URI_TYPE(TextRange) portText; /**< Port (e.g. "80") */
	URI_TYPE(PathSegment) * pathHead; /**< Head of a linked list of path segments */
	URI_TYPE(PathSegment) * pathTail; /**< Tail of the list behind pathHead */
	URI_TYPE(TextRange) query; /**< Query without leading "?" */
	URI_TYPE(TextRange) fragment; /**< Fragment without leading "#" */
	UriBool absolutePath; /**< Absolute path flag, distinguishing "a" and "/a" */
	UriBool owner; /**< Memory owner flag */

	void * reserved; /**< Reserved to the parser */
} URI_TYPE(Uri); /**< @copydoc UriUriStructA */



/**
 * Represents a state of the %URI parser.
 * Missing components can be NULL to reflect
 * a component's absence.
 *
 * @see uriFreeUriMembersA
 * @since 0.3.0
 */
typedef struct URI_TYPE(ParserStateStruct) {
	URI_TYPE(Uri) * uri; /**< Plug in the %URI structure to be filled while parsing here */
	int errorCode; /**< Code identifying the error that occurred */
	const URI_CHAR * errorPos; /**< Pointer to position in case of a syntax error */

	void * reserved; /**< Reserved to the parser */
} URI_TYPE(ParserState); /**< @copydoc UriParserStateStructA */



/**
 * Represents a query element.
 * More precisely it is a node in a linked
 * list of query elements.
 *
 * @since 0.7.0
 */
typedef struct URI_TYPE(QueryListStruct) {
	const URI_CHAR * key; /**< Key of the query element */
	const URI_CHAR * value; /**< Value of the query element, can be NULL */

	struct URI_TYPE(QueryListStruct) * next; /**< Pointer to the next key/value pair in the list, can be NULL if last already */
} URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */



/**
 * Parses an RFC 3986 %URI.
 *
 * @param state       <b>INOUT</b>: Parser state with set output %URI, must not be NULL
 * @param first       <b>IN</b>: Pointer to the first character to parse, must not be NULL
 * @param afterLast   <b>IN</b>: Pointer to the character after the last to parse, must not be NULL
 * @return            0 on success, error code otherwise
 *
 * @see uriParseUriA
 * @see uriToStringA
 * @since 0.3.0
 */
int URI_FUNC(ParseUriEx)(URI_TYPE(ParserState) * state,
		const URI_CHAR * first, const URI_CHAR * afterLast);



/**
 * Parses an RFC 3986 %URI.
 *
 * @param state   <b>INOUT</b>: Parser state with set output %URI, must not be NULL
 * @param text    <b>IN</b>: Text to parse, must not be NULL
 * @return        0 on success, error code otherwise
 *
 * @see uriParseUriExA
 * @see uriToStringA
 * @since 0.3.0
 */
int URI_FUNC(ParseUri)(URI_TYPE(ParserState) * state,
		const URI_CHAR * text);



/**
 * Frees all memory associated with the members
 * of the %URI structure. Note that the structure
 * itself is not freed, only its members.
 *
 * @param uri   <b>INOUT</b>: %URI structure whose members should be freed
 *
 * @since 0.3.0
 */
void URI_FUNC(FreeUriMembers)(URI_TYPE(Uri) * uri);



/*
 * NOTE(review): the summary below used to read "all unreserved characters".
 * RFC 3986 section 2.3 lists unreserved characters as the ones that need no
 * escaping, so it was reworded to "all but unreserved" -- confirm against
 * the implementation.
 */
/**
 * Percent-encodes all but unreserved characters from the input string and
 * writes the encoded version to the output string.
 * Be sure to allocate <b>3 times</b> the space of the input buffer for
 * the output buffer for normalizeBreaks == URI_FALSE and <b>6 times</b>
 * the space for normalizeBreaks == URI_TRUE
 * (since e.g. "\x0d" becomes "%0D%0A" in that case)
 *
 * @param inFirst           <b>IN</b>: Pointer to first character of the input text
 * @param inAfterLast       <b>IN</b>: Pointer after the last character of the input text
 * @param out               <b>OUT</b>: Encoded text destination
 * @param spaceToPlus       <b>IN</b>: Whether to convert ' ' to '+' or not
 * @param normalizeBreaks   <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
 * @return                  Position of terminator in output string
 *
 * @see uriEscapeA
 * @see uriUnescapeInPlaceExA
 * @since 0.5.2
 */
URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst,
		const URI_CHAR * inAfterLast, URI_CHAR * out,
		UriBool spaceToPlus, UriBool normalizeBreaks);



/*
 * NOTE(review): the summary below used to read "all unreserved characters".
 * RFC 3986 section 2.3 lists unreserved characters as the ones that need no
 * escaping, so it was reworded to "all but unreserved" -- confirm against
 * the implementation.
 */
/**
 * Percent-encodes all but unreserved characters from the input string and
 * writes the encoded version to the output string.
 * Be sure to allocate <b>3 times</b> the space of the input buffer for
 * the output buffer for normalizeBreaks == URI_FALSE and <b>6 times</b>
 * the space for normalizeBreaks == URI_TRUE
 * (since e.g. "\x0d" becomes "%0D%0A" in that case)
 *
 * @param in                <b>IN</b>: Text source
 * @param out               <b>OUT</b>: Encoded text destination
 * @param spaceToPlus       <b>IN</b>: Whether to convert ' ' to '+' or not
 * @param normalizeBreaks   <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
 * @return                  Position of terminator in output string
 *
 * @see uriEscapeExA
 * @see uriUnescapeInPlaceA
 * @since 0.5.0
 */
URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out,
		UriBool spaceToPlus, UriBool normalizeBreaks);



/**
 * Unescapes percent-encoded groups in a given string.
 * E.g. "%20" will become " ". Unescaping is done in place.
 * The return value will point to the new position
 * of the terminating zero. Use this value to get the new
 * length of the string. NULL is only returned if inout
 * is NULL.
 *
 * @param inout             <b>INOUT</b>: Text to unescape/decode
 * @param plusToSpace       <b>IN</b>: Whether to convert '+' to ' ' or not
 * @param breakConversion   <b>IN</b>: Line break conversion mode
 * @return                  Pointer to new position of the terminating zero
 *
 * @see uriUnescapeInPlaceA
 * @see uriEscapeExA
 * @since 0.5.0
 */
const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout,
		UriBool plusToSpace, UriBreakConversion breakConversion);



/**
 * Unescapes percent-encoded groups in a given string.
 * E.g. "%20" will become " ". Unescaping is done in place.
 * The return value will point to the new position
 * of the terminating zero. Use this value to get the new
 * length of the string. NULL is only returned if inout
 * is NULL.
 *
 * NOTE: '+' is not decoded to ' ' and line breaks are not converted.
 * Use the more advanced UnescapeInPlaceEx for these features instead.
 *
 * @param inout   <b>INOUT</b>: Text to unescape/decode
 * @return        Pointer to new position of the terminating zero
 *
 * @see uriUnescapeInPlaceExA
 * @see uriEscapeA
 * @since 0.3.0
 */
const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout);



/**
 * Performs reference resolution as described in
 * section 5.2.2 of RFC 3986.
 * NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later.
 *
 * @param absoluteDest     <b>OUT</b>: Result %URI
 * @param relativeSource   <b>IN</b>: Reference to resolve
 * @param absoluteBase     <b>IN</b>: Base %URI to apply
 * @return                 Error code or 0 on success
 *
 * @see uriRemoveBaseUriA, uriAddBaseUriExA
 * @since 0.4.0
 */
int URI_FUNC(AddBaseUri)(URI_TYPE(Uri) * absoluteDest,
		const URI_TYPE(Uri) * relativeSource,
		const URI_TYPE(Uri) * absoluteBase);



/**
 * Performs reference resolution as described in
 * section 5.2.2 of RFC 3986.
 * NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later.
 *
 * @param absoluteDest     <b>OUT</b>: Result %URI
 * @param relativeSource   <b>IN</b>: Reference to resolve
 * @param absoluteBase     <b>IN</b>: Base %URI to apply
 * @param options          <b>IN</b>: Configuration to apply
 * @return                 Error code or 0 on success
 *
 * @see uriRemoveBaseUriA, uriAddBaseUriA
 * @since 0.8.1
 */
int URI_FUNC(AddBaseUriEx)(URI_TYPE(Uri) * absoluteDest,
		const URI_TYPE(Uri) * relativeSource,
		const URI_TYPE(Uri) * absoluteBase,
		UriResolutionOptions options);



/**
 * Tries to make a relative %URI (a reference) from an
 * absolute %URI and a given base %URI. This can only work if
 * the absolute %URI shares scheme and authority with
 * the base %URI. If it does not the result will still be
 * an absolute %URI (with scheme part if necessary).
 * NOTE: On success you have to call uriFreeUriMembersA on
 * \p dest manually later.
 *
 * @param dest             <b>OUT</b>: Result %URI
 * @param absoluteSource   <b>IN</b>: Absolute %URI to make relative
 * @param absoluteBase     <b>IN</b>: Base %URI
 * @param domainRootMode   <b>IN</b>: Create %URI with path relative to domain root
 * @return                 Error code or 0 on success
 *
 * @see uriAddBaseUriA, uriAddBaseUriExA
 * @since 0.5.2
 */
int URI_FUNC(RemoveBaseUri)(URI_TYPE(Uri) * dest,
		const URI_TYPE(Uri) * absoluteSource,
		const URI_TYPE(Uri) * absoluteBase,
		UriBool domainRootMode);



/**
 * Checks two URIs for equivalence. Comparison is done
 * the naive way, without prior normalization.
 * NOTE: Two NULL URIs are equal as well.
 *
 * @param a   <b>IN</b>: First %URI
 * @param b   <b>IN</b>: Second %URI
 * @return    URI_TRUE when equal, URI_FALSE else
 *
 * @since 0.4.0
 */
UriBool URI_FUNC(EqualsUri)(const URI_TYPE(Uri) * a, const URI_TYPE(Uri) * b);



/**
 * Calculates the number of characters needed to store the
 * string representation of the given %URI excluding the
 * terminator.
 *
 * @param uri             <b>IN</b>: %URI to measure
 * @param charsRequired   <b>OUT</b>: Length of the string representation in characters <b>excluding</b> terminator
 * @return                Error code or 0 on success
 *
 * @see uriToStringA
 * @since 0.5.0
 */
int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri,
		int * charsRequired);



/**
 * Converts a %URI structure back to text as described in
 * section 5.3 of RFC 3986.
 *
 * @param dest           <b>OUT</b>: Output destination
 * @param uri            <b>IN</b>: %URI to convert
 * @param maxChars       <b>IN</b>: Maximum number of characters to copy <b>including</b> terminator
 * @param charsWritten   <b>OUT</b>: Number of characters written, can be lower than maxChars even if the %URI is too long!
 * @return               Error code or 0 on success
 *
 * @see uriToStringCharsRequiredA
 * @since 0.4.0
 */
int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, int maxChars, int * charsWritten);



/**
 * Determines the components of a %URI that are not normalized.
 *
 * @param uri   <b>IN</b>: %URI to check
 * @return      Normalization job mask
 *
 * @see uriNormalizeSyntaxA
 * @since 0.5.0
 */
unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)(const URI_TYPE(Uri) * uri);



/**
 * Normalizes a %URI using a normalization mask.
 * The normalization mask decides what components are normalized.
 *
 * NOTE: If necessary the %URI becomes owner of all memory
 * behind the text pointed to. Text is duplicated in that case.
 *
 * @param uri    <b>INOUT</b>: %URI to normalize
 * @param mask   <b>IN</b>: Normalization mask
 * @return       Error code or 0 on success
 *
 * @see uriNormalizeSyntaxA
 * @see uriNormalizeSyntaxMaskRequiredA
 * @since 0.5.0
 */
int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, unsigned int mask);



/**
 * Normalizes all components of a %URI.
 *
 * NOTE: If necessary the %URI becomes owner of all memory
 * behind the text pointed to. Text is duplicated in that case.
 *
 * @param uri   <b>INOUT</b>: %URI to normalize
 * @return      Error code or 0 on success
 *
 * @see uriNormalizeSyntaxExA
 * @see uriNormalizeSyntaxMaskRequiredA
 * @since 0.5.0
 */
int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri);



/**
 * Converts a Unix filename to a %URI string.
 * The destination buffer must be large enough to hold 7 + 3 * len(filename) + 1
 * characters in case of an absolute filename or 3 * len(filename) + 1 in case
 * of a relative filename.
 *
 * EXAMPLE
 *   Input:  "/bin/bash"
 *   Output: "file:///bin/bash"
 *
 * @param filename     <b>IN</b>: Unix filename to convert
 * @param uriString    <b>OUT</b>: Destination to write %URI string to
 * @return             Error code or 0 on success
 *
 * @see uriUriStringToUnixFilenameA
 * @see uriWindowsFilenameToUriStringA
 * @since 0.5.2
 */
int URI_FUNC(UnixFilenameToUriString)(const URI_CHAR * filename,
		URI_CHAR * uriString);



/**
 * Converts a Windows filename to a %URI string.
 * The destination buffer must be large enough to hold 8 + 3 * len(filename) + 1
 * characters in case of an absolute filename or 3 * len(filename) + 1 in case
 * of a relative filename.
 *
 * EXAMPLE
 *   Input:  "E:\\Documents and Settings"
 *   Output: "file:///E:/Documents%20and%20Settings"
 *
 * @param filename     <b>IN</b>: Windows filename to convert
 * @param uriString    <b>OUT</b>: Destination to write %URI string to
 * @return             Error code or 0 on success
 *
 * @see uriUriStringToWindowsFilenameA
 * @see uriUnixFilenameToUriStringA
 * @since 0.5.2
 */
int URI_FUNC(WindowsFilenameToUriString)(const URI_CHAR * filename,
		URI_CHAR * uriString);



/**
 * Extracts a Unix filename from a %URI string.
 * The destination buffer must be large enough to hold len(uriString) + 1 - 7
 * characters in case of an absolute %URI or len(uriString) + 1 in case
 * of a relative %URI.
 *
 * @param uriString    <b>IN</b>: %URI string to convert
 * @param filename     <b>OUT</b>: Destination to write filename to
 * @return             Error code or 0 on success
 *
 * @see uriUnixFilenameToUriStringA
 * @see uriUriStringToWindowsFilenameA
 * @since 0.5.2
 */
int URI_FUNC(UriStringToUnixFilename)(const URI_CHAR * uriString,
		URI_CHAR * filename);



/**
 * Extracts a Windows filename from a %URI string.
 * The destination buffer must be large enough to hold len(uriString) + 1 - 8
 * characters in case of an absolute %URI or len(uriString) + 1 in case
 * of a relative %URI.
 *
 * @param uriString    <b>IN</b>: %URI string to convert
 * @param filename     <b>OUT</b>: Destination to write filename to
 * @return             Error code or 0 on success
 *
 * @see uriWindowsFilenameToUriStringA
 * @see uriUriStringToUnixFilenameA
 * @since 0.5.2
 */
int URI_FUNC(UriStringToWindowsFilename)(const URI_CHAR * uriString,
		URI_CHAR * filename);



/**
 * Calculates the number of characters needed to store the
 * string representation of the given query list excluding the
 * terminator. It is assumed that line breaks will be
 * normalized to "%0D%0A".
 *
 * @param queryList         <b>IN</b>: Query list to measure
 * @param charsRequired     <b>OUT</b>: Length of the string representation in characters <b>excluding</b> terminator
 * @return                  Error code or 0 on success
 *
 * @see uriComposeQueryCharsRequiredExA
 * @see uriComposeQueryA
 * @since 0.7.0
 */
int URI_FUNC(ComposeQueryCharsRequired)(const URI_TYPE(QueryList) * queryList,
		int * charsRequired);



/**
 * Calculates the number of characters needed to store the
 * string representation of the given query list excluding the
 * terminator.
 *
 * @param queryList         <b>IN</b>: Query list to measure
 * @param charsRequired     <b>OUT</b>: Length of the string representation in characters <b>excluding</b> terminator
 * @param spaceToPlus       <b>IN</b>: Whether to convert ' ' to '+' or not
 * @param normalizeBreaks   <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
 * @return                  Error code or 0 on success
 *
 * @see uriComposeQueryCharsRequiredA
 * @see uriComposeQueryExA
 * @since 0.7.0
 */
int URI_FUNC(ComposeQueryCharsRequiredEx)(const URI_TYPE(QueryList) * queryList,
		int * charsRequired, UriBool spaceToPlus, UriBool normalizeBreaks);



/**
 * Converts a query list structure back to a query string.
 * The composed string does not start with '?',
 * on the way ' ' is converted to '+' and line breaks are
 * normalized to "%0D%0A".
 *
 * @param dest              <b>OUT</b>: Output destination
 * @param queryList         <b>IN</b>: Query list to convert
 * @param maxChars          <b>IN</b>: Maximum number of characters to copy <b>including</b> terminator
 * @param charsWritten      <b>OUT</b>: Number of characters written, can be lower than maxChars even if the query list is too long!
 * @return                  Error code or 0 on success
 *
 * @see uriComposeQueryExA
 * @see uriComposeQueryMallocA
 * @see uriComposeQueryCharsRequiredA
 * @see uriDissectQueryMallocA
 * @since 0.7.0
 */
int URI_FUNC(ComposeQuery)(URI_CHAR * dest,
		const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten);



/**
 * Converts a query list structure back to a query string.
 * The composed string does not start with '?'.
 *
 * @param dest              <b>OUT</b>: Output destination
 * @param queryList         <b>IN</b>: Query list to convert
 * @param maxChars          <b>IN</b>: Maximum number of characters to copy <b>including</b> terminator
 * @param charsWritten      <b>OUT</b>: Number of characters written, can be lower than maxChars even if the query list is too long!
 * @param spaceToPlus       <b>IN</b>: Whether to convert ' ' to '+' or not
 * @param normalizeBreaks   <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
 * @return                  Error code or 0 on success
 *
 * @see uriComposeQueryA
 * @see uriComposeQueryMallocExA
 * @see uriComposeQueryCharsRequiredExA
 * @see uriDissectQueryMallocExA
 * @since 0.7.0
 */
int URI_FUNC(ComposeQueryEx)(URI_CHAR * dest,
		const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten,
		UriBool spaceToPlus, UriBool normalizeBreaks);



/**
 * Converts a query list structure back to a query string.
 * Memory for this string is allocated internally.
 * The composed string does not start with '?',
 * on the way ' ' is converted to '+' and line breaks are
 * normalized to "%0D%0A".
 *
 * @param dest              <b>OUT</b>: Output destination
 * @param queryList         <b>IN</b>: Query list to convert
 * @return                  Error code or 0 on success
 *
 * @see uriComposeQueryMallocExA
 * @see uriComposeQueryA
 * @see uriDissectQueryMallocA
 * @since 0.7.0
 */
int URI_FUNC(ComposeQueryMalloc)(URI_CHAR ** dest,
		const URI_TYPE(QueryList) * queryList);



/**
 * Converts a query list structure back to a query string.
 * Memory for this string is allocated internally.
 * The composed string does not start with '?'.
 *
 * @param dest              <b>OUT</b>: Output destination
 * @param queryList         <b>IN</b>: Query list to convert
 * @param spaceToPlus       <b>IN</b>: Whether to convert ' ' to '+' or not
 * @param normalizeBreaks   <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
 * @return                  Error code or 0 on success
 *
 * @see uriComposeQueryMallocA
 * @see uriComposeQueryExA
 * @see uriDissectQueryMallocExA
 * @since 0.7.0
 */
int URI_FUNC(ComposeQueryMallocEx)(URI_CHAR ** dest,
		const URI_TYPE(QueryList) * queryList,
		UriBool spaceToPlus, UriBool normalizeBreaks);



/**
 * Constructs a query list from the raw query string of a given %URI.
 * On the way '+' is converted back to ' ', line breaks are not modified.
 *
 * @param dest              <b>OUT</b>: Output destination
 * @param itemCount         <b>OUT</b>: Number of items found, can be NULL
 * @param first             <b>IN</b>: Pointer to first character <b>after</b> '?'
 * @param afterLast         <b>IN</b>: Pointer to character after the last one still in
 * @return                  Error code or 0 on success
 *
 * @see uriDissectQueryMallocExA
 * @see uriComposeQueryA
 * @see uriFreeQueryListA
 * @since 0.7.0
 */
int URI_FUNC(DissectQueryMalloc)(URI_TYPE(QueryList) ** dest, int * itemCount,
		const URI_CHAR * first, const URI_CHAR * afterLast);



/**
 * Constructs a query list from the raw query string of a given %URI.
 *
 * @param dest              <b>OUT</b>: Output destination
 * @param itemCount         <b>OUT</b>: Number of items found, can be NULL
 * @param first             <b>IN</b>: Pointer to first character <b>after</b> '?'
 * @param afterLast         <b>IN</b>: Pointer to character after the last one still in
 * @param plusToSpace       <b>IN</b>: Whether to convert '+' to ' ' or not
 * @param breakConversion   <b>IN</b>: Line break conversion mode
 * @return                  Error code or 0 on success
 *
 * @see uriDissectQueryMallocA
 * @see uriComposeQueryExA
 * @see uriFreeQueryListA
 * @since 0.7.0
 */
int URI_FUNC(DissectQueryMallocEx)(URI_TYPE(QueryList) ** dest, int * itemCount,
		const URI_CHAR * first, const URI_CHAR * afterLast,
		UriBool plusToSpace, UriBreakConversion breakConversion);



/**
 * Frees all memory associated with the given query list.
 * The structure itself is freed as well.
 *
 * @param queryList   <b>INOUT</b>: Query list to free
 *
 * @since 0.7.0
 */
void URI_FUNC(FreeQueryList)(URI_TYPE(QueryList) * queryList);



#ifdef __cplusplus
}
#endif



#endif /* closes the "include SELF twice" / single-encoding-pass #if ... #elif */
#endif /* closes the outermost pass/guard check at the top of the file */