/* * uriparser - RFC 3986 URI parsing library * * Copyright (C) 2025, Sebastian Pipping * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * 2. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of * its contributors may be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */ /* What encodings are enabled? */ #include #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) /* Include SELF twice */ # ifdef URI_ENABLE_ANSI # define URI_PASS_ANSI 1 # include "UriSetPath.c" # undef URI_PASS_ANSI # endif # ifdef URI_ENABLE_UNICODE # define URI_PASS_UNICODE 1 # include "UriSetPath.c" # undef URI_PASS_UNICODE # endif #else # ifdef URI_PASS_ANSI # include # else # include # include # endif # ifndef URI_DOXYGEN # include # include "UriCommon.h" # include "UriMemory.h" # include "UriSets.h" # endif # include UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast, UriBool hasHost) { if ((first == NULL) || (afterLast == NULL)) { return URI_FALSE; } if ((hasHost == URI_TRUE) && ((first >= afterLast) || (first[0] != _UT('/')))) { return URI_FALSE; } /* The related part of the grammar in RFC 3986 (section 3.3) reads: * * path = path-abempty ; begins with "/" or is empty * / path-absolute ; begins with "/" but not "//" * / path-noscheme ; begins with a non-colon segment * / path-rootless ; begins with a segment * / path-empty ; zero characters * * path-abempty = *( "/" segment ) * path-absolute = "/" [ segment-nz *( "/" segment ) ] * path-noscheme = segment-nz-nc *( "/" segment ) * path-rootless = segment-nz *( "/" segment ) * path-empty = 0 * * segment = *pchar * segment-nz = 1*pchar * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) * ; non-zero-length segment without any colon ":" * * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" * * The check below simplifies this to .. * * path = *( unreserved / pct-encoded / sub-delims / ":" / "@" / "/" ) * * .. and leaves the rest to pre-return removal of ambiguity * from cases like "path1:/path2" and "//path1/path2" inside SetPath. */ while (first < afterLast) { switch (first[0]) { case URI_SET_PCHAR_WITHOUT_PERCENT(_UT): break; /* pct-encoded */ case _UT('%'): if (afterLast - first < 3) { return URI_FALSE; } switch (first[1]) { case URI_SET_HEXDIG(_UT): break; default: return URI_FALSE; } switch (first[2]) { case URI_SET_HEXDIG(_UT): break; default: return URI_FALSE; } first += 2; break; case _UT('/'): break; default: return URI_FALSE; } first++; } return URI_TRUE; } static void URI_FUNC(DropEmptyFirstPathSegment)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) { assert(uri != NULL); assert(memory != NULL); assert(uri->pathHead != NULL); assert(uri->pathHead->text.first == uri->pathHead->text.afterLast); URI_TYPE(PathSegment) * const originalHead = uri->pathHead; uri->pathHead = uri->pathHead->next; originalHead->text.first = NULL; originalHead->text.afterLast = NULL; memory->free(memory, originalHead); } /* URIs without a host encode a leading slash in the path as .absolutePath == URI_TRUE. * This function checks for a leading empty path segment (that would have the "visual * effect" of a leading slash during stringification) and transforms it into .absolutePath * == URI_TRUE instead, if present. */ static void URI_FUNC(TransformEmptyLeadPathSegments)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) { assert(uri != NULL); assert(memory != NULL); if ((URI_FUNC(HasHost)(uri) == URI_TRUE) || (uri->pathHead == NULL) || (uri->pathHead->text.first != uri->pathHead->text.afterLast)) { return; /* i.e. nothing to do */ } assert(uri->absolutePath == URI_FALSE); URI_FUNC(DropEmptyFirstPathSegment)(uri, memory); uri->absolutePath = URI_TRUE; } static int URI_FUNC(InternalSetPath)(URI_TYPE(Uri) * destUri, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory) { assert(destUri != NULL); assert(first != NULL); assert(afterLast != NULL); assert(memory != NULL); assert(destUri->pathHead == NULL); /* set by SetPathMm right before */ assert(destUri->pathTail == NULL); /* set by SetPathMm right before */ assert(destUri->absolutePath == URI_FALSE); /* set by SetPathMm right before */ /* Skip the leading slash from target URIs with a host (so that we can * transfer the path 1:1 further down) */ if (URI_FUNC(HasHost)(destUri) == URI_TRUE) { /* NOTE: This is because SetPathMm called IsWellFormedPath earlier: */ assert((afterLast - first >= 1) && (first[0] == _UT('/'))); first++; } else if (first == afterLast) { /* This avoids (1) all the expensive but unnecessary work below * and also (2) mis-encoding as single empty path segment * that would need (detection and) repair further down otherwise */ return URI_SUCCESS; } /* Assemble "///.." input wrap for upcoming parse as a URI */ const size_t inputLenChars = (afterLast - first); const size_t MAX_SIZE_T = (size_t)-1; /* Detect overflow */ if (MAX_SIZE_T - inputLenChars < 3 + 1) { return URI_ERROR_MALLOC; } const size_t candidateLenChars = 3 + inputLenChars; /* Detect overflow */ if (MAX_SIZE_T / sizeof(URI_CHAR) < candidateLenChars + 1) { return URI_ERROR_MALLOC; } URI_CHAR * const candidate = memory->malloc(memory, (candidateLenChars + 1) * sizeof(URI_CHAR)); if (candidate == NULL) { return URI_ERROR_MALLOC; } memcpy(candidate, _UT("///"), 3 * sizeof(URI_CHAR)); memcpy(candidate + 3, first, inputLenChars * sizeof(URI_CHAR)); candidate[3 + inputLenChars] = _UT('\0'); /* Parse as an RFC 3986 URI */ URI_TYPE(Uri) tempUri; int res = URI_FUNC(ParseSingleUriExMm)(&tempUri, candidate, candidate + candidateLenChars, NULL, memory); assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC)); if (res != URI_SUCCESS) { memory->free(memory, candidate); return res; } /* Nothing but path and host is supposed to be set by the parse, in * particular not: */ assert(tempUri.query.first == NULL); assert(tempUri.fragment.first == NULL); /* Ensure that the strings in the path segments are all owned by * `tempUri` because we want to (1) rip out and keep the full path * list further down and (2) be able to free the parsed string * (`candidate`) also. */ res = URI_FUNC(MakeOwnerMm)(&tempUri, memory); assert((res == URI_SUCCESS) || (res == URI_ERROR_MALLOC)); if (res != URI_SUCCESS) { URI_FUNC(FreeUriMembersMm)(&tempUri, memory); memory->free(memory, candidate); return res; } assert(tempUri.owner == URI_TRUE); /* Move path to destination URI */ assert(tempUri.absolutePath == URI_FALSE); /* always URI_FALSE for URIs with host */ destUri->pathHead = tempUri.pathHead; destUri->pathTail = tempUri.pathTail; destUri->absolutePath = URI_FALSE; tempUri.pathHead = NULL; tempUri.pathTail = NULL; /* Free the rest of the temp URI */ URI_FUNC(FreeUriMembersMm)(&tempUri, memory); memory->free(memory, candidate); /* Restore use of .absolutePath as needed */ URI_FUNC(TransformEmptyLeadPathSegments)(destUri, memory); /* Disambiguate as needed */ UriBool success = URI_FUNC(FixPathNoScheme)(destUri, memory); if (success == URI_FALSE) { return URI_ERROR_MALLOC; } success = URI_FUNC(EnsureThatPathIsNotMistakenForHost)(destUri, memory); if (success == URI_FALSE) { return URI_ERROR_MALLOC; } return URI_SUCCESS; } int URI_FUNC(SetPathMm)(URI_TYPE(Uri) * uri, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory) { /* Input validation (before making any changes) */ if ((uri == NULL) || ((first == NULL) != (afterLast == NULL))) { return URI_ERROR_NULL; } URI_CHECK_MEMORY_MANAGER(memory); /* may return */ if ((first != NULL) && (URI_FUNC(IsWellFormedPath)(first, afterLast, URI_FUNC(HasHost)(uri)) == URI_FALSE)) { return URI_ERROR_SYNTAX; } /* Clear old value */ int res = URI_FUNC(FreeUriPath)(uri, memory); if (res != URI_SUCCESS) { return res; } uri->absolutePath = URI_FALSE; /* Already done? */ if (first == NULL) { return URI_SUCCESS; } assert(first != NULL); /* Ensure owned */ if (uri->owner == URI_FALSE) { res = URI_FUNC(MakeOwnerMm)(uri, memory); if (res != URI_SUCCESS) { return res; } } assert(uri->owner == URI_TRUE); /* Apply new value */ res = URI_FUNC(InternalSetPath)(uri, first, afterLast, memory); assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC)); return res; } int URI_FUNC(SetPath)(URI_TYPE(Uri) * uri, const URI_CHAR * first, const URI_CHAR * afterLast) { return URI_FUNC(SetPathMm)(uri, first, afterLast, NULL); } #endif