summary | refs | log | tree | commit | diff
path: root/src/UriSetPath.c
diff options
context:
space:
mode:
author: Jörg Frings-Fürst <debian@jff-webhsoting.net> 2026-05-08 11:53:45 +0200
committer: Jörg Frings-Fürst <debian@jff-webhsoting.net> 2026-05-08 11:53:45 +0200
commit: c3dce46c5f7cad6bc3cc91cc2c711ac089f25923 (patch)
tree: abaac2b003b368aa5bde30a5b898a3f51e85db43 /src/UriSetPath.c
parent: bc983f30186f3c204b1daea57b0057f93b74dde1 (diff)
New upstream version 1.0.1+dfsg (upstream/1.0.1+dfsg, upstream)
Diffstat (limited to 'src/UriSetPath.c')
-rw-r--r-- src/UriSetPath.c 343
1 files changed, 343 insertions, 0 deletions
diff --git a/src/UriSetPath.c b/src/UriSetPath.c
new file mode 100644
index 0000000..17aef0f
--- /dev/null
+++ b/src/UriSetPath.c
@@ -0,0 +1,343 @@
+/*
+ * uriparser - RFC 3986 URI parsing library
+ *
+ * Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* What encodings are enabled? */
+#include <uriparser/UriDefsConfig.h>
+#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
+/* Include SELF twice */
+# ifdef URI_ENABLE_ANSI
+# define URI_PASS_ANSI 1
+# include "UriSetPath.c"
+# undef URI_PASS_ANSI
+# endif
+# ifdef URI_ENABLE_UNICODE
+# define URI_PASS_UNICODE 1
+# include "UriSetPath.c"
+# undef URI_PASS_UNICODE
+# endif
+#else
+# ifdef URI_PASS_ANSI
+# include <uriparser/UriDefsAnsi.h>
+# else
+# include <uriparser/UriDefsUnicode.h>
+# include <wchar.h>
+# endif
+
+# ifndef URI_DOXYGEN
+# include <uriparser/Uri.h>
+# include "UriCommon.h"
+# include "UriMemory.h"
+# include "UriSets.h"
+# endif
+
+# include <assert.h>
+
+/* Checks whether the range [first, afterLast) is acceptable as a path.
+ *
+ * Returns URI_FALSE if either pointer is NULL, if <hasHost> is URI_TRUE and
+ * the range is empty or does not begin with a slash, or if the range holds
+ * anything other than characters from URI_SET_PCHAR_WITHOUT_PERCENT, slashes
+ * and complete percent-escapes ("%" followed by two characters from
+ * URI_SET_HEXDIG). Returns URI_TRUE otherwise. */
+UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast,
+                                   UriBool hasHost) {
+    if ((first == NULL) || (afterLast == NULL)) {
+        return URI_FALSE;
+    }
+
+    if (hasHost == URI_TRUE) {
+        /* With a host present, the path has to be non-empty and has to
+         * start with a slash */
+        if ((first >= afterLast) || (first[0] != _UT('/'))) {
+            return URI_FALSE;
+        }
+    }
+
+    /* RFC 3986 (section 3.3) defines paths like this:
+     *
+     *   path          = path-abempty    ; begins with "/" or is empty
+     *                 / path-absolute   ; begins with "/" but not "//"
+     *                 / path-noscheme   ; begins with a non-colon segment
+     *                 / path-rootless   ; begins with a segment
+     *                 / path-empty      ; zero characters
+     *
+     *   path-abempty  = *( "/" segment )
+     *   path-absolute = "/" [ segment-nz *( "/" segment ) ]
+     *   path-noscheme = segment-nz-nc *( "/" segment )
+     *   path-rootless = segment-nz *( "/" segment )
+     *   path-empty    = 0<pchar>
+     *
+     *   segment       = *pchar
+     *   segment-nz    = 1*pchar
+     *   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+     *                 ; non-zero-length segment without any colon ":"
+     *
+     *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+     *
+     * The scan below deliberately accepts the simpler superset ..
+     *
+     *   path = *( unreserved / pct-encoded / sub-delims / ":" / "@" / "/" )
+     *
+     * .. and relies on SetPath to remove the ambiguity of cases like
+     * "path1:/path2" and "//path1/path2" before it returns.
+     */
+    for (const URI_CHAR * cursor = first; cursor < afterLast; cursor++) {
+        switch (cursor[0]) {
+        case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
+        case _UT('/'):
+            break;
+
+        /* pct-encoded */
+        case _UT('%'): {
+            /* Need the percent sign plus two more characters */
+            if (afterLast - cursor < 3) {
+                return URI_FALSE;
+            }
+            for (int offset = 1; offset <= 2; offset++) {
+                switch (cursor[offset]) {
+                case URI_SET_HEXDIG(_UT):
+                    break;
+                default:
+                    return URI_FALSE;
+                }
+            }
+            /* Skip the two hex digits; the loop header skips the "%" */
+            cursor += 2;
+        } break;
+
+        default:
+            return URI_FALSE;
+        }
+    }
+
+    return URI_TRUE;
+}
+
+/* Unlinks and frees the first path segment of <uri>, which must exist and
+ * must be empty (text.first == text.afterLast).
+ *
+ * Only the list node itself is released through <memory>; the segment's text
+ * pointers are reset to NULL beforehand and are not freed here.
+ * If the dropped segment was also the last one, .pathTail is reset to NULL
+ * as well so that it does not keep pointing at the freed node. */
+static void URI_FUNC(DropEmptyFirstPathSegment)(URI_TYPE(Uri) * uri,
+                                                UriMemoryManager * memory) {
+    assert(uri != NULL);
+    assert(memory != NULL);
+    assert(uri->pathHead != NULL);
+    assert(uri->pathHead->text.first == uri->pathHead->text.afterLast);
+
+    URI_TYPE(PathSegment) * const originalHead = uri->pathHead;
+
+    uri->pathHead = originalHead->next;
+
+    /* Keep head and tail consistent when the list has just become empty */
+    if (uri->pathTail == originalHead) {
+        uri->pathTail = NULL;
+    }
+
+    originalHead->text.first = NULL;
+    originalHead->text.afterLast = NULL;
+    memory->free(memory, originalHead);
+}
+
+/* For URIs without a host, a leading slash in the path is encoded as
+ * .absolutePath == URI_TRUE rather than as a path segment.
+ * If <uri> has no host but its path list starts with an empty segment (which
+ * would "look like" a leading slash once stringified), this function drops
+ * that segment and sets .absolutePath to URI_TRUE instead.
+ * In all other cases <uri> is left untouched. */
+static void URI_FUNC(TransformEmptyLeadPathSegments)(URI_TYPE(Uri) * uri,
+                                                     UriMemoryManager * memory) {
+    assert(uri != NULL);
+    assert(memory != NULL);
+
+    /* URIs with a host keep their empty segments as they are */
+    if (URI_FUNC(HasHost)(uri) == URI_TRUE) {
+        return;
+    }
+
+    /* No path at all: nothing to transform */
+    if (uri->pathHead == NULL) {
+        return;
+    }
+
+    /* First segment is non-empty: nothing to transform */
+    if (uri->pathHead->text.first != uri->pathHead->text.afterLast) {
+        return;
+    }
+
+    assert(uri->absolutePath == URI_FALSE);
+
+    URI_FUNC(DropEmptyFirstPathSegment)(uri, memory);
+    uri->absolutePath = URI_TRUE;
+}
+
+/* Installs the path given by [first, afterLast) into <destUri>, whose path
+ * members must already have been cleared by the caller (SetPathMm).
+ *
+ * The path text is wrapped as "///<path>", parsed as a URI of its own, and
+ * the resulting list of path segments is then moved over to <destUri>.
+ *
+ * Returns URI_SUCCESS, or the error code of a failed parse or ownership
+ * transfer, or URI_ERROR_MALLOC on size overflow, failed allocation or
+ * failed disambiguation. */
+static int URI_FUNC(InternalSetPath)(URI_TYPE(Uri) * destUri, const URI_CHAR * first,
+                                     const URI_CHAR * afterLast,
+                                     UriMemoryManager * memory) {
+    assert(destUri != NULL);
+    assert(first != NULL);
+    assert(afterLast != NULL);
+    assert(memory != NULL);
+    /* SetPathMm is expected to have reset these three right before: */
+    assert(destUri->pathHead == NULL);
+    assert(destUri->pathTail == NULL);
+    assert(destUri->absolutePath == URI_FALSE);
+
+    if (URI_FUNC(HasHost)(destUri) == URI_TRUE) {
+        /* For target URIs with a host, step over the leading slash so that
+         * the remaining path can be transferred 1:1 further down.
+         * NOTE: SetPathMm called IsWellFormedPath earlier, hence: */
+        assert((afterLast - first >= 1) && (first[0] == _UT('/')));
+        first++;
+    } else if (first == afterLast) {
+        /* Empty path without a host: returning here saves (1) all the
+         * expensive work below and (2) the mis-encoding as a single empty
+         * path segment that would otherwise need detection and repair */
+        return URI_SUCCESS;
+    }
+
+    /* Size of the "///.." wrap that is about to be parsed as a URI */
+    const size_t prefixLen = 3;
+    const size_t sizeMax = (size_t)-1;
+    const size_t pathLen = (size_t)(afterLast - first);
+
+    /* Would prefix + path + terminator overflow a size_t? */
+    if (pathLen > sizeMax - (prefixLen + 1)) {
+        return URI_ERROR_MALLOC;
+    }
+
+    const size_t wrappedLen = prefixLen + pathLen;
+
+    /* Would the byte count of the allocation overflow a size_t? */
+    if (wrappedLen + 1 > sizeMax / sizeof(URI_CHAR)) {
+        return URI_ERROR_MALLOC;
+    }
+
+    URI_CHAR * const wrapped =
+        memory->malloc(memory, (wrappedLen + 1) * sizeof(URI_CHAR));
+
+    if (wrapped == NULL) {
+        return URI_ERROR_MALLOC;
+    }
+
+    wrapped[0] = _UT('/');
+    wrapped[1] = _UT('/');
+    wrapped[2] = _UT('/');
+    memcpy(wrapped + prefixLen, first, pathLen * sizeof(URI_CHAR));
+    wrapped[wrappedLen] = _UT('\0');
+
+    /* Parse the wrap as an RFC 3986 URI */
+    URI_TYPE(Uri) parsed;
+    int status = URI_FUNC(ParseSingleUriExMm)(&parsed, wrapped, wrapped + wrappedLen,
+                                              NULL, memory);
+    assert((status == URI_SUCCESS) || (status == URI_ERROR_SYNTAX)
+           || (status == URI_ERROR_MALLOC));
+    if (status != URI_SUCCESS) {
+        memory->free(memory, wrapped);
+        return status;
+    }
+
+    /* The parse is supposed to set nothing but path and host, in
+     * particular not: */
+    assert(parsed.query.first == NULL);
+    assert(parsed.fragment.first == NULL);
+
+    /* Make `parsed` own the strings of its path segments, because we want
+     * to (1) rip out and keep the whole path list further down and (2) be
+     * able to free the parsed string (`wrapped`) as well. */
+    status = URI_FUNC(MakeOwnerMm)(&parsed, memory);
+    assert((status == URI_SUCCESS) || (status == URI_ERROR_MALLOC));
+    if (status != URI_SUCCESS) {
+        URI_FUNC(FreeUriMembersMm)(&parsed, memory);
+        memory->free(memory, wrapped);
+        return status;
+    }
+    assert(parsed.owner == URI_TRUE);
+
+    /* Hand the path list over to the destination URI */
+    assert(parsed.absolutePath == URI_FALSE); /* always URI_FALSE for URIs with host */
+    destUri->absolutePath = URI_FALSE;
+    destUri->pathHead = parsed.pathHead;
+    destUri->pathTail = parsed.pathTail;
+
+    /* Detach the list so that it is not freed with the temporary URI */
+    parsed.pathHead = NULL;
+    parsed.pathTail = NULL;
+
+    /* Release what is left of the temporary URI, and the wrap */
+    URI_FUNC(FreeUriMembersMm)(&parsed, memory);
+    memory->free(memory, wrapped);
+
+    /* Restore use of .absolutePath as needed */
+    URI_FUNC(TransformEmptyLeadPathSegments)(destUri, memory);
+
+    /* Disambiguate as needed; the second helper only runs if the first one
+     * succeeded, and failure of either is reported as URI_ERROR_MALLOC */
+    if ((URI_FUNC(FixPathNoScheme)(destUri, memory) == URI_FALSE)
+        || (URI_FUNC(EnsureThatPathIsNotMistakenForHost)(destUri, memory)
+            == URI_FALSE)) {
+        return URI_ERROR_MALLOC;
+    }
+
+    return URI_SUCCESS;
+}
+
+/* Replaces the path of <uri> with [first, afterLast), or just removes the
+ * existing path when both <first> and <afterLast> are NULL.
+ *
+ * All input validation happens before <uri> is modified:
+ * - URI_ERROR_NULL if <uri> is NULL or exactly one of <first> and
+ *   <afterLast> is NULL
+ * - whatever URI_CHECK_MEMORY_MANAGER returns for an unusable <memory>
+ * - URI_ERROR_SYNTAX if IsWellFormedPath rejects the new path
+ *
+ * After that, the error code of FreeUriPath, MakeOwnerMm or InternalSetPath
+ * is passed through on failure; URI_SUCCESS is returned otherwise. */
+int URI_FUNC(SetPathMm)(URI_TYPE(Uri) * uri, const URI_CHAR * first,
+                        const URI_CHAR * afterLast, UriMemoryManager * memory) {
+    if (uri == NULL) {
+        return URI_ERROR_NULL;
+    }
+
+    /* Either both range pointers are NULL or none of them */
+    if ((first == NULL) != (afterLast == NULL)) {
+        return URI_ERROR_NULL;
+    }
+
+    URI_CHECK_MEMORY_MANAGER(memory); /* may return */
+
+    if (first != NULL) {
+        const UriBool hasHost = URI_FUNC(HasHost)(uri);
+        if (URI_FUNC(IsWellFormedPath)(first, afterLast, hasHost) == URI_FALSE) {
+            return URI_ERROR_SYNTAX;
+        }
+    }
+
+    /* Get rid of the previous path */
+    int status = URI_FUNC(FreeUriPath)(uri, memory);
+    if (status != URI_SUCCESS) {
+        return status;
+    }
+    uri->absolutePath = URI_FALSE;
+
+    /* Pure removal requested? Then we are done. */
+    if (first == NULL) {
+        return URI_SUCCESS;
+    }
+
+    /* The new path segments will be owned, so the rest of the URI has to
+     * be owned, too */
+    if (uri->owner == URI_FALSE) {
+        status = URI_FUNC(MakeOwnerMm)(uri, memory);
+        if (status != URI_SUCCESS) {
+            return status;
+        }
+    }
+
+    assert(uri->owner == URI_TRUE);
+
+    /* Install the new path */
+    status = URI_FUNC(InternalSetPath)(uri, first, afterLast, memory);
+    assert((status == URI_SUCCESS) || (status == URI_ERROR_SYNTAX)
+           || (status == URI_ERROR_MALLOC));
+    return status;
+}
+
+/* Convenience variant of SetPathMm that passes NULL as the memory manager.
+ * NOTE(review): SetPathMm feeds that NULL into URI_CHECK_MEMORY_MANAGER,
+ * which presumably selects the library's default memory manager -- confirm
+ * against the macro's definition. */
+int URI_FUNC(SetPath)(URI_TYPE(Uri) * uri, const URI_CHAR * first,
+                      const URI_CHAR * afterLast) {
+    UriMemoryManager * const noCustomMemory = NULL;
+
+    return URI_FUNC(SetPathMm)(uri, first, afterLast, noCustomMemory);
+}
+
+#endif