From bc983f30186f3c204b1daea57b0057f93b74dde1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 30 Jun 2024 16:13:02 +0200 Subject: New upstream version 0.9.8+dfsg --- include/uriparser/Uri.h | 32 +++++++++++++++++++++++++++----- include/uriparser/UriBase.h | 2 +- 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uriparser/Uri.h b/include/uriparser/Uri.h index d2c8610..b80ac6c 100644 --- a/include/uriparser/Uri.h +++ b/include/uriparser/Uri.h @@ -1,4 +1,4 @@ -/* 4bf720e0ca97527a28e4c30f1c35b36a0b5f2697265c5ddc81080eaab4344ef2 (0.9.7+) +/* e8e2c75d033ddfe256fe87c3fd5a330a6f2c9cbb376ebd83a1b3263e804c766a (0.9.8+) * * uriparser - RFC 3986 URI parsing library * @@ -352,10 +352,19 @@ URI_PUBLIC int URI_FUNC(FreeUriMembersMm)(URI_TYPE(Uri) * uri, /** * Percent-encodes all unreserved characters from the input string and * writes the encoded version to the output string. - * Be sure to allocate 3 times the space of the input buffer for + * + * NOTE: Be sure to allocate 3 times the space of the input buffer for * the output buffer for normalizeBreaks == URI_FALSE and 6 times * the space for normalizeBreaks == URI_TRUE - * (since e.g. "\x0d" becomes "%0D%0A" in that case) + * (since e.g. "\x0d" becomes "%0D%0A" in that case). + * + * NOTE: The implementation treats (both char and) wchar_t units + * as code point integers, which works well for code points U+0001 to U+00ff + * in host-native endianness but nothing more; + * in particular, using uriEscapeExW with arbitrary Unicode input will + * not produce healthy results. + * Passing UTF-8 input to uriEscapeExA may be useful in some scenarios. + * Keep in mind that uriparser is about %URI (RFC 3986) not %IRI (RFC 3987). * * @param inFirst IN: Pointer to first character of the input text * @param inAfterLast IN: Pointer after the last character of the input text @@ -377,10 +386,19 @@ URI_PUBLIC URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, /** * Percent-encodes all unreserved characters from the input string and * writes the encoded version to the output string. - * Be sure to allocate 3 times the space of the input buffer for + * + * NOTE: Be sure to allocate 3 times the space of the input buffer for * the output buffer for normalizeBreaks == URI_FALSE and 6 times * the space for normalizeBreaks == URI_TRUE - * (since e.g. "\x0d" becomes "%0D%0A" in that case) + * (since e.g. "\x0d" becomes "%0D%0A" in that case). + * + * NOTE: The implementation treats (both char and) wchar_t units + * as code point integers, which works well for code points U+0001 to U+00ff + * in host-native endianness but nothing more; + * in particular, using uriEscapeW with arbitrary Unicode input will + * not produce healthy results. + * Passing UTF-8 input to uriEscapeA may be useful in some scenarios. + * Keep in mind that uriparser is about %URI (RFC 3986) not %IRI (RFC 3987). * * @param in IN: Text source * @param out OUT: Encoded text destination @@ -608,6 +626,10 @@ URI_PUBLIC int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri, * Converts a %URI structure back to text as described in * section 5.3 of RFC 3986. * + * NOTE: Scheme-based normalization + * (section 6.2.3 of RFC 3986) + * is not applied and is considered a responsibility of the application using uriparser. + * * @param dest OUT: Output destination * @param uri IN: %URI to convert * @param maxChars IN: Maximum number of characters to copy including terminator diff --git a/include/uriparser/UriBase.h b/include/uriparser/UriBase.h index 5216b1d..dc3883e 100644 --- a/include/uriparser/UriBase.h +++ b/include/uriparser/UriBase.h @@ -55,7 +55,7 @@ /* Version */ #define URI_VER_MAJOR 0 #define URI_VER_MINOR 9 -#define URI_VER_RELEASE 7 +#define URI_VER_RELEASE 8 #define URI_VER_SUFFIX_ANSI "" #define URI_VER_SUFFIX_UNICODE URI_ANSI_TO_UNICODE(URI_VER_SUFFIX_ANSI) -- cgit v1.2.3