diff options
Diffstat (limited to 'include/uriparser/Uri.h')
-rw-r--r-- | include/uriparser/Uri.h | 32 |
1 file changed, 27 insertions, 5 deletions
diff --git a/include/uriparser/Uri.h b/include/uriparser/Uri.h index d2c8610..b80ac6c 100644 --- a/include/uriparser/Uri.h +++ b/include/uriparser/Uri.h @@ -1,4 +1,4 @@ -/* 4bf720e0ca97527a28e4c30f1c35b36a0b5f2697265c5ddc81080eaab4344ef2 (0.9.7+) +/* e8e2c75d033ddfe256fe87c3fd5a330a6f2c9cbb376ebd83a1b3263e804c766a (0.9.8+) * * uriparser - RFC 3986 URI parsing library * @@ -352,10 +352,19 @@ URI_PUBLIC int URI_FUNC(FreeUriMembersMm)(URI_TYPE(Uri) * uri, /** * Percent-encodes all but unreserved characters from the input string and * writes the encoded version to the output string. - * Be sure to allocate <b>3 times</b> the space of the input buffer for + * + * NOTE: Be sure to allocate <b>3 times</b> the space of the input buffer for * the output buffer for <c>normalizeBreaks == URI_FALSE</c> and <b>6 times</b> * the space for <c>normalizeBreaks == URI_TRUE</c> - * (since e.g. "\x0d" becomes "%0D%0A" in that case) + * (since e.g. "\x0d" becomes "%0D%0A" in that case). + * + * NOTE: The implementation treats (both <c>char</c> and) <c>wchar_t</c> units + * as code point integers, which works well for code points <c>U+0001</c> to <c>U+00ff</c> + * in host-native endianness but nothing more; + * in particular, using <c>uriEscapeExW</c> with arbitrary Unicode input will + * not produce healthy results. + * Passing UTF-8 input to <c>uriEscapeExA</c> may be useful in some scenarios. + * Keep in mind that uriparser is about %URI (RFC 3986) not %IRI (RFC 3987). * * @param inFirst <b>IN</b>: Pointer to first character of the input text * @param inAfterLast <b>IN</b>: Pointer after the last character of the input text @@ -377,10 +386,19 @@ URI_PUBLIC URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst, /** * Percent-encodes all but unreserved characters from the input string and * writes the encoded version to the output string. 
- * Be sure to allocate <b>3 times</b> the space of the input buffer for + * + * NOTE: Be sure to allocate <b>3 times</b> the space of the input buffer for * the output buffer for <c>normalizeBreaks == URI_FALSE</c> and <b>6 times</b> * the space for <c>normalizeBreaks == URI_TRUE</c> - * (since e.g. "\x0d" becomes "%0D%0A" in that case) + * (since e.g. "\x0d" becomes "%0D%0A" in that case). + * + * NOTE: The implementation treats (both <c>char</c> and) <c>wchar_t</c> units + * as code point integers, which works well for code points <c>U+0001</c> to <c>U+00ff</c> + * in host-native endianness but nothing more; + * in particular, using <c>uriEscapeW</c> with arbitrary Unicode input will + * not produce healthy results. + * Passing UTF-8 input to <c>uriEscapeA</c> may be useful in some scenarios. + * Keep in mind that uriparser is about %URI (RFC 3986) not %IRI (RFC 3987). * * @param in <b>IN</b>: Text source * @param out <b>OUT</b>: Encoded text destination @@ -608,6 +626,10 @@ URI_PUBLIC int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri, * Converts a %URI structure back to text as described in * <a href="http://tools.ietf.org/html/rfc3986#section-5.3">section 5.3 of RFC 3986</a>. * + * NOTE: Scheme-based normalization + * (<a href="http://tools.ietf.org/html/rfc3986#section-6.2.3">section 6.2.3 of RFC 3986</a>) + * is not applied and is considered a responsibility of the application using uriparser. + * * @param dest <b>OUT</b>: Output destination * @param uri <b>IN</b>: %URI to convert * @param maxChars <b>IN</b>: Maximum number of characters to copy <b>including</b> terminator |