summary | refs | log | tree | commit | diff
path: root/lib/striconveh.c
diff options
context:
space:
mode:
author: Jörg Frings-Fürst <debian@jff.email> 2022-01-08 11:53:52 +0100
committer: Jörg Frings-Fürst <debian@jff.email> 2022-01-08 11:53:52 +0100
commit: fa838e76139763f902c7d27cb9e1d393ed6a15e4 (patch)
tree: 7d0ae09775ea950056193eaa2ca93844299d46f1 /lib/striconveh.c
parent: c78359d9542c86b972aac373efcf7bc7a8a560e5 (diff)
parent: 2959e59fab3bab834368adefd90bd4b1b094366b (diff)
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'lib/striconveh.c')
-rw-r--r-- lib/striconveh.c 178
1 files changed, 103 insertions, 75 deletions
diff --git a/lib/striconveh.c b/lib/striconveh.c
index 45d76f88..5b60a7e0 100644
--- a/lib/striconveh.c
+++ b/lib/striconveh.c
@@ -1,27 +1,18 @@
/* Character set conversion with error handling.
- Copyright (C) 2001-2018 Free Software Foundation, Inc.
+ Copyright (C) 2001-2022 Free Software Foundation, Inc.
Written by Bruno Haible and Simon Josefsson.
- This program is free software: you can redistribute it and/or
- modify it under the terms of either:
+ This file is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 2.1 of the
+ License, or (at your option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
- This program is distributed in the hope that it will be useful,
+ This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ GNU Lesser General Public License for more details.
- You should have received a copy of the GNU General Public License
+ You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
@@ -82,7 +73,7 @@ iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp)
{
int saved_errno = errno;
if (cd != (iconv_t)(-1))
- iconv_close (cdp->cd);
+ iconv_close (cd);
errno = saved_errno;
return -1;
}
@@ -466,13 +457,18 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
if (cd2 == (iconv_t)(-1))
{
/* TO_CODESET is UTF-8. */
- /* Error handling can produce up to 1 byte of output. */
- if (length + 1 + extra_alloc > allocated)
+ /* Error handling can produce up to 1 or 3 bytes of
+ output. */
+ size_t extra_need =
+ (handler == iconveh_replacement_character ? 3 : 1);
+ if (length + extra_need + extra_alloc > allocated)
{
char *memory;
allocated = 2 * allocated;
- if (length + 1 + extra_alloc > allocated)
+ if (length + extra_need + extra_alloc > allocated)
+ allocated = 2 * allocated;
+ if (length + extra_need + extra_alloc > allocated)
abort ();
if (result == initial_result)
memory = (char *) malloc (allocated);
@@ -491,7 +487,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
grow = false;
}
/* The input is invalid in FROM_CODESET. Eat up one byte
- and emit a question mark. */
+ and emit a replacement character or a question mark. */
if (!incremented)
{
if (insize == 0)
@@ -499,8 +495,19 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
inptr++;
insize--;
}
- result[length] = '?';
- length++;
+ if (handler == iconveh_replacement_character)
+ {
+ /* U+FFFD in UTF-8 encoding. */
+ result[length+0] = '\357';
+ result[length+1] = '\277';
+ result[length+2] = '\275';
+ length += 3;
+ }
+ else
+ {
+ result[length] = '?';
+ length++;
+ }
}
else
goto indirectly;
@@ -508,11 +515,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
else
{
if (result != initial_result)
- {
- int saved_errno = errno;
- free (result);
- errno = saved_errno;
- }
+ free (result);
return -1;
}
}
@@ -579,11 +582,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
else
{
if (result != initial_result)
- {
- int saved_errno = errno;
- free (result);
- errno = saved_errno;
- }
+ free (result);
return -1;
}
}
@@ -611,7 +610,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
{
const bool slowly = (offsets != NULL || handler == iconveh_error);
# define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
- char utf8buf[utf8bufsize + 1];
+ char utf8buf[utf8bufsize + 3];
size_t utf8len = 0;
const char *in1ptr = src;
size_t in1size = srclen;
@@ -692,19 +691,15 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
&& !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
{
if (result != initial_result)
- {
- int saved_errno = errno;
- free (result);
- errno = saved_errno;
- }
+ free (result);
return -1;
}
if (res1 == (size_t)(-1)
&& errno == EILSEQ && handler != iconveh_error)
{
/* The input is invalid in FROM_CODESET. Eat up one byte and
- emit a question mark. Room for the question mark was allocated
- at the end of utf8buf. */
+ emit a U+FFFD character or a question mark. Room for this
+ character was allocated at the end of utf8buf. */
if (!incremented1)
{
if (in1size == 0)
@@ -712,7 +707,16 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
in1ptr++;
in1size--;
}
- *out1ptr++ = '?';
+ if (handler == iconveh_replacement_character)
+ {
+ /* U+FFFD in UTF-8 encoding. */
+ out1ptr[0] = '\357';
+ out1ptr[1] = '\277';
+ out1ptr[2] = '\275';
+ out1ptr += 3;
+ }
+ else
+ *out1ptr++ = '?';
res1 = 0;
}
errno1 = errno;
@@ -777,7 +781,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
break;
else if (errno == EILSEQ && handler != iconveh_error)
{
- /* Error handling can produce up to 10 bytes of ASCII
+ /* Error handling can produce up to 10 bytes of UTF-8
output. But TO_CODESET may be UCS-2, UTF-16 or
UCS-4, so use CD2 here as well. */
char scratchbuf[10];
@@ -825,6 +829,14 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
scratchbuf[scratchlen++] = hex[uc & 15];
}
+ else if (handler == iconveh_replacement_character)
+ {
+ /* U+FFFD in UTF-8 encoding. */
+ scratchbuf[0] = '\357';
+ scratchbuf[1] = '\277';
+ scratchbuf[2] = '\275';
+ scratchlen = 3;
+ }
else
{
scratchbuf[0] = '?';
@@ -834,9 +846,45 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
inptr = scratchbuf;
insize = scratchlen;
if (cd2 != (iconv_t)(-1))
- res = iconv (cd2,
- (ICONV_CONST char **) &inptr, &insize,
- &out2ptr, &out2size);
+ {
+ char *out2ptr_try = out2ptr;
+ size_t out2size_try = out2size;
+ res = iconv (cd2,
+ (ICONV_CONST char **) &inptr, &insize,
+ &out2ptr_try, &out2size_try);
+ if (handler == iconveh_replacement_character
+ && (res == (size_t)(-1)
+ ? errno == EILSEQ
+ /* FreeBSD iconv(), NetBSD iconv(), and
+ Solaris 11 iconv() insert a '?' if they
+ cannot convert. This is what we want.
+ But IRIX iconv() inserts a NUL byte if it
+ cannot convert.
+ And musl libc iconv() inserts a '*' if it
+ cannot convert. */
+ : (res > 0
+ && !(out2ptr_try - out2ptr == 1
+ && *out2ptr == '?'))))
+ {
+ /* The iconv() call failed.
+ U+FFFD can't be converted to TO_CODESET.
+ Use '?' instead. */
+ scratchbuf[0] = '?';
+ scratchlen = 1;
+ inptr = scratchbuf;
+ insize = scratchlen;
+ res = iconv (cd2,
+ (ICONV_CONST char **) &inptr, &insize,
+ &out2ptr, &out2size);
+ }
+ else
+ {
+ /* Accept the results of the iconv() call. */
+ out2ptr = out2ptr_try;
+ out2size = out2size_try;
+ res = 0;
+ }
+ }
else
{
/* TO_CODESET is UTF-8. */
@@ -901,9 +949,10 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
length = out2ptr - result;
}
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
- /* Irix iconv() inserts a NUL byte if it cannot convert.
- NetBSD iconv() inserts a question mark if it cannot
- convert.
+ /* IRIX iconv() inserts a NUL byte if it cannot convert.
+ FreeBSD iconv(), NetBSD iconv(), and Solaris 11
+ iconv() insert a '?' if they cannot convert.
+ musl libc iconv() inserts a '*' if it cannot convert.
Only GNU libiconv and GNU libc are known to prefer
to fail rather than doing a lossy conversion. */
if (res != (size_t)(-1) && res > 0)
@@ -916,22 +965,14 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
{
/* Failure converting the ASCII replacement. */
if (result != initial_result)
- {
- int saved_errno = errno;
- free (result);
- errno = saved_errno;
- }
+ free (result);
return -1;
}
}
else
{
if (result != initial_result)
- {
- int saved_errno = errno;
- free (result);
- errno = saved_errno;
- }
+ free (result);
return -1;
}
}
@@ -1050,12 +1091,7 @@ str_cd_iconveh (const char *src,
if (retval < 0)
{
- if (result != NULL)
- {
- int saved_errno = errno;
- free (result);
- errno = saved_errno;
- }
+ free (result);
return NULL;
}
@@ -1127,12 +1163,8 @@ mem_iconveh (const char *src, size_t srclen,
{
if (iconveh_close (&cd) < 0)
{
- /* Return -1, but free the allocated memory, and while doing
- that, preserve the errno from iconveh_close. */
- int saved_errno = errno;
- if (result != *resultp && result != NULL)
+ if (result != *resultp)
free (result);
- errno = saved_errno;
return -1;
}
*resultp = result;
@@ -1186,11 +1218,7 @@ str_iconveh (const char *src,
{
if (iconveh_close (&cd) < 0)
{
- /* Return NULL, but free the allocated memory, and while doing
- that, preserve the errno from iconveh_close. */
- int saved_errno = errno;
free (result);
- errno = saved_errno;
return NULL;
}
}