diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2022-01-08 11:51:07 +0100 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2022-01-08 11:51:07 +0100 | 
| commit | be8efac78d067c138ad8dda03df4336e73f94887 (patch) | |
| tree | 5f5254a628ba0ef72065b93d949d1c985742ea8e /lib/striconveh.c | |
| parent | 7b65dbd4ebade81d504cfe5e681292a58ad1fdf0 (diff) | |
New upstream version 1.0upstream/1.0
Diffstat (limited to 'lib/striconveh.c')
| -rw-r--r-- | lib/striconveh.c | 178 | 
1 files changed, 103 insertions, 75 deletions
| diff --git a/lib/striconveh.c b/lib/striconveh.c index 45d76f88..5b60a7e0 100644 --- a/lib/striconveh.c +++ b/lib/striconveh.c @@ -1,27 +1,18 @@  /* Character set conversion with error handling. -   Copyright (C) 2001-2018 Free Software Foundation, Inc. +   Copyright (C) 2001-2022 Free Software Foundation, Inc.     Written by Bruno Haible and Simon Josefsson. -   This program is free software: you can redistribute it and/or -   modify it under the terms of either: +   This file is free software: you can redistribute it and/or modify +   it under the terms of the GNU Lesser General Public License as +   published by the Free Software Foundation; either version 2.1 of the +   License, or (at your option) any later version. -     * the GNU Lesser General Public License as published by the Free -       Software Foundation; either version 3 of the License, or (at your -       option) any later version. - -   or - -     * the GNU General Public License as published by the Free -       Software Foundation; either version 2 of the License, or (at your -       option) any later version. - -   or both in parallel, as here. -   This program is distributed in the hope that it will be useful, +   This file is distributed in the hope that it will be useful,     but WITHOUT ANY WARRANTY; without even the implied warranty of     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU General Public License for more details. +   GNU Lesser General Public License for more details. -   You should have received a copy of the GNU General Public License +   You should have received a copy of the GNU Lesser General Public License     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */  #include <config.h> @@ -82,7 +73,7 @@ iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp)          {            int saved_errno = errno;            if (cd != (iconv_t)(-1)) -            iconv_close (cdp->cd); +            iconv_close (cd);            errno = saved_errno;            return -1;          } @@ -466,13 +457,18 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                  if (cd2 == (iconv_t)(-1))                    {                      /* TO_CODESET is UTF-8.  */ -                    /* Error handling can produce up to 1 byte of output.  */ -                    if (length + 1 + extra_alloc > allocated) +                    /* Error handling can produce up to 1 or 3 bytes of +                       output.  */ +                    size_t extra_need = +                      (handler == iconveh_replacement_character ? 3 : 1); +                    if (length + extra_need + extra_alloc > allocated)                        {                          char *memory;                          allocated = 2 * allocated; -                        if (length + 1 + extra_alloc > allocated) +                        if (length + extra_need + extra_alloc > allocated) +                          allocated = 2 * allocated; +                        if (length + extra_need + extra_alloc > allocated)                            abort ();                          if (result == initial_result)                            memory = (char *) malloc (allocated); @@ -491,7 +487,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                          grow = false;                        }                      /* The input is invalid in FROM_CODESET.  Eat up one byte -                       and emit a question mark.  */ +                       and emit a replacement character or a question mark.  */                      if (!incremented)                        {                          if (insize == 0) @@ -499,8 +495,19 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                          inptr++;                          insize--;                        } -                    result[length] = '?'; -                    length++; +                    if (handler == iconveh_replacement_character) +                      { +                        /* U+FFFD in UTF-8 encoding.  */ +                        result[length+0] = '\357'; +                        result[length+1] = '\277'; +                        result[length+2] = '\275'; +                        length += 3; +                      } +                    else +                      { +                        result[length] = '?'; +                        length++; +                      }                    }                  else                    goto indirectly; @@ -508,11 +515,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,              else                {                  if (result != initial_result) -                  { -                    int saved_errno = errno; -                    free (result); -                    errno = saved_errno; -                  } +                  free (result);                  return -1;                }            } @@ -579,11 +582,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,            else              {                if (result != initial_result) -                { -                  int saved_errno = errno; -                  free (result); -                  errno = saved_errno; -                } +                free (result);                return -1;              }          } @@ -611,7 +610,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,    {      const bool slowly = (offsets != NULL || handler == iconveh_error);  # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */ -    char utf8buf[utf8bufsize + 1]; +    char utf8buf[utf8bufsize + 3];      size_t utf8len = 0;      const char *in1ptr = src;      size_t in1size = srclen; @@ -692,19 +691,15 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,              && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))            {              if (result != initial_result) -              { -                int saved_errno = errno; -                free (result); -                errno = saved_errno; -              } +              free (result);              return -1;            }          if (res1 == (size_t)(-1)              && errno == EILSEQ && handler != iconveh_error)            {              /* The input is invalid in FROM_CODESET.  Eat up one byte and -               emit a question mark.  Room for the question mark was allocated -               at the end of utf8buf.  */ +               emit a U+FFFD character or a question mark.  Room for this +               character was allocated at the end of utf8buf.  */              if (!incremented1)                {                  if (in1size == 0) @@ -712,7 +707,16 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                  in1ptr++;                  in1size--;                } -            *out1ptr++ = '?'; +            if (handler == iconveh_replacement_character) +              { +                /* U+FFFD in UTF-8 encoding.  */ +                out1ptr[0] = '\357'; +                out1ptr[1] = '\277'; +                out1ptr[2] = '\275'; +                out1ptr += 3; +              } +            else +              *out1ptr++ = '?';              res1 = 0;            }          errno1 = errno; @@ -777,7 +781,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                        break;                      else if (errno == EILSEQ && handler != iconveh_error)                        { -                        /* Error handling can produce up to 10 bytes of ASCII +                        /* Error handling can produce up to 10 bytes of UTF-8                             output.  But TO_CODESET may be UCS-2, UTF-16 or                             UCS-4, so use CD2 here as well.  */                          char scratchbuf[10]; @@ -825,6 +829,14 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                              scratchbuf[scratchlen++] = hex[(uc>>4) & 15];                              scratchbuf[scratchlen++] = hex[uc & 15];                            } +                        else if (handler == iconveh_replacement_character) +                          { +                            /* U+FFFD in UTF-8 encoding.  */ +                            scratchbuf[0] = '\357'; +                            scratchbuf[1] = '\277'; +                            scratchbuf[2] = '\275'; +                            scratchlen = 3; +                          }                          else                            {                              scratchbuf[0] = '?'; @@ -834,9 +846,45 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                          inptr = scratchbuf;                          insize = scratchlen;                          if (cd2 != (iconv_t)(-1)) -                          res = iconv (cd2, -                                       (ICONV_CONST char **) &inptr, &insize, -                                       &out2ptr, &out2size); +                          { +                            char *out2ptr_try = out2ptr; +                            size_t out2size_try = out2size; +                            res = iconv (cd2, +                                         (ICONV_CONST char **) &inptr, &insize, +                                         &out2ptr_try, &out2size_try); +                            if (handler == iconveh_replacement_character +                                && (res == (size_t)(-1) +                                    ? errno == EILSEQ +                                    /* FreeBSD iconv(), NetBSD iconv(), and +                                       Solaris 11 iconv() insert a '?' if they +                                       cannot convert.  This is what we want. +                                       But IRIX iconv() inserts a NUL byte if it +                                       cannot convert. +                                       And musl libc iconv() inserts a '*' if it +                                       cannot convert.  */ +                                    : (res > 0 +                                       && !(out2ptr_try - out2ptr == 1 +                                            && *out2ptr == '?')))) +                              { +                                /* The iconv() call failed. +                                   U+FFFD can't be converted to TO_CODESET. +                                   Use '?' instead.  */ +                                scratchbuf[0] = '?'; +                                scratchlen = 1; +                                inptr = scratchbuf; +                                insize = scratchlen; +                                res = iconv (cd2, +                                             (ICONV_CONST char **) &inptr, &insize, +                                             &out2ptr, &out2size); +                              } +                            else +                              { +                                /* Accept the results of the iconv() call.  */ +                                out2ptr = out2ptr_try; +                                out2size = out2size_try; +                                res = 0; +                              } +                          }                          else                            {                              /* TO_CODESET is UTF-8.  */ @@ -901,9 +949,10 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                              length = out2ptr - result;                            }  # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) -                        /* Irix iconv() inserts a NUL byte if it cannot convert. -                           NetBSD iconv() inserts a question mark if it cannot -                           convert. +                        /* IRIX iconv() inserts a NUL byte if it cannot convert. +                           FreeBSD iconv(), NetBSD iconv(), and Solaris 11 +                           iconv() insert a '?' if they cannot convert. +                           musl libc iconv() inserts a '*' if it cannot convert.                             Only GNU libiconv and GNU libc are known to prefer                             to fail rather than doing a lossy conversion.  */                          if (res != (size_t)(-1) && res > 0) @@ -916,22 +965,14 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,                            {                              /* Failure converting the ASCII replacement.  */                              if (result != initial_result) -                              { -                                int saved_errno = errno; -                                free (result); -                                errno = saved_errno; -                              } +                              free (result);                              return -1;                            }                        }                      else                        {                          if (result != initial_result) -                          { -                            int saved_errno = errno; -                            free (result); -                            errno = saved_errno; -                          } +                          free (result);                          return -1;                        }                    } @@ -1050,12 +1091,7 @@ str_cd_iconveh (const char *src,    if (retval < 0)      { -      if (result != NULL) -        { -          int saved_errno = errno; -          free (result); -          errno = saved_errno; -        } +      free (result);        return NULL;      } @@ -1127,12 +1163,8 @@ mem_iconveh (const char *src, size_t srclen,          {            if (iconveh_close (&cd) < 0)              { -              /* Return -1, but free the allocated memory, and while doing -                 that, preserve the errno from iconveh_close.  */ -              int saved_errno = errno; -              if (result != *resultp && result != NULL) +              if (result != *resultp)                  free (result); -              errno = saved_errno;                return -1;              }            *resultp = result; @@ -1186,11 +1218,7 @@ str_iconveh (const char *src,          {            if (iconveh_close (&cd) < 0)              { -              /* Return NULL, but free the allocated memory, and while doing -                 that, preserve the errno from iconveh_close.  */ -              int saved_errno = errno;                free (result); -              errno = saved_errno;                return NULL;              }          } | 
