diff options
author | Andreas Rottmann <a.rottmann@gmx.at> | 2009-09-14 12:32:44 +0200 |
---|---|---|
committer | Andreas Rottmann <a.rottmann@gmx.at> | 2009-09-14 12:32:44 +0200 |
commit | fa095a4504cbe668e4244547e2c141597bea4ecf (patch) | |
tree | 06135820a286ffec47804e75fbf8a147e92acd2e /lib/uniconv |
Imported Upstream version 0.9.1 [upstream/0.9.1]
Diffstat (limited to 'lib/uniconv')
-rw-r--r-- | lib/uniconv/u-conv-from-enc.h | 91 | ||||
-rw-r--r-- | lib/uniconv/u-conv-to-enc.h | 156 | ||||
-rw-r--r-- | lib/uniconv/u-strconv-from-enc.h | 40 | ||||
-rw-r--r-- | lib/uniconv/u-strconv-to-enc.h | 63 | ||||
-rw-r--r-- | lib/uniconv/u16-conv-from-enc.c | 47 | ||||
-rw-r--r-- | lib/uniconv/u16-conv-to-enc.c | 177 | ||||
-rw-r--r-- | lib/uniconv/u16-strconv-from-enc.c | 34 | ||||
-rw-r--r-- | lib/uniconv/u16-strconv-from-locale.c | 29 | ||||
-rw-r--r-- | lib/uniconv/u16-strconv-to-enc.c | 50 | ||||
-rw-r--r-- | lib/uniconv/u16-strconv-to-locale.c | 29 | ||||
-rw-r--r-- | lib/uniconv/u32-conv-from-enc.c | 46 | ||||
-rw-r--r-- | lib/uniconv/u32-conv-to-enc.c | 49 | ||||
-rw-r--r-- | lib/uniconv/u32-strconv-from-enc.c | 34 | ||||
-rw-r--r-- | lib/uniconv/u32-strconv-from-locale.c | 29 | ||||
-rw-r--r-- | lib/uniconv/u32-strconv-to-enc.c | 49 | ||||
-rw-r--r-- | lib/uniconv/u32-strconv-to-locale.c | 29 | ||||
-rw-r--r-- | lib/uniconv/u8-conv-from-enc.c | 105 | ||||
-rw-r--r-- | lib/uniconv/u8-conv-to-enc.c | 92 | ||||
-rw-r--r-- | lib/uniconv/u8-strconv-from-enc.c | 34 | ||||
-rw-r--r-- | lib/uniconv/u8-strconv-from-locale.c | 29 | ||||
-rw-r--r-- | lib/uniconv/u8-strconv-to-enc.c | 79 | ||||
-rw-r--r-- | lib/uniconv/u8-strconv-to-locale.c | 29 |
22 files changed, 1320 insertions, 0 deletions
diff --git a/lib/uniconv/u-conv-from-enc.h b/lib/uniconv/u-conv-from-enc.h new file mode 100644 index 00000000..fa0f0d3d --- /dev/null +++ b/lib/uniconv/u-conv-from-enc.h @@ -0,0 +1,91 @@ +/* Conversion to UTF-16/UTF-32 from legacy encodings. + Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +UNIT * +FUNC (const char *fromcode, + enum iconv_ilseq_handler handler, + const char *src, size_t srclen, + size_t *offsets, + UNIT *resultbuf, size_t *lengthp) +{ +#if HAVE_UTF_NAME + char *result = (char *) resultbuf; + size_t length = *lengthp * sizeof (UNIT); + + if (mem_iconveha (src, srclen, fromcode, UTF_NAME, true, handler, + offsets, &result, &length) < 0) + return NULL; + if (offsets != NULL) + { + /* Convert 'char *' offsets to 'UNIT *' offsets. 
*/ + size_t *offsets_end = offsets + srclen; + size_t *o; + + for (o = offsets; o < offsets_end; o++) + if (*o != (size_t)(-1)) + *o = *o / sizeof (UNIT); + } + if ((length % sizeof (UNIT)) != 0) + abort (); + *lengthp = length / sizeof (UNIT); + return (UNIT *) result; +#else + uint8_t *utf8_string; + size_t utf8_length; + UNIT *result; + + utf8_string = + u8_conv_from_encoding (fromcode, handler, src, srclen, offsets, + NULL, &utf8_length); + if (utf8_string == NULL) + return NULL; + result = U8_TO_U (utf8_string, utf8_length, resultbuf, lengthp); + if (result == NULL) + { + int saved_errno = errno; + free (utf8_string); + errno = saved_errno; + return NULL; + } + if (offsets != NULL) + { + size_t length = *lengthp; + size_t *offsets_end = offsets + srclen; + size_t *o; + size_t off8 = 0; /* offset into utf8_string */ + size_t offunit = 0; /* offset into result */ + + for (o = offsets; o < offsets_end; o++) + if (*o != (size_t)(-1)) + { + while (off8 < *o) + { + int count8 = u8_mblen (utf8_string + off8, utf8_length - off8); + int countunit = U_MBLEN (result + offunit, length - offunit); + if (count8 < 0 || countunit < 0) + abort (); + off8 += count8; + offunit += countunit; + } + if (*o != off8) + abort (); + *o = offunit; + } + } + free (utf8_string); + return result; +#endif +} diff --git a/lib/uniconv/u-conv-to-enc.h b/lib/uniconv/u-conv-to-enc.h new file mode 100644 index 00000000..ed715482 --- /dev/null +++ b/lib/uniconv/u-conv-to-enc.h @@ -0,0 +1,156 @@ +/* Conversion from UTF-16/UTF-32 to legacy encodings. + Copyright (C) 2002, 2006-2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +char * +FUNC (const char *tocode, + enum iconv_ilseq_handler handler, + const UNIT *src, size_t srclen, + size_t *offsets, + char *resultbuf, size_t *lengthp) +{ +#if HAVE_UTF_NAME + size_t *scaled_offsets; + char *result; + size_t length; + + if (offsets != NULL && srclen > 0) + { + scaled_offsets = + (size_t *) malloc (srclen * sizeof (UNIT) * sizeof (size_t)); + if (scaled_offsets == NULL) + { + errno = ENOMEM; + return NULL; + } + } + else + scaled_offsets = NULL; + + result = resultbuf; + length = *lengthp; + if (mem_iconveha ((const char *) src, srclen * sizeof (UNIT), + UTF_NAME, tocode, + handler == iconveh_question_mark, handler, + scaled_offsets, &result, &length) < 0) + { + int saved_errno = errno; + free (scaled_offsets); + errno = saved_errno; + return NULL; + } + + if (offsets != NULL) + { + /* Convert scaled_offsets[srclen * sizeof (UNIT)] to + offsets[srclen]. 
*/ + size_t i; + + for (i = 0; i < srclen; i++) + offsets[i] = scaled_offsets[i * sizeof (UNIT)]; + free (scaled_offsets); + } + + if (result == NULL) /* when (resultbuf == NULL && length == 0) */ + { + result = (char *) malloc (1); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + } + *lengthp = length; + return result; +#else + uint8_t tmpbuf[4096]; + size_t tmpbufsize = SIZEOF (tmpbuf); + uint8_t *utf8_src; + size_t utf8_srclen; + size_t *scaled_offsets; + char *result; + + utf8_src = U_TO_U8 (src, srclen, tmpbuf, &tmpbufsize); + if (utf8_src == NULL) + return NULL; + utf8_srclen = tmpbufsize; + + if (offsets != NULL && utf8_srclen > 0) + { + scaled_offsets = (size_t *) malloc (utf8_srclen * sizeof (size_t)); + if (scaled_offsets == NULL) + { + if (utf8_src != tmpbuf) + free (utf8_src); + errno = ENOMEM; + return NULL; + } + } + else + scaled_offsets = NULL; + + result = u8_conv_to_encoding (tocode, handler, utf8_src, utf8_srclen, + scaled_offsets, resultbuf, lengthp); + if (result == NULL) + { + int saved_errno = errno; + free (scaled_offsets); + if (utf8_src != tmpbuf) + free (utf8_src); + errno = saved_errno; + return NULL; + } + if (offsets != NULL) + { + size_t iunit; /* offset into src */ + size_t i8; /* offset into utf8_src */ + + for (iunit = 0; iunit < srclen; iunit++) + offsets[iunit] = (size_t)(-1); + + iunit = 0; + i8 = 0; + while (iunit < srclen && i8 < utf8_srclen) + { + int countunit; + int count8; + + offsets[iunit] = scaled_offsets[i8]; + + countunit = U_MBLEN (src + iunit, srclen - iunit); + count8 = u8_mblen (utf8_src + i8, utf8_srclen - i8); + if (countunit < 0 || count8 < 0) + abort (); + iunit += countunit; + i8 += count8; + } + /* Check that utf8_src has been traversed entirely. */ + if (i8 < utf8_srclen) + abort (); + /* Check that src has been traversed entirely, except possibly for an + incomplete sequence of units at the end. 
*/ + if (iunit < srclen) + { + offsets[iunit] = *lengthp; + if (!(U_MBLEN (src + iunit, srclen - iunit) < 0)) + abort (); + } + free (scaled_offsets); + } + if (utf8_src != tmpbuf) + free (utf8_src); + return result; +#endif +} diff --git a/lib/uniconv/u-strconv-from-enc.h b/lib/uniconv/u-strconv-from-enc.h new file mode 100644 index 00000000..25813c76 --- /dev/null +++ b/lib/uniconv/u-strconv-from-enc.h @@ -0,0 +1,40 @@ +/* Conversion to UTF-8/UTF-16/UTF-32 from legacy encodings. + Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +UNIT * +FUNC (const char *string, + const char *fromcode, + enum iconv_ilseq_handler handler) +{ + UNIT *result; + size_t length; + + result = + U_CONV_FROM_ENCODING (fromcode, handler, + string, strlen (string) + 1, NULL, + NULL, &length); + if (result == NULL) + return NULL; + /* Verify the result has exactly one NUL unit, at the end. */ + if (!(length > 0 && result[length-1] == 0 + && U_STRLEN (result) == length-1)) + { + free (result); + errno = EILSEQ; + return NULL; + } + return result; +} diff --git a/lib/uniconv/u-strconv-to-enc.h b/lib/uniconv/u-strconv-to-enc.h new file mode 100644 index 00000000..b6e6ebb5 --- /dev/null +++ b/lib/uniconv/u-strconv-to-enc.h @@ -0,0 +1,63 @@ +/* Conversion from UTF-16/UTF-32 to legacy encodings. 
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +char * +FUNC (const UNIT *string, + const char *tocode, + enum iconv_ilseq_handler handler) +{ +#if HAVE_UTF_NAME + char *result = NULL; + size_t length = 0; + + if (mem_iconveha ((const char *) string, (U_STRLEN (string) + 1) * sizeof (UNIT), + UTF_NAME, tocode, + handler == iconveh_question_mark, handler, + NULL, &result, &length) < 0) + return NULL; + /* Verify the result has exactly one NUL byte, at the end. 
*/ + if (!(length > 0 && result[length-1] == '\0' && strlen (result) == length-1)) + { + free (result); + errno = EILSEQ; + return NULL; + } + return result; +#else + uint8_t tmpbuf[4096]; + size_t tmpbufsize = SIZEOF (tmpbuf); + uint8_t *utf8_string; + char *result; + + utf8_string = U_TO_U8 (string, U_STRLEN (string) + 1, tmpbuf, &tmpbufsize); + if (utf8_string == NULL) + return NULL; + result = u8_strconv_to_encoding (utf8_string, tocode, handler); + if (result == NULL) + { + if (utf8_string != tmpbuf) + { + int saved_errno = errno; + free (utf8_string); + errno = saved_errno; + } + return NULL; + } + if (utf8_string != tmpbuf) + free (utf8_string); + return result; +#endif +} diff --git a/lib/uniconv/u16-conv-from-enc.c b/lib/uniconv/u16-conv-from-enc.c new file mode 100644 index 00000000..f38add30 --- /dev/null +++ b/lib/uniconv/u16-conv-from-enc.c @@ -0,0 +1,47 @@ +/* Conversion to UTF-16 from legacy encodings. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> + +#include "striconveha.h" +#include "unistr.h" + +/* Name of UTF-16 encoding with machine dependent endianness and alignment. 
*/ +#if defined _LIBICONV_VERSION || (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) +# ifdef WORDS_BIGENDIAN +# define UTF16_NAME "UTF-16BE" +# else +# define UTF16_NAME "UTF-16LE" +# endif +#endif + +#define FUNC u16_conv_from_encoding +#define UNIT uint16_t +#define U8_TO_U u8_to_u16 +#define U_MBLEN u16_mblen +#if defined UTF16_NAME +# define UTF_NAME UTF16_NAME +# define HAVE_UTF_NAME 1 +#endif +#include "u-conv-from-enc.h" diff --git a/lib/uniconv/u16-conv-to-enc.c b/lib/uniconv/u16-conv-to-enc.c new file mode 100644 index 00000000..ba70736c --- /dev/null +++ b/lib/uniconv/u16-conv-to-enc.c @@ -0,0 +1,177 @@ +/* Conversion from UTF-16 to legacy encodings. + Copyright (C) 2002, 2006-2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "striconveha.h" +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) + +/* Name of UTF-16 encoding with machine dependent endianness and alignment. 
*/ +#if defined _LIBICONV_VERSION || (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) +# ifdef WORDS_BIGENDIAN +# define UTF16_NAME "UTF-16BE" +# else +# define UTF16_NAME "UTF-16LE" +# endif +#endif + + +#if !defined UTF16_NAME + +/* A variant of u16_to_u8 that treats an incomplete sequence of units at the + end as a harmless no-op, rather than reporting it as an EILSEQ error. */ + +#define FUNC u16_to_u8_lenient +#define SRC_UNIT uint16_t +#define DST_UNIT uint8_t + +static DST_UNIT * +FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp) +{ + const SRC_UNIT *s_end = s + n; + /* Output string accumulator. */ + DST_UNIT *result; + size_t allocated; + size_t length; + + if (resultbuf != NULL) + { + result = resultbuf; + allocated = *lengthp; + } + else + { + result = NULL; + allocated = 0; + } + length = 0; + /* Invariants: + result is either == resultbuf or == NULL or malloc-allocated. + If length > 0, then result != NULL. */ + + while (s < s_end) + { + ucs4_t uc; + int count; + + /* Fetch a Unicode character from the input string. */ + count = u16_mbtoucr (&uc, s, s_end - s); + if (count < 0) + { + if (count == -2) + /* Incomplete sequence of units. */ + break; + if (!(result == resultbuf || result == NULL)) + free (result); + errno = EILSEQ; + return NULL; + } + s += count; + + /* Store it in the output string. */ + count = u8_uctomb (result + length, uc, allocated - length); + if (count == -1) + { + if (!(result == resultbuf || result == NULL)) + free (result); + errno = EILSEQ; + return NULL; + } + if (count == -2) + { + DST_UNIT *memory; + + allocated = (allocated > 0 ? 
2 * allocated : 12); + if (length + 6 > allocated) + allocated = length + 6; + if (result == resultbuf || result == NULL) + memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT)); + else + memory = + (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT)); + + if (memory == NULL) + { + if (!(result == resultbuf || result == NULL)) + free (result); + errno = ENOMEM; + return NULL; + } + if (result == resultbuf && length > 0) + memcpy ((char *) memory, (char *) result, + length * sizeof (DST_UNIT)); + result = memory; + count = u8_uctomb (result + length, uc, allocated - length); + if (count < 0) + abort (); + } + length += count; + } + + if (length == 0) + { + if (result == NULL) + { + /* Return a non-NULL value. NULL means error. */ + result = (DST_UNIT *) malloc (1); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + } + } + else if (result != resultbuf && length < allocated) + { + /* Shrink the allocated memory if possible. */ + DST_UNIT *memory; + + memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT)); + if (memory != NULL) + result = memory; + } + + *lengthp = length; + return result; +} + +#undef DST_UNIT +#undef SRC_UNIT +#undef FUNC + +#endif + + +#define FUNC u16_conv_to_encoding +#define UNIT uint16_t +#define U_TO_U8 u16_to_u8_lenient +#define U_MBLEN u16_mblen +#if defined UTF16_NAME +# define UTF_NAME UTF16_NAME +# define HAVE_UTF_NAME 1 +#endif +#include "u-conv-to-enc.h" diff --git a/lib/uniconv/u16-strconv-from-enc.c b/lib/uniconv/u16-strconv-from-enc.c new file mode 100644 index 00000000..736d0c4a --- /dev/null +++ b/lib/uniconv/u16-strconv-from-enc.c @@ -0,0 +1,34 @@ +/* Conversion to UTF-16 from legacy encodings. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "unistr.h" + +#define FUNC u16_strconv_from_encoding +#define UNIT uint16_t +#define U_CONV_FROM_ENCODING u16_conv_from_encoding +#define U_STRLEN u16_strlen +#include "u-strconv-from-enc.h" diff --git a/lib/uniconv/u16-strconv-from-locale.c b/lib/uniconv/u16-strconv-from-locale.c new file mode 100644 index 00000000..63ddeb04 --- /dev/null +++ b/lib/uniconv/u16-strconv-from-locale.c @@ -0,0 +1,29 @@ +/* Conversion to UTF-16 from the locale encoding. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +uint16_t * +u16_strconv_from_locale (const char *string) +{ + const char *encoding = locale_charset (); + return u16_strconv_from_encoding (string, encoding, iconveh_question_mark); +} diff --git a/lib/uniconv/u16-strconv-to-enc.c b/lib/uniconv/u16-strconv-to-enc.c new file mode 100644 index 00000000..10e895e9 --- /dev/null +++ b/lib/uniconv/u16-strconv-to-enc.c @@ -0,0 +1,50 @@ +/* Conversion from UTF-16 to legacy encodings. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "striconveha.h" +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) + +/* Name of UTF-16 encoding with machine dependent endianness and alignment. 
*/ +#if defined _LIBICONV_VERSION || (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) +# ifdef WORDS_BIGENDIAN +# define UTF16_NAME "UTF-16BE" +# else +# define UTF16_NAME "UTF-16LE" +# endif +#endif + +#define FUNC u16_strconv_to_encoding +#define UNIT uint16_t +#define U_TO_U8 u16_to_u8 +#define U_STRLEN u16_strlen +#if defined UTF16_NAME +# define UTF_NAME UTF16_NAME +# define HAVE_UTF_NAME 1 +#endif +#include "u-strconv-to-enc.h" diff --git a/lib/uniconv/u16-strconv-to-locale.c b/lib/uniconv/u16-strconv-to-locale.c new file mode 100644 index 00000000..14fd7b3b --- /dev/null +++ b/lib/uniconv/u16-strconv-to-locale.c @@ -0,0 +1,29 @@ +/* Conversion from UTF-16 to the locale encoding. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +char * +u16_strconv_to_locale (const uint16_t *string) +{ + const char *encoding = locale_charset (); + return u16_strconv_to_encoding (string, encoding, iconveh_question_mark); +} diff --git a/lib/uniconv/u32-conv-from-enc.c b/lib/uniconv/u32-conv-from-enc.c new file mode 100644 index 00000000..bac6301b --- /dev/null +++ b/lib/uniconv/u32-conv-from-enc.c @@ -0,0 +1,46 @@ +/* Conversion to UTF-32 from legacy encodings. 
+ Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> + +#include "striconveha.h" +#include "unistr.h" + +/* Name of UTF-32 or UCS-4 encoding with machine dependent endianness and + alignment. */ +#if defined _LIBICONV_VERSION +# define UTF32_NAME "UCS-4-INTERNAL" +#elif (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) +# define UTF32_NAME "WCHAR_T" +#endif + +#define FUNC u32_conv_from_encoding +#define UNIT uint32_t +#define U8_TO_U u8_to_u32 +#define U_MBLEN u32_mblen +#if defined UTF32_NAME +# define UTF_NAME UTF32_NAME +# define HAVE_UTF_NAME 1 +#endif +#include "u-conv-from-enc.h" diff --git a/lib/uniconv/u32-conv-to-enc.c b/lib/uniconv/u32-conv-to-enc.c new file mode 100644 index 00000000..9155abaa --- /dev/null +++ b/lib/uniconv/u32-conv-to-enc.c @@ -0,0 +1,49 @@ +/* Conversion from UTF-32 to legacy encodings. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "striconveha.h" +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) + +/* Name of UTF-32 or UCS-4 encoding with machine dependent endianness and + alignment. */ +#if defined _LIBICONV_VERSION +# define UTF32_NAME "UCS-4-INTERNAL" +#elif (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) +# define UTF32_NAME "WCHAR_T" +#endif + +#define FUNC u32_conv_to_encoding +#define UNIT uint32_t +#define U_TO_U8 u32_to_u8 +#define U_MBLEN u32_mblen +#if defined UTF32_NAME +# define UTF_NAME UTF32_NAME +# define HAVE_UTF_NAME 1 +#endif +#include "u-conv-to-enc.h" diff --git a/lib/uniconv/u32-strconv-from-enc.c b/lib/uniconv/u32-strconv-from-enc.c new file mode 100644 index 00000000..aaac74f1 --- /dev/null +++ b/lib/uniconv/u32-strconv-from-enc.c @@ -0,0 +1,34 @@ +/* Conversion to UTF-32 from legacy encodings. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "unistr.h" + +#define FUNC u32_strconv_from_encoding +#define UNIT uint32_t +#define U_CONV_FROM_ENCODING u32_conv_from_encoding +#define U_STRLEN u32_strlen +#include "u-strconv-from-enc.h" diff --git a/lib/uniconv/u32-strconv-from-locale.c b/lib/uniconv/u32-strconv-from-locale.c new file mode 100644 index 00000000..2cddcb63 --- /dev/null +++ b/lib/uniconv/u32-strconv-from-locale.c @@ -0,0 +1,29 @@ +/* Conversion to UTF-32 from the locale encoding. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. 
*/ +#include "uniconv.h" + +uint32_t * +u32_strconv_from_locale (const char *string) +{ + const char *encoding = locale_charset (); + return u32_strconv_from_encoding (string, encoding, iconveh_question_mark); +} diff --git a/lib/uniconv/u32-strconv-to-enc.c b/lib/uniconv/u32-strconv-to-enc.c new file mode 100644 index 00000000..c77e0b6e --- /dev/null +++ b/lib/uniconv/u32-strconv-to-enc.c @@ -0,0 +1,49 @@ +/* Conversion from UTF-32 to legacy encodings. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "striconveha.h" +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) + +/* Name of UTF-32 or UCS-4 encoding with machine dependent endianness and + alignment. 
*/ +#if defined _LIBICONV_VERSION +# define UTF32_NAME "UCS-4-INTERNAL" +#elif (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) +# define UTF32_NAME "WCHAR_T" +#endif + +#define FUNC u32_strconv_to_encoding +#define UNIT uint32_t +#define U_TO_U8 u32_to_u8 +#define U_STRLEN u32_strlen +#if defined UTF32_NAME +# define UTF_NAME UTF32_NAME +# define HAVE_UTF_NAME 1 +#endif +#include "u-strconv-to-enc.h" diff --git a/lib/uniconv/u32-strconv-to-locale.c b/lib/uniconv/u32-strconv-to-locale.c new file mode 100644 index 00000000..64ab59fa --- /dev/null +++ b/lib/uniconv/u32-strconv-to-locale.c @@ -0,0 +1,29 @@ +/* Conversion from UTF-32 to the locale encoding. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +char * +u32_strconv_to_locale (const uint32_t *string) +{ + const char *encoding = locale_charset (); + return u32_strconv_to_encoding (string, encoding, iconveh_question_mark); +} diff --git a/lib/uniconv/u8-conv-from-enc.c b/lib/uniconv/u8-conv-from-enc.c new file mode 100644 index 00000000..7605e63e --- /dev/null +++ b/lib/uniconv/u8-conv-from-enc.c @@ -0,0 +1,105 @@ +/* Conversion to UTF-8 from legacy encodings. 
+ Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "c-strcaseeq.h" +#include "striconveha.h" +#include "unistr.h" + +uint8_t * +u8_conv_from_encoding (const char *fromcode, + enum iconv_ilseq_handler handler, + const char *src, size_t srclen, + size_t *offsets, + uint8_t *resultbuf, size_t *lengthp) +{ + if (STRCASEEQ (fromcode, "UTF-8", 'U','T','F','-','8',0,0,0,0)) + { + /* Conversion from UTF-8 to UTF-8. No need to go through iconv(). */ + uint8_t *result; + + if (u8_check ((const uint8_t *) src, srclen)) + { + errno = EILSEQ; + return NULL; + } + + if (offsets != NULL) + { + size_t i; + + for (i = 0; i < srclen; ) + { + int count = u8_mblen ((const uint8_t *) src + i, srclen - i); + /* We can rely on count > 0 because of the previous u8_check. */ + if (count <= 0) + abort (); + offsets[i] = i; + i++; + while (--count > 0) + offsets[i++] = (size_t)(-1); + } + } + + /* Memory allocation. */ + if (resultbuf != NULL && *lengthp >= srclen) + result = resultbuf; + else + { + result = (uint8_t *) malloc (srclen > 0 ? 
srclen : 1); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + } + + memcpy ((char *) result, src, srclen); + *lengthp = srclen; + return result; + } + else + { + char *result = (char *) resultbuf; + size_t length = *lengthp; + + if (mem_iconveha (src, srclen, fromcode, "UTF-8", true, handler, + offsets, &result, &length) < 0) + return NULL; + + if (result == NULL) /* when (resultbuf == NULL && length == 0) */ + { + result = (char *) malloc (1); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + } + *lengthp = length; + return (uint8_t *) result; + } +} diff --git a/lib/uniconv/u8-conv-to-enc.c b/lib/uniconv/u8-conv-to-enc.c new file mode 100644 index 00000000..90acccec --- /dev/null +++ b/lib/uniconv/u8-conv-to-enc.c @@ -0,0 +1,92 @@ +/* Conversion from UTF-8 to legacy encodings. + Copyright (C) 2002, 2006-2007, 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. 
*/ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "c-strcaseeq.h" +#include "striconveha.h" +#include "unistr.h" + +char * +u8_conv_to_encoding (const char *tocode, + enum iconv_ilseq_handler handler, + const uint8_t *src, size_t srclen, + size_t *offsets, + char *resultbuf, size_t *lengthp) +{ + if (STRCASEEQ (tocode, "UTF-8", 'U','T','F','-','8',0,0,0,0)) + { + char *result; + + /* Conversion from UTF-8 to UTF-8. No need to go through iconv(). */ +#if CONFIG_UNICODE_SAFETY + if (u8_check (src, srclen)) + { + errno = EILSEQ; + return NULL; + } +#endif + + /* Memory allocation. */ + if (resultbuf != NULL && *lengthp >= srclen) + result = resultbuf; + else + { + result = (char *) malloc (srclen > 0 ? srclen : 1); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + } + + memcpy (result, (const char *) src, srclen); + *lengthp = srclen; + return result; + } + else + { + char *result = resultbuf; + size_t length = *lengthp; + + if (mem_iconveha ((const char *) src, srclen, + "UTF-8", tocode, + handler == iconveh_question_mark, handler, + offsets, &result, &length) < 0) + return NULL; + + if (result == NULL) /* when (resultbuf == NULL && length == 0) */ + { + result = (char *) malloc (1); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + } + *lengthp = length; + return result; + } +} diff --git a/lib/uniconv/u8-strconv-from-enc.c b/lib/uniconv/u8-strconv-from-enc.c new file mode 100644 index 00000000..0f5736fc --- /dev/null +++ b/lib/uniconv/u8-strconv-from-enc.c @@ -0,0 +1,34 @@ +/* Conversion to UTF-8 from legacy encodings. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "unistr.h" + +#define FUNC u8_strconv_from_encoding +#define UNIT uint8_t +#define U_CONV_FROM_ENCODING u8_conv_from_encoding +#define U_STRLEN u8_strlen +#include "u-strconv-from-enc.h" diff --git a/lib/uniconv/u8-strconv-from-locale.c b/lib/uniconv/u8-strconv-from-locale.c new file mode 100644 index 00000000..a901a3d6 --- /dev/null +++ b/lib/uniconv/u8-strconv-from-locale.c @@ -0,0 +1,29 @@ +/* Conversion to UTF-8 from the locale encoding. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. 
*/ +#include "uniconv.h" + +uint8_t * +u8_strconv_from_locale (const char *string) +{ + const char *encoding = locale_charset (); + return u8_strconv_from_encoding (string, encoding, iconveh_question_mark); +} diff --git a/lib/uniconv/u8-strconv-to-enc.c b/lib/uniconv/u8-strconv-to-enc.c new file mode 100644 index 00000000..a3f12b6b --- /dev/null +++ b/lib/uniconv/u8-strconv-to-enc.c @@ -0,0 +1,79 @@ +/* Conversion from UTF-8 to legacy encodings. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. */ +#include "uniconv.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "c-strcaseeq.h" +#include "striconveha.h" +#include "unistr.h" + +char * +u8_strconv_to_encoding (const uint8_t *string, + const char *tocode, + enum iconv_ilseq_handler handler) +{ + char *result; + size_t length; + + if (STRCASEEQ (tocode, "UTF-8", 'U','T','F','-','8',0,0,0,0)) + { + /* Conversion from UTF-8 to UTF-8. No need to go through iconv(). 
*/ + length = u8_strlen (string) + 1; +#if CONFIG_UNICODE_SAFETY + if (u8_check (string, length)) + { + errno = EILSEQ; + return NULL; + } +#endif + result = (char *) malloc (length); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + memcpy (result, (const char *) string, length); + return result; + } + else + { + result = NULL; + length = 0; + if (mem_iconveha ((const char *) string, u8_strlen (string) + 1, + "UTF-8", tocode, + handler == iconveh_question_mark, handler, + NULL, &result, &length) < 0) + return NULL; + /* Verify the result has exactly one NUL byte, at the end. */ + if (!(length > 0 && result[length-1] == '\0' + && strlen (result) == length-1)) + { + free (result); + errno = EILSEQ; + return NULL; + } + return result; + } +} diff --git a/lib/uniconv/u8-strconv-to-locale.c b/lib/uniconv/u8-strconv-to-locale.c new file mode 100644 index 00000000..27c45fbf --- /dev/null +++ b/lib/uniconv/u8-strconv-to-locale.c @@ -0,0 +1,29 @@ +/* Conversion from UTF-8 to the locale encoding. + Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>. */ + +#include <config.h> + +/* Specification. 
*/ +#include "uniconv.h" + +char * +u8_strconv_to_locale (const uint8_t *string) +{ + const char *encoding = locale_charset (); + return u8_strconv_to_encoding (string, encoding, iconveh_question_mark); +} |