summaryrefslogtreecommitdiff
path: root/tests/c32rtomb.c
diff options
context:
space:
mode:
Diffstat (limited to 'tests/c32rtomb.c')
-rw-r--r--tests/c32rtomb.c132
1 files changed, 132 insertions, 0 deletions
diff --git a/tests/c32rtomb.c b/tests/c32rtomb.c
new file mode 100644
index 00000000..260bf097
--- /dev/null
+++ b/tests/c32rtomb.c
@@ -0,0 +1,132 @@
+/* Convert 32-bit wide character to multibyte character.
+ Copyright (C) 2020-2024 Free Software Foundation, Inc.
+
+ This file is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 2.1 of the
+ License, or (at your option) any later version.
+
+ This file is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2020. */
+
+#include <config.h>
+
+/* Specification. */
+#include <uchar.h>
+
+#include <errno.h>
+#include <wchar.h>
+
+#include "attribute.h" /* FALLTHROUGH */
+#include "localcharset.h"
+#include "streq.h"
+
+#if GL_CHAR32_T_IS_UNICODE
+# include "lc-charset-unicode.h"
+#endif
+
+size_t
+c32rtomb (char *s, char32_t wc, mbstate_t *ps)
+#undef c32rtomb
+{
+#if HAVE_WORKING_MBRTOC32
+
+# if C32RTOMB_RETVAL_BUG
+ if (s == NULL)
+ /* We know the NUL wide character corresponds to the NUL character. */
+ return 1;
+# endif
+
+ return c32rtomb (s, wc, ps);
+
+#elif _GL_SMALL_WCHAR_T
+
+ if (s == NULL)
+ return wcrtomb (NULL, 0, ps);
+ else
+ {
+ /* Special-case all encodings that may produce wide character values
+ > WCHAR_MAX. */
+ const char *encoding = locale_charset ();
+ if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
+ {
+ /* Special-case the UTF-8 encoding. Assume that the wide-character
+ encoding in a UTF-8 locale is UCS-2 or, equivalently, UTF-16. */
+ if (wc < 0x80)
+ {
+ s[0] = (unsigned char) wc;
+ return 1;
+ }
+ else
+ {
+ int count;
+
+ if (wc < 0x800)
+ count = 2;
+ else if (wc < 0x10000)
+ {
+ if (wc < 0xd800 || wc >= 0xe000)
+ count = 3;
+ else
+ {
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+ }
+ else if (wc < 0x110000)
+ count = 4;
+ else
+ {
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+
+ switch (count) /* note: code falls through cases! */
+ {
+ case 4: s[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
+ FALLTHROUGH;
+ case 3: s[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
+ FALLTHROUGH;
+ case 2: s[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
+ /*case 1:*/ s[0] = wc;
+ }
+ return count;
+ }
+ }
+ else
+ {
+ if ((wchar_t) wc == wc)
+ return wcrtomb (s, (wchar_t) wc, ps);
+ else
+ {
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+ }
+ }
+
+#else
+
+ /* char32_t and wchar_t are equivalent. */
+# if GL_CHAR32_T_IS_UNICODE && GL_CHAR32_T_VS_WCHAR_T_NEEDS_CONVERSION
+ if (wc != 0)
+ {
+ wc = unicode_to_locale_encoding (wc);
+ if (wc == 0)
+ {
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+ }
+# endif
+ return wcrtomb (s, (wchar_t) wc, ps);
+
+#endif
+}