summaryrefslogtreecommitdiff
path: root/tests/test-striconveh.c
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test-striconveh.c')
-rw-r--r--tests/test-striconveh.c399
1 files changed, 395 insertions, 4 deletions
diff --git a/tests/test-striconveh.c b/tests/test-striconveh.c
index cb21a4c3..781aa525 100644
--- a/tests/test-striconveh.c
+++ b/tests/test-striconveh.c
@@ -1,5 +1,5 @@
/* Test of character set conversion with error handling.
- Copyright (C) 2007-2018 Free Software Foundation, Inc.
+ Copyright (C) 2007-2022 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -44,16 +44,21 @@ new_offsets (size_t n)
int
main ()
{
+#if HAVE_ICONV
static enum iconv_ilseq_handler handlers[] =
- { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
+ {
+ iconveh_error,
+ iconveh_question_mark,
+ iconveh_replacement_character,
+ iconveh_escape_sequence
+ };
size_t indirect;
size_t h;
size_t o;
size_t i;
-#if HAVE_ICONV
/* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
- ISO-8859-2, and UTF-8. */
+ ISO-8859-2, UTF-8, and with libiconv or glibc also GB18030. */
iconv_t cd_ascii_to_88591 = iconv_open ("ISO-8859-1", "ASCII");
iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
@@ -63,6 +68,12 @@ main ()
iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7");
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ iconv_t cd_ascii_to_gb18030 = iconv_open ("GB18030", "ASCII");
+ iconv_t cd_utf8_to_gb18030 = iconv_open ("GB18030", "UTF-8");
+ iconv_t cd_88591_to_gb18030 = iconv_open ("GB18030", "ISO-8859-1");
+ iconv_t cd_utf7_to_gb18030 = iconv_open ("GB18030", "UTF-7");
+# endif
iconveh_t cdeh_ascii_to_88591;
iconveh_t cdeh_ascii_to_88591_indirectly;
iconveh_t cdeh_88592_to_88591;
@@ -71,12 +82,21 @@ main ()
iconveh_t cdeh_88591_to_utf8;
iconveh_t cdeh_utf8_to_88591;
iconveh_t cdeh_utf7_to_utf8;
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ iconveh_t cdeh_ascii_to_gb18030;
+ iconveh_t cdeh_88591_to_gb18030;
+ iconveh_t cdeh_utf7_to_gb18030;
+# endif
ASSERT (cd_ascii_to_utf8 != (iconv_t)(-1));
ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ ASSERT (cd_ascii_to_gb18030 != (iconv_t)(-1));
+ ASSERT (cd_utf8_to_gb18030 != (iconv_t)(-1));
+# endif
cdeh_ascii_to_88591.cd = cd_ascii_to_88591;
cdeh_ascii_to_88591.cd1 = cd_ascii_to_utf8;
@@ -110,6 +130,20 @@ main ()
cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8;
cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1);
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ cdeh_ascii_to_gb18030.cd = cd_ascii_to_gb18030;
+ cdeh_ascii_to_gb18030.cd1 = cd_ascii_to_utf8;
+ cdeh_ascii_to_gb18030.cd2 = cd_utf8_to_gb18030;
+
+ cdeh_88591_to_gb18030.cd = cd_88591_to_gb18030;
+ cdeh_88591_to_gb18030.cd1 = cd_88591_to_utf8;
+ cdeh_88591_to_gb18030.cd2 = cd_utf8_to_gb18030;
+
+ cdeh_utf7_to_gb18030.cd = cd_utf7_to_gb18030;
+ cdeh_utf7_to_gb18030.cd1 = cd_utf7_to_utf8;
+ cdeh_utf7_to_gb18030.cd2 = cd_utf8_to_gb18030;
+# endif
+
/* ------------------------ Test mem_cd_iconveh() ------------------------ */
/* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
@@ -175,6 +209,7 @@ main ()
free (offsets);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
case iconveh_escape_sequence:
{
static const char expected[] = "Rafa? Maszkowski";
@@ -224,6 +259,7 @@ main ()
free (offsets);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
{
static const char expected[] = "Rafa? Maszkowski";
ASSERT (retval == 0);
@@ -294,6 +330,41 @@ main ()
}
}
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
+ for (h = 0; h < SIZEOF (handlers); h++)
+ {
+ enum iconv_ilseq_handler handler = handlers[h];
+ static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+ static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ &cdeh_88591_to_gb18030,
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 37; i++)
+ ASSERT (offsets[i] == (i < 1 ? i :
+ i < 12 ? i + 3 :
+ i < 18 ? i + 6 :
+ i + 7));
+ ASSERT (offsets[37] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ }
+# endif
+
/* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
for (h = 0; h < SIZEOF (handlers); h++)
{
@@ -371,9 +442,87 @@ main ()
free (result);
}
break;
+ case iconveh_replacement_character:
+ {
+ static const char expected[] = "Rafa\357\277\275 Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 16; i++)
+ ASSERT (offsets[i] == (i < 5 ? i : i + 2));
+ ASSERT (offsets[16] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ }
+ }
+ }
+
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ). */
+ for (h = 0; h < SIZEOF (handlers); h++)
+ {
+ enum iconv_ilseq_handler handler = handlers[h];
+ static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ &cdeh_ascii_to_gb18030,
+ handler,
+ offsets,
+ &result, &length);
+ switch (handler)
+ {
+ case iconveh_error:
+ ASSERT (retval == -1 && errno == EILSEQ);
+ ASSERT (result == NULL);
+ if (o)
+ free (offsets);
+ break;
+ case iconveh_question_mark:
+ case iconveh_escape_sequence:
+ {
+ static const char expected[] = "Rafa? Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 16; i++)
+ ASSERT (offsets[i] == i);
+ ASSERT (offsets[16] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ case iconveh_replacement_character:
+ {
+ static const char expected[] = "Rafa\2041\2447 Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 16; i++)
+ ASSERT (offsets[i] == (i < 5 ? i : i + 3));
+ ASSERT (offsets[16] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
}
}
}
+# endif
/* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
for (h = 0; h < SIZEOF (handlers); h++)
@@ -399,6 +548,7 @@ main ()
free (offsets);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
{
static const char expected[] = "Rafa? Maszkowski";
ASSERT (retval == 0);
@@ -496,6 +646,34 @@ main ()
free (result);
}
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ /* Test conversion from UTF-7 to GB18030 with EINVAL. */
+ for (h = 0; h < SIZEOF (handlers); h++)
+ {
+ enum iconv_ilseq_handler handler = handlers[h];
+ /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
+ convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */
+ static const char input[] = "+VDLYP9hA";
+ static const char expected1[] = "\337\305"; /* 吲 glibc */
+ static const char expected2[] = ""; /* libiconv */
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, 7,
+ &cdeh_utf7_to_gb18030,
+ handler,
+ NULL,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected1) || length == strlen (expected2));
+ ASSERT (result != NULL);
+ if (length == strlen (expected1))
+ ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
+ else
+ ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
+ free (result);
+ }
+# endif
+
/* Disabled on NetBSD, because NetBSD 5.0 iconv() is buggy: it converts
the input "+2D/YQNhB" to U+1FED8 U+3FD8 U+40D8. */
# if !(defined __NetBSD__ && !defined _LIBICONV_VERSION)
@@ -544,8 +722,98 @@ main ()
free (result);
}
break;
+ case iconveh_replacement_character:
+ {
+ /* glibc result */
+ static const char expected1[] = "\357\277\275\357\277\275\357\277\275\357\277\275\357\277\275";
+ /* libiconv <= 1.12 result */
+ static const char expected2[] = "\357\277\2752D/YQNhB";
+ /* libiconv >= 1.13 result */
+ static const char expected3[] = "\357\277\275\340\277\266\341\200\266";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected1)
+ || length == strlen (expected2)
+ || length == strlen (expected3));
+ ASSERT (result != NULL);
+ if (length == strlen (expected1))
+ ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
+ else if (length == strlen (expected2))
+ ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
+ else
+ ASSERT (memcmp (result, expected3, strlen (expected3)) == 0);
+ free (result);
+ }
}
}
+
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ /* Test conversion from UTF-7 to GB18030 with EILSEQ. */
+ for (h = 0; h < SIZEOF (handlers); h++)
+ {
+ enum iconv_ilseq_handler handler = handlers[h];
+ /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
+ convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
+ static const char input[] = "+2D/YQNhB";
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ &cdeh_utf7_to_gb18030,
+ handler,
+ NULL,
+ &result, &length);
+ switch (handler)
+ {
+ case iconveh_error:
+ ASSERT (retval == -1 && errno == EILSEQ);
+ ASSERT (result == NULL);
+ break;
+ case iconveh_question_mark:
+ case iconveh_escape_sequence:
+ {
+ /* glibc result */
+ static const char expected1[] = "?????";
+ /* libiconv <= 1.12 result */
+ static const char expected2[] = "?2D/YQNhB";
+ /* libiconv behaviour changed in version 1.13: the result is
+ '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left
+ by 6 bits. */
+ static const char expected3[] = "?\2013\2030\2013\2114";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected1)
+ || length == strlen (expected2)
+ || length == strlen (expected3));
+ ASSERT (result != NULL);
+ if (length == strlen (expected1))
+ ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
+ else if (length == strlen (expected2))
+ ASSERT (memcmp (result, expected2, strlen (expected2)) == 0
+ || memcmp (result, expected3, strlen (expected3)) == 0);
+ free (result);
+ }
+ break;
+ case iconveh_replacement_character:
+ {
+ /* glibc result */
+ static const char expected1[] = "\2041\2447\2041\2447\2041\2447\2041\2447\2041\2447";
+ /* libiconv <= 1.12 result */
+ static const char expected2[] = "\2041\24472D/YQNhB";
+ /* libiconv >= 1.13 result */
+ static const char expected3[] = "\2041\2447\2013\2030\2013\2114";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected1)
+ || length == strlen (expected2)
+ || length == strlen (expected3));
+ ASSERT (result != NULL);
+ if (length == strlen (expected1))
+ ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
+ else if (length == strlen (expected2))
+ ASSERT (memcmp (result, expected2, strlen (expected2)) == 0
+ || memcmp (result, expected3, strlen (expected3)) == 0);
+ free (result);
+ }
+ }
+ }
+# endif
# endif
# endif
}
@@ -589,6 +857,7 @@ main ()
ASSERT (result == NULL && errno == EILSEQ);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
case iconveh_escape_sequence:
{
static const char expected[] = "Rafa? Maszkowski";
@@ -619,6 +888,7 @@ main ()
ASSERT (result == NULL && errno == EILSEQ);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
{
static const char expected[] = "Rafa? Maszkowski";
ASSERT (result != NULL);
@@ -652,6 +922,22 @@ main ()
free (result);
}
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
+ for (h = 0; h < SIZEOF (handlers); h++)
+ {
+ enum iconv_ilseq_handler handler = handlers[h];
+ static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+ static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
+ char *result = str_cd_iconveh (input,
+ &cdeh_88591_to_gb18030,
+ handler);
+ ASSERT (result != NULL);
+ ASSERT (strcmp (result, expected) == 0);
+ free (result);
+ }
+# endif
+
/* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
for (h = 0; h < SIZEOF (handlers); h++)
{
@@ -688,8 +974,51 @@ main ()
free (result);
}
break;
+ case iconveh_replacement_character:
+ {
+ static const char expected[] = "Rafa\357\277\275 Maszkowski";
+ ASSERT (result != NULL);
+ ASSERT (strcmp (result, expected) == 0);
+ free (result);
+ }
+ break;
+ }
+ }
+
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ). */
+ for (h = 0; h < SIZEOF (handlers); h++)
+ {
+ enum iconv_ilseq_handler handler = handlers[h];
+ static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
+ char *result = str_cd_iconveh (input,
+ &cdeh_ascii_to_gb18030,
+ handler);
+ switch (handler)
+ {
+ case iconveh_error:
+ ASSERT (result == NULL && errno == EILSEQ);
+ break;
+ case iconveh_question_mark:
+ case iconveh_escape_sequence:
+ {
+ static const char expected[] = "Rafa? Maszkowski";
+ ASSERT (result != NULL);
+ ASSERT (strcmp (result, expected) == 0);
+ free (result);
+ }
+ break;
+ case iconveh_replacement_character:
+ {
+ static const char expected[] = "Rafa\2041\2447 Maszkowski";
+ ASSERT (result != NULL);
+ ASSERT (strcmp (result, expected) == 0);
+ free (result);
+ }
+ break;
}
}
+# endif
/* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
for (h = 0; h < SIZEOF (handlers); h++)
@@ -705,6 +1034,7 @@ main ()
ASSERT (result == NULL && errno == EILSEQ);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
{
static const char expected[] = "Costs: 27 ?";
ASSERT (result != NULL);
@@ -801,6 +1131,7 @@ main ()
free (offsets);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
{
static const char expected[] = "Rafa? Maszkowski";
ASSERT (retval == 0);
@@ -870,6 +1201,41 @@ main ()
}
}
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
+ for (h = 0; h < SIZEOF (handlers); h++)
+ {
+ enum iconv_ilseq_handler handler = handlers[h];
+ static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+ static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_iconveh (input, strlen (input),
+ "ISO-8859-1", "GB18030",
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 37; i++)
+ ASSERT (offsets[i] == (i < 1 ? i :
+ i < 12 ? i + 3 :
+ i < 18 ? i + 6 :
+ i + 7));
+ ASSERT (offsets[37] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ }
+# endif
+
/* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
for (h = 0; h < SIZEOF (handlers); h++)
{
@@ -931,6 +1297,7 @@ main ()
free (offsets);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
{
static const char expected[] = "Rafa? Maszkowski";
ASSERT (retval == 0);
@@ -1023,6 +1390,7 @@ main ()
ASSERT (result == NULL && errno == EILSEQ);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
{
static const char expected[] = "Rafa? Maszkowski";
ASSERT (result != NULL);
@@ -1053,6 +1421,20 @@ main ()
free (result);
}
+# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__)
+ /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
+ for (h = 0; h < SIZEOF (handlers); h++)
+ {
+ enum iconv_ilseq_handler handler = handlers[h];
+ static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+ static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
+ char *result = str_iconveh (input, "ISO-8859-1", "GB18030", handler);
+ ASSERT (result != NULL);
+ ASSERT (strcmp (result, expected) == 0);
+ free (result);
+ }
+# endif
+
/* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
for (h = 0; h < SIZEOF (handlers); h++)
{
@@ -1077,6 +1459,7 @@ main ()
ASSERT (result == NULL && errno == EILSEQ);
break;
case iconveh_question_mark:
+ case iconveh_replacement_character:
{
static const char expected[] = "Costs: 27 ?";
ASSERT (result != NULL);
@@ -1106,6 +1489,14 @@ main ()
free (result);
}
+ /* -------------------------------- Done. -------------------------------- */
+
+ if (cd_ascii_to_88591 != (iconv_t)(-1))
+ iconv_close (cd_ascii_to_88591);
+ iconv_close (cd_ascii_to_utf8);
+ if (cd_utf7_to_utf8 != (iconv_t)(-1))
+ iconv_close (cd_utf7_to_utf8);
+
#endif
return 0;