diff options
Diffstat (limited to 'tests/test-striconveh.c')
| -rw-r--r-- | tests/test-striconveh.c | 399 | 
1 file changed, 395 insertions, 4 deletions
| diff --git a/tests/test-striconveh.c b/tests/test-striconveh.c index cb21a4c3..781aa525 100644 --- a/tests/test-striconveh.c +++ b/tests/test-striconveh.c @@ -1,5 +1,5 @@  /* Test of character set conversion with error handling. -   Copyright (C) 2007-2018 Free Software Foundation, Inc. +   Copyright (C) 2007-2022 Free Software Foundation, Inc.     This program is free software: you can redistribute it and/or modify     it under the terms of the GNU General Public License as published by @@ -44,16 +44,21 @@ new_offsets (size_t n)  int  main ()  { +#if HAVE_ICONV    static enum iconv_ilseq_handler handlers[] = -    { iconveh_error, iconveh_question_mark, iconveh_escape_sequence }; +    { +      iconveh_error, +      iconveh_question_mark, +      iconveh_replacement_character, +      iconveh_escape_sequence +    };    size_t indirect;    size_t h;    size_t o;    size_t i; -#if HAVE_ICONV    /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1, -     ISO-8859-2, and UTF-8.  */ +     ISO-8859-2, UTF-8, and with libiconv or glibc also GB18030.  
*/    iconv_t cd_ascii_to_88591 = iconv_open ("ISO-8859-1", "ASCII");    iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");    iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2"); @@ -63,6 +68,12 @@ main ()    iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");    iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");    iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7"); +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  iconv_t cd_ascii_to_gb18030 = iconv_open ("GB18030", "ASCII"); +  iconv_t cd_utf8_to_gb18030 = iconv_open ("GB18030", "UTF-8"); +  iconv_t cd_88591_to_gb18030 = iconv_open ("GB18030", "ISO-8859-1"); +  iconv_t cd_utf7_to_gb18030 = iconv_open ("GB18030", "UTF-7"); +# endif    iconveh_t cdeh_ascii_to_88591;    iconveh_t cdeh_ascii_to_88591_indirectly;    iconveh_t cdeh_88592_to_88591; @@ -71,12 +82,21 @@ main ()    iconveh_t cdeh_88591_to_utf8;    iconveh_t cdeh_utf8_to_88591;    iconveh_t cdeh_utf7_to_utf8; +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  iconveh_t cdeh_ascii_to_gb18030; +  iconveh_t cdeh_88591_to_gb18030; +  iconveh_t cdeh_utf7_to_gb18030; +# endif    ASSERT (cd_ascii_to_utf8 != (iconv_t)(-1));    ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));    ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));    ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));    ASSERT (cd_utf8_to_88592 != (iconv_t)(-1)); +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  ASSERT (cd_ascii_to_gb18030 != (iconv_t)(-1)); +  ASSERT (cd_utf8_to_gb18030 != (iconv_t)(-1)); +# endif    cdeh_ascii_to_88591.cd = cd_ascii_to_88591;    cdeh_ascii_to_88591.cd1 = cd_ascii_to_utf8; @@ -110,6 +130,20 @@ main ()    cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8;    cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1); +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  cdeh_ascii_to_gb18030.cd = cd_ascii_to_gb18030; +  
cdeh_ascii_to_gb18030.cd1 = cd_ascii_to_utf8; +  cdeh_ascii_to_gb18030.cd2 = cd_utf8_to_gb18030; + +  cdeh_88591_to_gb18030.cd = cd_88591_to_gb18030; +  cdeh_88591_to_gb18030.cd1 = cd_88591_to_utf8; +  cdeh_88591_to_gb18030.cd2 = cd_utf8_to_gb18030; + +  cdeh_utf7_to_gb18030.cd = cd_utf7_to_gb18030; +  cdeh_utf7_to_gb18030.cd1 = cd_utf7_to_utf8; +  cdeh_utf7_to_gb18030.cd2 = cd_utf8_to_gb18030; +# endif +    /* ------------------------ Test mem_cd_iconveh() ------------------------ */    /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.  */ @@ -175,6 +209,7 @@ main ()                      free (offsets);                    break;                  case iconveh_question_mark: +                case iconveh_replacement_character:                  case iconveh_escape_sequence:                    {                      static const char expected[] = "Rafa? Maszkowski"; @@ -224,6 +259,7 @@ main ()                      free (offsets);                    break;                  case iconveh_question_mark: +                case iconveh_replacement_character:                    {                      static const char expected[] = "Rafa? Maszkowski";                      ASSERT (retval == 0); @@ -294,6 +330,41 @@ main ()          }      } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  /* Test conversion from ISO-8859-1 to GB18030 with no errors.  */ +  for (h = 0; h < SIZEOF (handlers); h++) +    { +      enum iconv_ilseq_handler handler = handlers[h]; +      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; +      static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118"; +      for (o = 0; o < 2; o++) +        { +          size_t *offsets = (o ? 
new_offsets (strlen (input)) : NULL); +          char *result = NULL; +          size_t length = 0; +          int retval = mem_cd_iconveh (input, strlen (input), +                                       &cdeh_88591_to_gb18030, +                                       handler, +                                       offsets, +                                       &result, &length); +          ASSERT (retval == 0); +          ASSERT (length == strlen (expected)); +          ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); +          if (o) +            { +              for (i = 0; i < 37; i++) +                ASSERT (offsets[i] == (i < 1 ? i : +                                       i < 12 ? i + 3 : +                                       i < 18 ? i + 6 : +                                       i + 7)); +              ASSERT (offsets[37] == MAGIC); +              free (offsets); +            } +          free (result); +        } +    } +# endif +    /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */    for (h = 0; h < SIZEOF (handlers); h++)      { @@ -371,9 +442,87 @@ main ()                  free (result);                }                break; +            case iconveh_replacement_character: +              { +                static const char expected[] = "Rafa\357\277\275 Maszkowski"; +                ASSERT (retval == 0); +                ASSERT (length == strlen (expected)); +                ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); +                if (o) +                  { +                    for (i = 0; i < 16; i++) +                      ASSERT (offsets[i] == (i < 5 ? 
i : i + 2)); +                    ASSERT (offsets[16] == MAGIC); +                    free (offsets); +                  } +                free (result); +              } +              break; +            } +        } +    } + +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ).  */ +  for (h = 0; h < SIZEOF (handlers); h++) +    { +      enum iconv_ilseq_handler handler = handlers[h]; +      static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */ +      for (o = 0; o < 2; o++) +        { +          size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); +          char *result = NULL; +          size_t length = 0; +          int retval = mem_cd_iconveh (input, strlen (input), +                                       &cdeh_ascii_to_gb18030, +                                       handler, +                                       offsets, +                                       &result, &length); +          switch (handler) +            { +            case iconveh_error: +              ASSERT (retval == -1 && errno == EILSEQ); +              ASSERT (result == NULL); +              if (o) +                free (offsets); +              break; +            case iconveh_question_mark: +            case iconveh_escape_sequence: +              { +                static const char expected[] = "Rafa? 
Maszkowski"; +                ASSERT (retval == 0); +                ASSERT (length == strlen (expected)); +                ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); +                if (o) +                  { +                    for (i = 0; i < 16; i++) +                      ASSERT (offsets[i] == i); +                    ASSERT (offsets[16] == MAGIC); +                    free (offsets); +                  } +                free (result); +              } +              break; +            case iconveh_replacement_character: +              { +                static const char expected[] = "Rafa\2041\2447 Maszkowski"; +                ASSERT (retval == 0); +                ASSERT (length == strlen (expected)); +                ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); +                if (o) +                  { +                    for (i = 0; i < 16; i++) +                      ASSERT (offsets[i] == (i < 5 ? i : i + 3)); +                    ASSERT (offsets[16] == MAGIC); +                    free (offsets); +                  } +                free (result); +              } +              break;              }          }      } +# endif    /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */    for (h = 0; h < SIZEOF (handlers); h++) @@ -399,6 +548,7 @@ main ()                  free (offsets);                break;              case iconveh_question_mark: +            case iconveh_replacement_character:                {                  static const char expected[] = "Rafa? Maszkowski";                  ASSERT (retval == 0); @@ -496,6 +646,34 @@ main ()            free (result);          } +#  if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +      /* Test conversion from UTF-7 to GB18030 with EINVAL.  
*/ +      for (h = 0; h < SIZEOF (handlers); h++) +        { +          enum iconv_ilseq_handler handler = handlers[h]; +          /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40.  It would +             convert to U+5432 U+D83F U+D840 but these are Unicode surrogates.  */ +          static const char input[] = "+VDLYP9hA"; +          static const char expected1[] = "\337\305"; /* 吲 glibc */ +          static const char expected2[] = ""; /* libiconv */ +          char *result = NULL; +          size_t length = 0; +          int retval = mem_cd_iconveh (input, 7, +                                       &cdeh_utf7_to_gb18030, +                                       handler, +                                       NULL, +                                       &result, &length); +          ASSERT (retval == 0); +          ASSERT (length == strlen (expected1) || length == strlen (expected2)); +          ASSERT (result != NULL); +          if (length == strlen (expected1)) +            ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); +          else +            ASSERT (memcmp (result, expected2, strlen (expected2)) == 0); +          free (result); +        } +#  endif +        /* Disabled on NetBSD, because NetBSD 5.0 iconv() is buggy: it converts           the input "+2D/YQNhB" to U+1FED8 U+3FD8 U+40D8.  
*/  #  if !(defined __NetBSD__ && !defined _LIBICONV_VERSION) @@ -544,8 +722,98 @@ main ()                  free (result);                }                break; +            case iconveh_replacement_character: +              { +                /* glibc result */ +                static const char expected1[] = "\357\277\275\357\277\275\357\277\275\357\277\275\357\277\275"; +                /* libiconv <= 1.12 result */ +                static const char expected2[] = "\357\277\2752D/YQNhB"; +                /* libiconv >= 1.13 result */ +                static const char expected3[] = "\357\277\275\340\277\266\341\200\266"; +                ASSERT (retval == 0); +                ASSERT (length == strlen (expected1) +                        || length == strlen (expected2) +                        || length == strlen (expected3)); +                ASSERT (result != NULL); +                if (length == strlen (expected1)) +                  ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); +                else if (length == strlen (expected2)) +                  ASSERT (memcmp (result, expected2, strlen (expected2)) == 0); +                else +                  ASSERT (memcmp (result, expected3, strlen (expected3)) == 0); +                free (result); +              }              }          } + +#   if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +      /* Test conversion from UTF-7 to GB18030 with EILSEQ.  */ +      for (h = 0; h < SIZEOF (handlers); h++) +        { +          enum iconv_ilseq_handler handler = handlers[h]; +          /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41.  It would +             convert to U+D83F U+D840 U+D841 but these are Unicode surrogates.  
*/ +          static const char input[] = "+2D/YQNhB"; +          char *result = NULL; +          size_t length = 0; +          int retval = mem_cd_iconveh (input, strlen (input), +                                       &cdeh_utf7_to_gb18030, +                                       handler, +                                       NULL, +                                       &result, &length); +          switch (handler) +            { +            case iconveh_error: +              ASSERT (retval == -1 && errno == EILSEQ); +              ASSERT (result == NULL); +              break; +            case iconveh_question_mark: +            case iconveh_escape_sequence: +              { +                /* glibc result */ +                static const char expected1[] = "?????"; +                /* libiconv <= 1.12 result */ +                static const char expected2[] = "?2D/YQNhB"; +                /* libiconv behaviour changed in version 1.13: the result is +                   '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left +                   by 6 bits.  
*/ +                static const char expected3[] = "?\2013\2030\2013\2114"; +                ASSERT (retval == 0); +                ASSERT (length == strlen (expected1) +                        || length == strlen (expected2) +                        || length == strlen (expected3)); +                ASSERT (result != NULL); +                if (length == strlen (expected1)) +                  ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); +                else if (length == strlen (expected2)) +                  ASSERT (memcmp (result, expected2, strlen (expected2)) == 0 +                          || memcmp (result, expected3, strlen (expected3)) == 0); +                free (result); +              } +              break; +            case iconveh_replacement_character: +              { +                /* glibc result */ +                static const char expected1[] = "\2041\2447\2041\2447\2041\2447\2041\2447\2041\2447"; +                /* libiconv <= 1.12 result */ +                static const char expected2[] = "\2041\24472D/YQNhB"; +                /* libiconv >= 1.13 result */ +                static const char expected3[] = "\2041\2447\2013\2030\2013\2114"; +                ASSERT (retval == 0); +                ASSERT (length == strlen (expected1) +                        || length == strlen (expected2) +                        || length == strlen (expected3)); +                ASSERT (result != NULL); +                if (length == strlen (expected1)) +                  ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); +                else if (length == strlen (expected2)) +                  ASSERT (memcmp (result, expected2, strlen (expected2)) == 0 +                          || memcmp (result, expected3, strlen (expected3)) == 0); +                free (result); +              } +            } +        } +#   endif  #  endif  # endif      } @@ -589,6 +857,7 @@ main ()                ASSERT (result == NULL && errno == 
EILSEQ);                break;              case iconveh_question_mark: +            case iconveh_replacement_character:              case iconveh_escape_sequence:                {                  static const char expected[] = "Rafa? Maszkowski"; @@ -619,6 +888,7 @@ main ()                ASSERT (result == NULL && errno == EILSEQ);                break;              case iconveh_question_mark: +            case iconveh_replacement_character:                {                  static const char expected[] = "Rafa? Maszkowski";                  ASSERT (result != NULL); @@ -652,6 +922,22 @@ main ()        free (result);      } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  /* Test conversion from ISO-8859-1 to GB18030 with no errors.  */ +  for (h = 0; h < SIZEOF (handlers); h++) +    { +      enum iconv_ilseq_handler handler = handlers[h]; +      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; +      static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118"; +      char *result = str_cd_iconveh (input, +                                     &cdeh_88591_to_gb18030, +                                     handler); +      ASSERT (result != NULL); +      ASSERT (strcmp (result, expected) == 0); +      free (result); +    } +# endif +    /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  
*/    for (h = 0; h < SIZEOF (handlers); h++)      { @@ -688,8 +974,51 @@ main ()              free (result);            }            break; +        case iconveh_replacement_character: +          { +            static const char expected[] = "Rafa\357\277\275 Maszkowski"; +            ASSERT (result != NULL); +            ASSERT (strcmp (result, expected) == 0); +            free (result); +          } +          break; +        } +    } + +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ).  */ +  for (h = 0; h < SIZEOF (handlers); h++) +    { +      enum iconv_ilseq_handler handler = handlers[h]; +      static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */ +      char *result = str_cd_iconveh (input, +                                     &cdeh_ascii_to_gb18030, +                                     handler); +      switch (handler) +        { +        case iconveh_error: +          ASSERT (result == NULL && errno == EILSEQ); +          break; +        case iconveh_question_mark: +        case iconveh_escape_sequence: +          { +            static const char expected[] = "Rafa? Maszkowski"; +            ASSERT (result != NULL); +            ASSERT (strcmp (result, expected) == 0); +            free (result); +          } +          break; +        case iconveh_replacement_character: +          { +            static const char expected[] = "Rafa\2041\2447 Maszkowski"; +            ASSERT (result != NULL); +            ASSERT (strcmp (result, expected) == 0); +            free (result); +          } +          break;          }      } +# endif    /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  
*/    for (h = 0; h < SIZEOF (handlers); h++) @@ -705,6 +1034,7 @@ main ()            ASSERT (result == NULL && errno == EILSEQ);            break;          case iconveh_question_mark: +        case iconveh_replacement_character:            {              static const char expected[] = "Costs: 27 ?";              ASSERT (result != NULL); @@ -801,6 +1131,7 @@ main ()                  free (offsets);                break;              case iconveh_question_mark: +            case iconveh_replacement_character:                {                  static const char expected[] = "Rafa? Maszkowski";                  ASSERT (retval == 0); @@ -870,6 +1201,41 @@ main ()          }      } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  /* Test conversion from ISO-8859-1 to GB18030 with no errors.  */ +  for (h = 0; h < SIZEOF (handlers); h++) +    { +      enum iconv_ilseq_handler handler = handlers[h]; +      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; +      static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118"; +      for (o = 0; o < 2; o++) +        { +          size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); +          char *result = NULL; +          size_t length = 0; +          int retval = mem_iconveh (input, strlen (input), +                                    "ISO-8859-1", "GB18030", +                                    handler, +                                    offsets, +                                    &result, &length); +          ASSERT (retval == 0); +          ASSERT (length == strlen (expected)); +          ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); +          if (o) +            { +              for (i = 0; i < 37; i++) +                ASSERT (offsets[i] == (i < 1 ? i : +                                       i < 12 ? i + 3 : +                                       i < 18 ? 
i + 6 : +                                       i + 7)); +              ASSERT (offsets[37] == MAGIC); +              free (offsets); +            } +          free (result); +        } +    } +# endif +    /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */    for (h = 0; h < SIZEOF (handlers); h++)      { @@ -931,6 +1297,7 @@ main ()                  free (offsets);                break;              case iconveh_question_mark: +            case iconveh_replacement_character:                {                  static const char expected[] = "Rafa? Maszkowski";                  ASSERT (retval == 0); @@ -1023,6 +1390,7 @@ main ()            ASSERT (result == NULL && errno == EILSEQ);            break;          case iconveh_question_mark: +        case iconveh_replacement_character:            {              static const char expected[] = "Rafa? Maszkowski";              ASSERT (result != NULL); @@ -1053,6 +1421,20 @@ main ()        free (result);      } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) +  /* Test conversion from ISO-8859-1 to GB18030 with no errors.  */ +  for (h = 0; h < SIZEOF (handlers); h++) +    { +      enum iconv_ilseq_handler handler = handlers[h]; +      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; +      static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118"; +      char *result = str_iconveh (input, "ISO-8859-1", "GB18030", handler); +      ASSERT (result != NULL); +      ASSERT (strcmp (result, expected) == 0); +      free (result); +    } +# endif +    /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  
*/    for (h = 0; h < SIZEOF (handlers); h++)      { @@ -1077,6 +1459,7 @@ main ()            ASSERT (result == NULL && errno == EILSEQ);            break;          case iconveh_question_mark: +        case iconveh_replacement_character:            {              static const char expected[] = "Costs: 27 ?";              ASSERT (result != NULL); @@ -1106,6 +1489,14 @@ main ()        free (result);      } +  /* -------------------------------- Done. -------------------------------- */ + +  if (cd_ascii_to_88591 != (iconv_t)(-1)) +    iconv_close (cd_ascii_to_88591); +  iconv_close (cd_ascii_to_utf8); +  if (cd_utf7_to_utf8 != (iconv_t)(-1)) +    iconv_close (cd_utf7_to_utf8); +  #endif    return 0; | 
