summary refs log tree commit diff
path: root/tests/uninorm/test-decomposition.c
diff options
context:
space:
mode:
Diffstat (limited to 'tests/uninorm/test-decomposition.c')
-rw-r--r-- tests/uninorm/test-decomposition.c 220
1 files changed, 220 insertions, 0 deletions
diff --git a/tests/uninorm/test-decomposition.c b/tests/uninorm/test-decomposition.c
new file mode 100644
index 00000000..684dee45
--- /dev/null
+++ b/tests/uninorm/test-decomposition.c
@@ -0,0 +1,220 @@
+/* Test of decomposition of Unicode characters.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
+
+#include <config.h>
+
+#include "uninorm.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+/* ASSERT (expr): when EXPR is false, print <file>:<line>: assertion failed to stderr, flush stderr and abort; EXPR is evaluated once. */
+#define ASSERT(expr) \
+ do \
+ { \
+ if (!(expr)) \
+ { \
+ fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+ fflush (stderr); \
+ abort (); \
+ } \
+ } \
+ while (0) /* wrapped in do/while (0), without a trailing semicolon, so an ASSERT call plus its semicolon is one statement */
+/* Call uc_decomposition on a fixed list of code points and check the return value, the tag and the output elements; a mismatch aborts via ASSERT, otherwise main returns 0. */
+int
+main ()
+{
+ ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH]; /* output buffer handed to every call */
+ int tag; /* written through &tag by each call; compared with UC_DECOMP_* below */
+ int ret; /* return value of each call; compared with -1 or an element count */
+
+ /* U+0020 SPACE: the call is expected to return -1. */
+ ret = uc_decomposition (0x0020, &tag, decomposed);
+ ASSERT (ret == -1);
+
+ /* U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS: UC_DECOMP_CANONICAL, U+0041 U+0308. */
+ ret = uc_decomposition (0x00C4, &tag, decomposed);
+ ASSERT (ret == 2);
+ ASSERT (tag == UC_DECOMP_CANONICAL);
+ ASSERT (decomposed[0] == 0x0041);
+ ASSERT (decomposed[1] == 0x0308);
+
+ /* U+01DE LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON: UC_DECOMP_CANONICAL, U+00C4 U+0304; U+00C4 is not expanded further. */
+ ret = uc_decomposition (0x01DE, &tag, decomposed);
+ ASSERT (ret == 2);
+ ASSERT (tag == UC_DECOMP_CANONICAL);
+ ASSERT (decomposed[0] == 0x00C4);
+ ASSERT (decomposed[1] == 0x0304);
+
+ /* U+1FC1 GREEK DIALYTIKA AND PERISPOMENI: UC_DECOMP_CANONICAL, U+00A8 U+0342. */
+ ret = uc_decomposition (0x1FC1, &tag, decomposed);
+ ASSERT (ret == 2);
+ ASSERT (tag == UC_DECOMP_CANONICAL);
+ ASSERT (decomposed[0] == 0x00A8);
+ ASSERT (decomposed[1] == 0x0342);
+
+ /* U+2113 SCRIPT SMALL L: UC_DECOMP_FONT, U+006C. */
+ ret = uc_decomposition (0x2113, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_FONT);
+ ASSERT (decomposed[0] == 0x006C);
+
+ /* U+00A0 NO-BREAK SPACE: UC_DECOMP_NOBREAK, U+0020. */
+ ret = uc_decomposition (0x00A0, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_NOBREAK);
+ ASSERT (decomposed[0] == 0x0020);
+
+ /* U+FB6C ARABIC LETTER VEH INITIAL FORM: UC_DECOMP_INITIAL, U+06A4. */
+ ret = uc_decomposition (0xFB6C, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_INITIAL);
+ ASSERT (decomposed[0] == 0x06A4);
+
+ /* U+FB6D ARABIC LETTER VEH MEDIAL FORM: UC_DECOMP_MEDIAL, U+06A4. */
+ ret = uc_decomposition (0xFB6D, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_MEDIAL);
+ ASSERT (decomposed[0] == 0x06A4);
+
+ /* U+FB6B ARABIC LETTER VEH FINAL FORM: UC_DECOMP_FINAL, U+06A4. */
+ ret = uc_decomposition (0xFB6B, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_FINAL);
+ ASSERT (decomposed[0] == 0x06A4);
+
+ /* U+FB6A ARABIC LETTER VEH ISOLATED FORM: UC_DECOMP_ISOLATED, U+06A4. */
+ ret = uc_decomposition (0xFB6A, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_ISOLATED);
+ ASSERT (decomposed[0] == 0x06A4);
+
+ /* U+246E CIRCLED NUMBER FIFTEEN: UC_DECOMP_CIRCLE, U+0031 U+0035. */
+ ret = uc_decomposition (0x246E, &tag, decomposed);
+ ASSERT (ret == 2);
+ ASSERT (tag == UC_DECOMP_CIRCLE);
+ ASSERT (decomposed[0] == 0x0031);
+ ASSERT (decomposed[1] == 0x0035);
+
+ /* U+2122 TRADE MARK SIGN: UC_DECOMP_SUPER, U+0054 U+004D. */
+ ret = uc_decomposition (0x2122, &tag, decomposed);
+ ASSERT (ret == 2);
+ ASSERT (tag == UC_DECOMP_SUPER);
+ ASSERT (decomposed[0] == 0x0054);
+ ASSERT (decomposed[1] == 0x004D);
+
+ /* U+1D62 LATIN SUBSCRIPT SMALL LETTER I: UC_DECOMP_SUB, U+0069. */
+ ret = uc_decomposition (0x1D62, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_SUB);
+ ASSERT (decomposed[0] == 0x0069);
+
+ /* U+FE35 PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS: UC_DECOMP_VERTICAL, U+0028. */
+ ret = uc_decomposition (0xFE35, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_VERTICAL);
+ ASSERT (decomposed[0] == 0x0028);
+
+ /* U+FF21 FULLWIDTH LATIN CAPITAL LETTER A: UC_DECOMP_WIDE, U+0041. */
+ ret = uc_decomposition (0xFF21, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_WIDE);
+ ASSERT (decomposed[0] == 0x0041);
+
+ /* U+FF64 HALFWIDTH IDEOGRAPHIC COMMA: UC_DECOMP_NARROW, U+3001. */
+ ret = uc_decomposition (0xFF64, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_NARROW);
+ ASSERT (decomposed[0] == 0x3001);
+
+ /* U+FE51 SMALL IDEOGRAPHIC COMMA: UC_DECOMP_SMALL, U+3001. */
+ ret = uc_decomposition (0xFE51, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_SMALL);
+ ASSERT (decomposed[0] == 0x3001);
+
+ /* U+3392 SQUARE MHZ: UC_DECOMP_SQUARE, U+004D U+0048 U+007A. */
+ ret = uc_decomposition (0x3392, &tag, decomposed);
+ ASSERT (ret == 3);
+ ASSERT (tag == UC_DECOMP_SQUARE);
+ ASSERT (decomposed[0] == 0x004D);
+ ASSERT (decomposed[1] == 0x0048);
+ ASSERT (decomposed[2] == 0x007A);
+
+ /* U+215C VULGAR FRACTION THREE EIGHTHS: UC_DECOMP_FRACTION, U+0033 U+2044 U+0038. */
+ ret = uc_decomposition (0x215C, &tag, decomposed);
+ ASSERT (ret == 3);
+ ASSERT (tag == UC_DECOMP_FRACTION);
+ ASSERT (decomposed[0] == 0x0033);
+ ASSERT (decomposed[1] == 0x2044);
+ ASSERT (decomposed[2] == 0x0038);
+
+ /* U+00B5 MICRO SIGN: UC_DECOMP_COMPAT, U+03BC. */
+ ret = uc_decomposition (0x00B5, &tag, decomposed);
+ ASSERT (ret == 1);
+ ASSERT (tag == UC_DECOMP_COMPAT);
+ ASSERT (decomposed[0] == 0x03BC);
+
+ /* U+FDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM: UC_DECOMP_ISOLATED, 18 elements (the longest result checked in this file). */
+ ret = uc_decomposition (0xFDFA, &tag, decomposed);
+ ASSERT (ret == 18);
+ ASSERT (tag == UC_DECOMP_ISOLATED);
+ ASSERT (decomposed[0] == 0x0635);
+ ASSERT (decomposed[1] == 0x0644);
+ ASSERT (decomposed[2] == 0x0649);
+ ASSERT (decomposed[3] == 0x0020);
+ ASSERT (decomposed[4] == 0x0627);
+ ASSERT (decomposed[5] == 0x0644);
+ ASSERT (decomposed[6] == 0x0644);
+ ASSERT (decomposed[7] == 0x0647);
+ ASSERT (decomposed[8] == 0x0020);
+ ASSERT (decomposed[9] == 0x0639);
+ ASSERT (decomposed[10] == 0x0644);
+ ASSERT (decomposed[11] == 0x064A);
+ ASSERT (decomposed[12] == 0x0647);
+ ASSERT (decomposed[13] == 0x0020);
+ ASSERT (decomposed[14] == 0x0648);
+ ASSERT (decomposed[15] == 0x0633);
+ ASSERT (decomposed[16] == 0x0644);
+ ASSERT (decomposed[17] == 0x0645);
+
+ /* U+AE00 HANGUL SYLLABLE GEUL: two alternative expectations, selected by the #if below. */
+ ret = uc_decomposition (0xAE00, &tag, decomposed);
+ /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>,
+ section "Clarification of Hangul Jamo Handling". */
+#if 1 /* enabled: the 2-element form U+ADF8 U+11AF */
+ ASSERT (ret == 2);
+ ASSERT (tag == UC_DECOMP_CANONICAL);
+ ASSERT (decomposed[0] == 0xADF8);
+ ASSERT (decomposed[1] == 0x11AF);
+#else /* disabled: the 3-element form U+1100 U+1173 U+11AF */
+ ASSERT (ret == 3);
+ ASSERT (tag == UC_DECOMP_CANONICAL);
+ ASSERT (decomposed[0] == 0x1100);
+ ASSERT (decomposed[1] == 0x1173);
+ ASSERT (decomposed[2] == 0x11AF);
+#endif
+
+ /* U+ADF8 HANGUL SYLLABLE GEU (first element of the enabled expectation above): UC_DECOMP_CANONICAL, U+1100 U+1173. */
+ ret = uc_decomposition (0xADF8, &tag, decomposed);
+ ASSERT (ret == 2);
+ ASSERT (tag == UC_DECOMP_CANONICAL);
+ ASSERT (decomposed[0] == 0x1100);
+ ASSERT (decomposed[1] == 0x1173);
+
+ return 0;
+}