From fa095a4504cbe668e4244547e2c141597bea4ecf Mon Sep 17 00:00:00 2001 From: Andreas Rottmann Date: Mon, 14 Sep 2009 12:32:44 +0200 Subject: Imported Upstream version 0.9.1 --- tests/unicase/test-casecmp.h | 69 +++ tests/unicase/test-cased.c | 135 ++++ tests/unicase/test-ignorable.c | 246 ++++++++ tests/unicase/test-is-cased.h | 58 ++ tests/unicase/test-is-casefolded.h | 43 ++ tests/unicase/test-is-lowercase.h | 95 +++ tests/unicase/test-is-titlecase.h | 95 +++ tests/unicase/test-is-uppercase.h | 95 +++ tests/unicase/test-locale-language.c | 56 ++ tests/unicase/test-locale-language.sh | 30 + tests/unicase/test-mapping-part1.h | 45 ++ tests/unicase/test-mapping-part2.h | 37 ++ tests/unicase/test-predicate-part1.h | 45 ++ tests/unicase/test-predicate-part2.h | 37 ++ tests/unicase/test-u16-casecmp.c | 336 ++++++++++ tests/unicase/test-u16-casecoll.c | 54 ++ tests/unicase/test-u16-casefold.c | 214 +++++++ tests/unicase/test-u16-is-cased.c | 51 ++ tests/unicase/test-u16-is-casefolded.c | 65 ++ tests/unicase/test-u16-is-lowercase.c | 93 +++ tests/unicase/test-u16-is-titlecase.c | 51 ++ tests/unicase/test-u16-is-uppercase.c | 93 +++ tests/unicase/test-u16-tolower.c | 251 ++++++++ tests/unicase/test-u16-totitle.c | 211 +++++++ tests/unicase/test-u16-toupper.c | 211 +++++++ tests/unicase/test-u32-casecmp.c | 336 ++++++++++ tests/unicase/test-u32-casecoll.c | 54 ++ tests/unicase/test-u32-casefold.c | 214 +++++++ tests/unicase/test-u32-is-cased.c | 51 ++ tests/unicase/test-u32-is-casefolded.c | 65 ++ tests/unicase/test-u32-is-lowercase.c | 93 +++ tests/unicase/test-u32-is-titlecase.c | 51 ++ tests/unicase/test-u32-is-uppercase.c | 93 +++ tests/unicase/test-u32-tolower.c | 251 ++++++++ tests/unicase/test-u32-totitle.c | 211 +++++++ tests/unicase/test-u32-toupper.c | 211 +++++++ tests/unicase/test-u8-casecmp.c | 345 +++++++++++ tests/unicase/test-u8-casecoll.c | 54 ++ tests/unicase/test-u8-casefold.c | 220 +++++++ tests/unicase/test-u8-is-cased.c | 51 ++ 
tests/unicase/test-u8-is-casefolded.c | 65 ++ tests/unicase/test-u8-is-lowercase.c | 93 +++ tests/unicase/test-u8-is-titlecase.c | 51 ++ tests/unicase/test-u8-is-uppercase.c | 93 +++ tests/unicase/test-u8-tolower.c | 257 ++++++++ tests/unicase/test-u8-totitle.c | 217 +++++++ tests/unicase/test-u8-toupper.c | 217 +++++++ tests/unicase/test-uc_tolower.c | 1047 +++++++++++++++++++++++++++++++ tests/unicase/test-uc_totitle.c | 1055 ++++++++++++++++++++++++++++++++ tests/unicase/test-uc_toupper.c | 1055 ++++++++++++++++++++++++++++++++ tests/unicase/test-ulc-casecmp.c | 408 ++++++++++++ tests/unicase/test-ulc-casecmp1.sh | 15 + tests/unicase/test-ulc-casecmp2.sh | 15 + tests/unicase/test-ulc-casecoll.c | 59 ++ tests/unicase/test-ulc-casecoll1.sh | 15 + tests/unicase/test-ulc-casecoll2.sh | 15 + 56 files changed, 9693 insertions(+) create mode 100644 tests/unicase/test-casecmp.h create mode 100644 tests/unicase/test-cased.c create mode 100644 tests/unicase/test-ignorable.c create mode 100644 tests/unicase/test-is-cased.h create mode 100644 tests/unicase/test-is-casefolded.h create mode 100644 tests/unicase/test-is-lowercase.h create mode 100644 tests/unicase/test-is-titlecase.h create mode 100644 tests/unicase/test-is-uppercase.h create mode 100644 tests/unicase/test-locale-language.c create mode 100755 tests/unicase/test-locale-language.sh create mode 100644 tests/unicase/test-mapping-part1.h create mode 100644 tests/unicase/test-mapping-part2.h create mode 100644 tests/unicase/test-predicate-part1.h create mode 100644 tests/unicase/test-predicate-part2.h create mode 100644 tests/unicase/test-u16-casecmp.c create mode 100644 tests/unicase/test-u16-casecoll.c create mode 100644 tests/unicase/test-u16-casefold.c create mode 100644 tests/unicase/test-u16-is-cased.c create mode 100644 tests/unicase/test-u16-is-casefolded.c create mode 100644 tests/unicase/test-u16-is-lowercase.c create mode 100644 tests/unicase/test-u16-is-titlecase.c create mode 100644 
tests/unicase/test-u16-is-uppercase.c create mode 100644 tests/unicase/test-u16-tolower.c create mode 100644 tests/unicase/test-u16-totitle.c create mode 100644 tests/unicase/test-u16-toupper.c create mode 100644 tests/unicase/test-u32-casecmp.c create mode 100644 tests/unicase/test-u32-casecoll.c create mode 100644 tests/unicase/test-u32-casefold.c create mode 100644 tests/unicase/test-u32-is-cased.c create mode 100644 tests/unicase/test-u32-is-casefolded.c create mode 100644 tests/unicase/test-u32-is-lowercase.c create mode 100644 tests/unicase/test-u32-is-titlecase.c create mode 100644 tests/unicase/test-u32-is-uppercase.c create mode 100644 tests/unicase/test-u32-tolower.c create mode 100644 tests/unicase/test-u32-totitle.c create mode 100644 tests/unicase/test-u32-toupper.c create mode 100644 tests/unicase/test-u8-casecmp.c create mode 100644 tests/unicase/test-u8-casecoll.c create mode 100644 tests/unicase/test-u8-casefold.c create mode 100644 tests/unicase/test-u8-is-cased.c create mode 100644 tests/unicase/test-u8-is-casefolded.c create mode 100644 tests/unicase/test-u8-is-lowercase.c create mode 100644 tests/unicase/test-u8-is-titlecase.c create mode 100644 tests/unicase/test-u8-is-uppercase.c create mode 100644 tests/unicase/test-u8-tolower.c create mode 100644 tests/unicase/test-u8-totitle.c create mode 100644 tests/unicase/test-u8-toupper.c create mode 100644 tests/unicase/test-uc_tolower.c create mode 100644 tests/unicase/test-uc_totitle.c create mode 100644 tests/unicase/test-uc_toupper.c create mode 100644 tests/unicase/test-ulc-casecmp.c create mode 100755 tests/unicase/test-ulc-casecmp1.sh create mode 100755 tests/unicase/test-ulc-casecmp2.sh create mode 100644 tests/unicase/test-ulc-casecoll.c create mode 100755 tests/unicase/test-ulc-casecoll1.sh create mode 100755 tests/unicase/test-ulc-casecoll2.sh (limited to 'tests/unicase') diff --git a/tests/unicase/test-casecmp.h b/tests/unicase/test-casecmp.h new file mode 100644 index 00000000..d057f898 
--- /dev/null +++ b/tests/unicase/test-casecmp.h @@ -0,0 +1,69 @@ +/* Test of case and normalization insensitive comparison of Unicode strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. */ + +static void +test_ascii (int (*my_casecmp) (const UNIT *, size_t, const UNIT *, size_t, const char *, uninorm_t, int *), + uninorm_t nf) +{ + /* Empty string. */ + { + int cmp; + + ASSERT (my_casecmp (NULL, 0, NULL, 0, NULL, nf, &cmp) == 0); + ASSERT (cmp == 0); + } + { + static const UNIT input[] = { 'x', 'y' }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), NULL, 0, NULL, nf, &cmp) == 0); + ASSERT (cmp == 1); + + ASSERT (my_casecmp (NULL, 0, input, SIZEOF (input), NULL, nf, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input, SIZEOF (input), input, SIZEOF (input), NULL, nf, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Normal lexicographic order. */ + { + static const UNIT input1[] = { 'A', 'm', 'e', 'r', 'i', 'c', 'a' }; + static const UNIT input2[] = { 'A', 'm', 'i', 'g', 'o' }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, nf, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, nf, &cmp) == 0); + ASSERT (cmp == 1); + } + + /* Shorter and longer strings. 
*/ + { + static const UNIT input1[] = { 'R', 'e', 'a', 'g', 'a', 'n' }; + static const UNIT input2[] = { 'R', 'e', 'a', 'g', 'a', 'n', 'o', 'm', 'i', 'c', 's' }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, nf, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, nf, &cmp) == 0); + ASSERT (cmp == 1); + } +} diff --git a/tests/unicase/test-cased.c b/tests/unicase/test-cased.c new file mode 100644 index 00000000..37af3d22 --- /dev/null +++ b/tests/unicase/test-cased.c @@ -0,0 +1,135 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Test the Unicode character type functions. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#include "test-predicate-part1.h" + + { 0x0041, 0x005A }, + { 0x0061, 0x007A }, + { 0x00AA, 0x00AA }, + { 0x00B5, 0x00B5 }, + { 0x00BA, 0x00BA }, + { 0x00C0, 0x00D6 }, + { 0x00D8, 0x00F6 }, + { 0x00F8, 0x01BA }, + { 0x01BC, 0x01BF }, + { 0x01C4, 0x0293 }, + { 0x0295, 0x02B8 }, + { 0x02C0, 0x02C1 }, + { 0x02E0, 0x02E4 }, + { 0x0345, 0x0345 }, + { 0x0370, 0x0373 }, + { 0x0376, 0x0377 }, + { 0x037A, 0x037D }, + { 0x0386, 0x0386 }, + { 0x0388, 0x038A }, + { 0x038C, 0x038C }, + { 0x038E, 0x03A1 }, + { 0x03A3, 0x03F5 }, + { 0x03F7, 0x0481 }, + { 0x048A, 0x0523 }, + { 0x0531, 0x0556 }, + { 0x0561, 0x0587 }, + { 0x10A0, 0x10C5 }, + { 0x1D00, 0x1DBF }, + { 0x1E00, 0x1F15 }, + { 0x1F18, 0x1F1D }, + { 0x1F20, 0x1F45 }, + { 0x1F48, 0x1F4D }, + { 0x1F50, 0x1F57 }, + { 0x1F59, 0x1F59 }, + { 0x1F5B, 0x1F5B }, + { 0x1F5D, 0x1F5D }, + { 0x1F5F, 0x1F7D }, + { 0x1F80, 0x1FB4 }, + { 0x1FB6, 0x1FBC }, + { 0x1FBE, 0x1FBE }, + { 0x1FC2, 0x1FC4 }, + { 0x1FC6, 0x1FCC }, + { 0x1FD0, 0x1FD3 }, + { 0x1FD6, 0x1FDB }, + { 0x1FE0, 0x1FEC }, + { 0x1FF2, 0x1FF4 }, + { 0x1FF6, 0x1FFC }, + { 0x2071, 0x2071 }, + { 0x207F, 0x207F }, + { 0x2090, 0x2094 }, + { 0x2102, 0x2102 }, + { 0x2107, 0x2107 }, + { 0x210A, 0x2113 }, + { 0x2115, 0x2115 }, + { 0x2119, 0x211D }, + { 0x2124, 0x2124 }, + { 0x2126, 0x2126 }, + { 0x2128, 0x2128 }, + { 0x212A, 0x212D }, + { 0x212F, 0x2134 }, + { 0x2139, 0x2139 }, + { 0x213C, 0x213F }, + { 0x2145, 0x2149 }, + { 0x214E, 0x214E }, + { 0x2160, 0x217F }, + { 0x2183, 0x2184 }, + { 0x24B6, 0x24E9 }, + { 0x2C00, 0x2C2E }, + { 0x2C30, 0x2C5E }, + { 0x2C60, 0x2C6F }, + { 0x2C71, 0x2C7D }, + { 0x2C80, 0x2CE4 }, + { 0x2D00, 0x2D25 }, + { 0xA640, 0xA65F }, + { 0xA662, 0xA66D }, + { 0xA680, 0xA697 }, + { 0xA722, 0xA787 }, + { 0xA78B, 0xA78C }, + { 0xFB00, 0xFB06 }, + { 0xFB13, 0xFB17 }, + { 0xFF21, 0xFF3A }, + { 0xFF41, 0xFF5A }, + { 0x10400, 0x1044F }, + { 0x1D400, 0x1D454 }, + { 0x1D456, 0x1D49C }, + { 0x1D49E, 0x1D49F }, + { 0x1D4A2, 0x1D4A2 }, + { 0x1D4A5, 0x1D4A6 }, + { 
0x1D4A9, 0x1D4AC }, + { 0x1D4AE, 0x1D4B9 }, + { 0x1D4BB, 0x1D4BB }, + { 0x1D4BD, 0x1D4C3 }, + { 0x1D4C5, 0x1D505 }, + { 0x1D507, 0x1D50A }, + { 0x1D50D, 0x1D514 }, + { 0x1D516, 0x1D51C }, + { 0x1D51E, 0x1D539 }, + { 0x1D53B, 0x1D53E }, + { 0x1D540, 0x1D544 }, + { 0x1D546, 0x1D546 }, + { 0x1D54A, 0x1D550 }, + { 0x1D552, 0x1D6A5 }, + { 0x1D6A8, 0x1D6C0 }, + { 0x1D6C2, 0x1D6DA }, + { 0x1D6DC, 0x1D6FA }, + { 0x1D6FC, 0x1D714 }, + { 0x1D716, 0x1D734 }, + { 0x1D736, 0x1D74E }, + { 0x1D750, 0x1D76E }, + { 0x1D770, 0x1D788 }, + { 0x1D78A, 0x1D7A8 }, + { 0x1D7AA, 0x1D7C2 }, + { 0x1D7C4, 0x1D7CB } + +#define PREDICATE(c) uc_is_cased (c) +#include "test-predicate-part2.h" diff --git a/tests/unicase/test-ignorable.c b/tests/unicase/test-ignorable.c new file mode 100644 index 00000000..94f809e9 --- /dev/null +++ b/tests/unicase/test-ignorable.c @@ -0,0 +1,246 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Test the Unicode character type functions. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#include "test-predicate-part1.h" + + { 0x0027, 0x0027 }, + { 0x002E, 0x002E }, + { 0x003A, 0x003A }, + { 0x005E, 0x005E }, + { 0x0060, 0x0060 }, + { 0x00A8, 0x00A8 }, + { 0x00AD, 0x00AD }, + { 0x00AF, 0x00AF }, + { 0x00B4, 0x00B4 }, + { 0x00B7, 0x00B8 }, + { 0x02B9, 0x02BF }, + { 0x02C2, 0x02DF }, + { 0x02E5, 0x0344 }, + { 0x0346, 0x036F }, + { 0x0374, 0x0375 }, + { 0x0384, 0x0385 }, + { 0x0387, 0x0387 }, + { 0x0483, 0x0489 }, + { 0x0559, 0x0559 }, + { 0x0591, 0x05BD }, + { 0x05BF, 0x05BF }, + { 0x05C1, 0x05C2 }, + { 0x05C4, 0x05C5 }, + { 0x05C7, 0x05C7 }, + { 0x05F4, 0x05F4 }, + { 0x0600, 0x0603 }, + { 0x0610, 0x061A }, + { 0x0640, 0x0640 }, + { 0x064B, 0x065E }, + { 0x0670, 0x0670 }, + { 0x06D6, 0x06E8 }, + { 0x06EA, 0x06ED }, + { 0x070F, 0x070F }, + { 0x0711, 0x0711 }, + { 0x0730, 0x074A }, + { 0x07A6, 0x07B0 }, + { 0x07EB, 0x07F5 }, + { 0x07FA, 0x07FA }, + { 0x0901, 0x0902 }, + { 0x093C, 0x093C }, + { 0x0941, 0x0948 }, + { 0x094D, 0x094D }, + { 0x0951, 0x0954 }, + { 0x0962, 0x0963 }, + { 0x0971, 0x0971 }, + { 0x0981, 0x0981 }, + { 0x09BC, 0x09BC }, + { 0x09C1, 0x09C4 }, + { 0x09CD, 0x09CD }, + { 0x09E2, 0x09E3 }, + { 0x0A01, 0x0A02 }, + { 0x0A3C, 0x0A3C }, + { 0x0A41, 0x0A42 }, + { 0x0A47, 0x0A48 }, + { 0x0A4B, 0x0A4D }, + { 0x0A51, 0x0A51 }, + { 0x0A70, 0x0A71 }, + { 0x0A75, 0x0A75 }, + { 0x0A81, 0x0A82 }, + { 0x0ABC, 0x0ABC }, + { 0x0AC1, 0x0AC5 }, + { 0x0AC7, 0x0AC8 }, + { 0x0ACD, 0x0ACD }, + { 0x0AE2, 0x0AE3 }, + { 0x0B01, 0x0B01 }, + { 0x0B3C, 0x0B3C }, + { 0x0B3F, 0x0B3F }, + { 0x0B41, 0x0B44 }, + { 0x0B4D, 0x0B4D }, + { 0x0B56, 0x0B56 }, + { 0x0B62, 0x0B63 }, + { 0x0B82, 0x0B82 }, + { 0x0BC0, 0x0BC0 }, + { 0x0BCD, 0x0BCD }, + { 0x0C3E, 0x0C40 }, + { 0x0C46, 0x0C48 }, + { 0x0C4A, 0x0C4D }, + { 0x0C55, 0x0C56 }, + { 0x0C62, 0x0C63 }, + { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, + { 0x0CC6, 0x0CC6 }, + { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, + { 0x0D41, 0x0D44 }, + { 0x0D4D, 0x0D4D }, + { 0x0D62, 0x0D63 }, + { 0x0DCA, 0x0DCA }, + { 0x0DD2, 0x0DD4 }, 
+ { 0x0DD6, 0x0DD6 }, + { 0x0E31, 0x0E31 }, + { 0x0E34, 0x0E3A }, + { 0x0E46, 0x0E4E }, + { 0x0EB1, 0x0EB1 }, + { 0x0EB4, 0x0EB9 }, + { 0x0EBB, 0x0EBC }, + { 0x0EC6, 0x0EC6 }, + { 0x0EC8, 0x0ECD }, + { 0x0F18, 0x0F19 }, + { 0x0F35, 0x0F35 }, + { 0x0F37, 0x0F37 }, + { 0x0F39, 0x0F39 }, + { 0x0F71, 0x0F7E }, + { 0x0F80, 0x0F84 }, + { 0x0F86, 0x0F87 }, + { 0x0F90, 0x0F97 }, + { 0x0F99, 0x0FBC }, + { 0x0FC6, 0x0FC6 }, + { 0x102D, 0x1030 }, + { 0x1032, 0x1037 }, + { 0x1039, 0x103A }, + { 0x103D, 0x103E }, + { 0x1058, 0x1059 }, + { 0x105E, 0x1060 }, + { 0x1071, 0x1074 }, + { 0x1082, 0x1082 }, + { 0x1085, 0x1086 }, + { 0x108D, 0x108D }, + { 0x10FC, 0x10FC }, + { 0x135F, 0x135F }, + { 0x1712, 0x1714 }, + { 0x1732, 0x1734 }, + { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, + { 0x17B4, 0x17B5 }, + { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, + { 0x17C9, 0x17D3 }, + { 0x17D7, 0x17D7 }, + { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, + { 0x1843, 0x1843 }, + { 0x18A9, 0x18A9 }, + { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, + { 0x1932, 0x1932 }, + { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, + { 0x1B00, 0x1B03 }, + { 0x1B34, 0x1B34 }, + { 0x1B36, 0x1B3A }, + { 0x1B3C, 0x1B3C }, + { 0x1B42, 0x1B42 }, + { 0x1B6B, 0x1B73 }, + { 0x1B80, 0x1B81 }, + { 0x1BA2, 0x1BA5 }, + { 0x1BA8, 0x1BA9 }, + { 0x1C2C, 0x1C33 }, + { 0x1C36, 0x1C37 }, + { 0x1C78, 0x1C7D }, + { 0x1DC0, 0x1DE6 }, + { 0x1DFE, 0x1DFF }, + { 0x1FBD, 0x1FBD }, + { 0x1FBF, 0x1FC1 }, + { 0x1FCD, 0x1FCF }, + { 0x1FDD, 0x1FDF }, + { 0x1FED, 0x1FEF }, + { 0x1FFD, 0x1FFE }, + { 0x200B, 0x200F }, + { 0x2018, 0x2019 }, + { 0x2024, 0x2024 }, + { 0x2027, 0x2027 }, + { 0x202A, 0x202E }, + { 0x2060, 0x2064 }, + { 0x206A, 0x206F }, + { 0x20D0, 0x20F0 }, + { 0x2D6F, 0x2D6F }, + { 0x2DE0, 0x2DFF }, + { 0x2E2F, 0x2E2F }, + { 0x3005, 0x3005 }, + { 0x302A, 0x302F }, + { 0x3031, 0x3035 }, + { 0x303B, 0x303B }, + { 0x3099, 0x309E }, + { 0x30FC, 0x30FE }, + { 0xA015, 0xA015 }, + { 0xA60C, 0xA60C }, + { 0xA66F, 0xA672 }, + { 0xA67C, 0xA67D }, + { 0xA67F, 0xA67F 
}, + { 0xA700, 0xA721 }, + { 0xA788, 0xA78A }, + { 0xA802, 0xA802 }, + { 0xA806, 0xA806 }, + { 0xA80B, 0xA80B }, + { 0xA825, 0xA826 }, + { 0xA8C4, 0xA8C4 }, + { 0xA926, 0xA92D }, + { 0xA947, 0xA951 }, + { 0xAA29, 0xAA2E }, + { 0xAA31, 0xAA32 }, + { 0xAA35, 0xAA36 }, + { 0xAA43, 0xAA43 }, + { 0xAA4C, 0xAA4C }, + { 0xFB1E, 0xFB1E }, + { 0xFE00, 0xFE0F }, + { 0xFE13, 0xFE13 }, + { 0xFE20, 0xFE26 }, + { 0xFE52, 0xFE52 }, + { 0xFE55, 0xFE55 }, + { 0xFEFF, 0xFEFF }, + { 0xFF07, 0xFF07 }, + { 0xFF0E, 0xFF0E }, + { 0xFF1A, 0xFF1A }, + { 0xFF3E, 0xFF3E }, + { 0xFF40, 0xFF40 }, + { 0xFF70, 0xFF70 }, + { 0xFF9E, 0xFF9F }, + { 0xFFE3, 0xFFE3 }, + { 0xFFF9, 0xFFFB }, + { 0x101FD, 0x101FD }, + { 0x10A01, 0x10A03 }, + { 0x10A05, 0x10A06 }, + { 0x10A0C, 0x10A0F }, + { 0x10A38, 0x10A3A }, + { 0x10A3F, 0x10A3F }, + { 0x1D167, 0x1D169 }, + { 0x1D173, 0x1D182 }, + { 0x1D185, 0x1D18B }, + { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, + { 0xE0001, 0xE0001 }, + { 0xE0020, 0xE007F }, + { 0xE0100, 0xE01EF } + +#define PREDICATE(c) uc_is_case_ignorable (c) +#include "test-predicate-part2.h" diff --git a/tests/unicase/test-is-cased.h b/tests/unicase/test-is-cased.h new file mode 100644 index 00000000..474b8a45 --- /dev/null +++ b/tests/unicase/test-is-cased.h @@ -0,0 +1,58 @@ +/* Test of test whether case matters for a Unicode string. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +static void +test_ascii (int (*my_is) (const UNIT *, size_t, const char *, bool *)) +{ + /* Test cases from Unicode 5.0, chapter 3. */ + { + static const UNIT input[] = { 'A', 'B', 'C' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'a', 'b', 'c' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'a', 'B', 'c' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'a', 'B', 'c', '1', '2', '3' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { '1', '2', '3' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } +} diff --git a/tests/unicase/test-is-casefolded.h b/tests/unicase/test-is-casefolded.h new file mode 100644 index 00000000..99638c6c --- /dev/null +++ b/tests/unicase/test-is-casefolded.h @@ -0,0 +1,43 @@ +/* Test of test whether a Unicode string is already case-folded. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. 
*/ + +static void +test_ascii (int (*my_is) (const UNIT *, size_t, const char *, bool *)) +{ + { + static const UNIT input[] = { 'J', 'O', 'H', 'N', ' ', 'S', 'M', 'I', 'T', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'J', 'o', 'h', 'n', ' ', 'S', 'm', 'i', 't', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'j', 'o', 'h', 'n', ' ', 's', 'm', 'i', 't', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } +} diff --git a/tests/unicase/test-is-lowercase.h b/tests/unicase/test-is-lowercase.h new file mode 100644 index 00000000..cc37471b --- /dev/null +++ b/tests/unicase/test-is-lowercase.h @@ -0,0 +1,95 @@ +/* Test of test whether a Unicode string is entirely lower case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +static void +test_ascii (int (*my_is) (const UNIT *, size_t, const char *, bool *)) +{ + /* Test cases from Unicode 5.0, chapter 3. 
*/ + { + static const UNIT input[] = { 'a' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'J', 'O', 'H', 'N', ' ', 'S', 'M', 'I', 'T', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'J', 'o', 'h', 'n', ' ', 'S', 'm', 'i', 't', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'j', 'o', 'h', 'n', ' ', 's', 'm', 'i', 't', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'a', '2' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { '3' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + + /* Test cases from Unicode 5.1.0. 
*/ + { + static const UNIT input[] = { 'H', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'h', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'H', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'h', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } +} diff --git a/tests/unicase/test-is-titlecase.h b/tests/unicase/test-is-titlecase.h new file mode 100644 index 00000000..b36ababa --- /dev/null +++ b/tests/unicase/test-is-titlecase.h @@ -0,0 +1,95 @@ +/* Test of test whether a Unicode string is entirely title case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. */ + +static void +test_ascii (int (*my_is) (const UNIT *, size_t, const char *, bool *)) +{ + /* Test cases from Unicode 5.0, chapter 3. 
*/ + { + static const UNIT input[] = { 'A' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'J', 'O', 'H', 'N', ' ', 'S', 'M', 'I', 'T', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'J', 'o', 'h', 'n', ' ', 'S', 'm', 'i', 't', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'j', 'o', 'h', 'n', ' ', 's', 'm', 'i', 't', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'A', '2' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { '3' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + + /* Test cases from Unicode 5.1.0. 
*/ + { + static const UNIT input[] = { 'H', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'h', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'H', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'h', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } +} diff --git a/tests/unicase/test-is-uppercase.h b/tests/unicase/test-is-uppercase.h new file mode 100644 index 00000000..d1b7ca1e --- /dev/null +++ b/tests/unicase/test-is-uppercase.h @@ -0,0 +1,95 @@ +/* Test of test whether a Unicode string is entirely upper case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. */ + +static void +test_ascii (int (*my_is) (const UNIT *, size_t, const char *, bool *)) +{ + /* Test cases from Unicode 5.0, chapter 3. 
*/ + { + static const UNIT input[] = { 'A' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'J', 'O', 'H', 'N', ' ', 'S', 'M', 'I', 'T', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'J', 'o', 'h', 'n', ' ', 'S', 'm', 'i', 't', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'j', 'o', 'h', 'n', ' ', 's', 'm', 'i', 't', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'A', '2' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { '3' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + + /* Test cases from Unicode 5.1.0. */ + { + static const UNIT input[] = { 'H', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const UNIT input[] = { 'h', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'H', 'h' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const UNIT input[] = { 'h', 'H' }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } +} diff --git a/tests/unicase/test-locale-language.c b/tests/unicase/test-locale-language.c new file mode 100644 index 00000000..e8fe1815 --- /dev/null +++ b/tests/unicase/test-locale-language.c @@ -0,0 +1,56 @@ +/* Test of language code determination. 
+ Copyright (C) 2007-2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +#include "unicase.h" + +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +int +main (int argc, char *argv[]) +{ + const char *expected; + const char *language; + + /* configure should already have checked that the locale is supported. 
*/ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + expected = argv[1]; + + language = uc_locale_language (); + ASSERT (strcmp (language, expected) == 0); + + return 0; +} diff --git a/tests/unicase/test-locale-language.sh b/tests/unicase/test-locale-language.sh new file mode 100755 index 00000000..0a04a0da --- /dev/null +++ b/tests/unicase/test-locale-language.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +LC_ALL=C ./test-locale-language${EXEEXT} '' || exit 1 + +: ${LOCALE_FR=fr_FR} +if test $LOCALE_FR != none; then + LC_ALL=$LOCALE_FR ./test-locale-language${EXEEXT} fr || exit 1 +fi + +: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test $LOCALE_FR_UTF8 != none; then + LC_ALL=$LOCALE_FR_UTF8 ./test-locale-language${EXEEXT} fr || exit 1 +fi + +: ${LOCALE_JA=ja_JP} +if test $LOCALE_JA != none; then + LC_ALL=$LOCALE_JA ./test-locale-language${EXEEXT} ja || exit 1 +fi + +: ${LOCALE_TR_UTF8=tr_TR.UTF-8} +if test $LOCALE_TR_UTF8 != none; then + LC_ALL=$LOCALE_TR_UTF8 ./test-locale-language${EXEEXT} tr || exit 1 +fi + +: ${LOCALE_ZH_CN=zh_CN.GB18030} +if test $LOCALE_ZH_CN != none; then + LC_ALL=$LOCALE_ZH_CN ./test-locale-language${EXEEXT} zh || exit 1 +fi + +exit 0 diff --git a/tests/unicase/test-mapping-part1.h b/tests/unicase/test-mapping-part1.h new file mode 100644 index 00000000..4b7ac8ec --- /dev/null +++ b/tests/unicase/test-mapping-part1.h @@ -0,0 +1,45 @@ +/* Test of single character case mapping functions. + Copyright (C) 2007-2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include "unicase.h" + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed for c = 0x%04X\n", \ + __FILE__, __LINE__, c); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + +/* Pair of Unicode characters: CH and the VALUE it is expected to map to. */ +typedef struct { ucs4_t ch; ucs4_t value; } pair_t; + +/* Characters and their mapping values, ignoring no-op mappings, in increasing + order. */ +static const pair_t mapping[] = + { diff --git a/tests/unicase/test-mapping-part2.h b/tests/unicase/test-mapping-part2.h new file mode 100644 index 00000000..ef43cdde --- /dev/null +++ b/tests/unicase/test-mapping-part2.h @@ -0,0 +1,37 @@ +/* Test of single character case mapping functions. + Copyright (C) 2007-2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ + + }; + +int +main () +{ + unsigned int c; + size_t i; + + c = 0; + for (i = 0; i < SIZEOF (mapping); i++) + { + for (; c < mapping[i].ch; c++) /* code points between table entries must map to themselves */ + ASSERT (MAP (c) == c); + ASSERT (MAP (c) == mapping[i].value); + c++; + } + for (; c < 0x110000; c++) + ASSERT (MAP (c) == c); + + return 0; +} diff --git a/tests/unicase/test-predicate-part1.h b/tests/unicase/test-predicate-part1.h new file mode 100644 index 00000000..76d804e1 --- /dev/null +++ b/tests/unicase/test-predicate-part1.h @@ -0,0 +1,45 @@ +/* Test the Unicode character type functions. + Copyright (C) 2007-2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include "unicase/caseprop.h" + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed for c = 0x%04X\n", \ + __FILE__, __LINE__, c); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + +/* Interval of Unicode characters. */ +typedef struct { ucs4_t start; ucs4_t end; } interval_t; + +/* Set of Unicode characters, described as list of intervals, in increasing + order.
*/ +static const interval_t set[] = + { diff --git a/tests/unicase/test-predicate-part2.h b/tests/unicase/test-predicate-part2.h new file mode 100644 index 00000000..60b7c246 --- /dev/null +++ b/tests/unicase/test-predicate-part2.h @@ -0,0 +1,37 @@ +/* Test the Unicode character type functions. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + + }; + +int +main () +{ + unsigned int c; + size_t i; + + c = 0; + for (i = 0; i < SIZEOF (set); i++) + { + for (; c < set[i].start; c++) + ASSERT (!PREDICATE (c)); + for (; c <= set[i].end; c++) /* interval bounds are inclusive */ + ASSERT (PREDICATE (c)); + } + for (; c < 0x110000; c++) + ASSERT (!PREDICATE (c)); + + return 0; +} diff --git a/tests/unicase/test-u16-casecmp.c b/tests/unicase/test-u16-casecmp.c new file mode 100644 index 00000000..f079bbf8 --- /dev/null +++ b/tests/unicase/test-u16-casecmp.c @@ -0,0 +1,336 @@ +/* Test of case and normalization insensitive comparison of UTF-16 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +#include "unicase.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint16_t +#include "test-casecmp.h" +#undef UNIT + +static void +test_nonascii (int (*my_casecmp) (const uint16_t *, size_t, const uint16_t *, size_t, const char *, uninorm_t, int *)) +{ + /* Normalization effects. */ + { + static const uint16_t input1[] = { 'H', 0x00F6, 'h', 'l', 'e' }; + static const uint16_t input2[] = { 'H', 'O', 0x0308, 'h', 'L', 'e' }; + static const uint16_t input3[] = { 'H', 0x00F6, 'h', 'l', 'e', 'n' }; + static const uint16_t input4[] = { 'H', 'O', 0x0308, 'h', 'L', 'e', 'n' }; + static const uint16_t input5[] = { 'H', 'u', 'r', 'z' }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input3, SIZEOF (input3), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input4, SIZEOF (input4), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1,
SIZEOF (input1), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + } + { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + static const uint16_t input1[] = { 0x00C4 }; + static const uint16_t input2[] = { 0x0041, 0x0308 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ + static const uint16_t input1[] = { 0x01DE }; + static const uint16_t input2[] = { 0x0041, 0x0308, 0x0304 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* GREEK DIALYTIKA AND PERISPOMENI */ + static const uint16_t input1[] = { 0x1FC1 }; + static const uint16_t input2[] = { 0x00A8, 0x0342 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* HANGUL SYLLABLE GEUL */ + static const uint16_t input1[] = { 0xAE00 }; + static const uint16_t input2[] = { 0xADF8, 0x11AF }; + static const uint16_t input3[] = { 0x1100, 0x1173, 0x11AF }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* HANGUL SYLLABLE GEU */ + static const uint16_t input1[] = { 0xADF8 }; + static const uint16_t input2[] = { 0x1100, 0x1173 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Simple
string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint16_t input1[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint16_t input2[] = + { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ', + 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint16_t input3[] = + { 'G', 'R', 0x00DC, 0x0053, 0x0053, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423, + 0x0419, 0x0422, 0x0415, '!', ' ', + 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2, + '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT
(cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mapping can increase the number of Unicode characters. */ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint16_t input1[] = { 0x0149 }; + static const uint16_t input2[] = { 0x02BC, 0x006E }; + static const uint16_t input3[] = { 0x02BC, 0x004E }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint16_t input1[] = { 0x0390 }; + static const uint16_t input2[] = { 0x03B9, 0x0308, 0x0301 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint16_t input[] = { 0x0049 }; + static const uint16_t casefolded[] = { 0x0069 }; + static const uint16_t casefolded_tr[] = { 0x0131 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint16_t input[] = { 0x0069 }; + static const uint16_t casefolded[] = { 0x0049 }; + static const
uint16_t casefolded_tr[] = { 0x0130 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint16_t input[] = { 0x0130 }; + static const uint16_t casefolded[] = { 0x0069, 0x0307 }; + static const uint16_t casefolded_tr[] = { 0x0069 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint16_t input[] = { 0x0131 }; + static const uint16_t casefolded[] = { 0x0049 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 1); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* "topkapı" */ + static const uint16_t input[] = + { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 }; + static const uint16_t casefolded[] = + { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Uppercasing can increase the number of Unicode characters.
*/ + { /* "heiß" */ + static const uint16_t input1[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + static const uint16_t input2[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint16_t input1[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + static const uint16_t input2[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3 + }; + static const uint16_t input3[] = + { + 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4, + 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397, + 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3 + }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mapping can require subsequent normalization.
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint16_t input[] = { 0x01F0, 0x0323 }; + static const uint16_t casefolded[] = { 0x006A, 0x030C, 0x0323 }; + static const uint16_t casefolded_decomposed[] = { 0x006A, 0x0323, 0x030C }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, NULL, &cmp) == 0); + ASSERT (cmp != 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } +} + +int +main () +{ + test_ascii (u16_casecmp, UNINORM_NFD); + test_nonascii (u16_casecmp); + + return 0; +} diff --git a/tests/unicase/test-u16-casecoll.c b/tests/unicase/test-u16-casecoll.c new file mode 100644 index 00000000..5428cf04 --- /dev/null +++ b/tests/unicase/test-u16-casecoll.c @@ -0,0 +1,54 @@ +/* Test of locale dependent, case and normalization insensitive comparison of + UTF-16 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/ + +#include <config.h> + +#include "unicase.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint16_t +#include "test-casecmp.h" +#undef UNIT + +int +main () +{ + /* In the "C" locale, strcoll is equivalent to strcmp, therefore u16_casecoll + on ASCII strings should behave like strcasecmp. */ + test_ascii (u16_casecoll, UNINORM_NFC); + + return 0; +} diff --git a/tests/unicase/test-u16-casefold.c b/tests/unicase/test-u16-casefold.c new file mode 100644 index 00000000..3f4f633a --- /dev/null +++ b/tests/unicase/test-u16-casefold.c @@ -0,0 +1,217 @@ +/* Test of casefolding mapping for UTF-16 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/ + +#include <config.h> + +#include "unicase.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "unistr.h" +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +check (const uint16_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint16_t *expected, size_t expected_length) +{ + size_t length; + uint16_t *result; + + /* Test return conventions with resultbuf == NULL. */ + result = u16_casefold (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small. */ + if (expected_length > 0) + { + uint16_t *preallocated; + + length = expected_length - 1; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_casefold (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough.
*/ + { + uint16_t *preallocated; + + length = expected_length; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_casefold (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 11; + /* RESULT is a separate allocation when PREALLOCATED is NULL. */ + if (result != preallocated) + free (result); + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint16_t input[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint16_t casefolded[] = + { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ', + 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mapping can increase the number of Unicode characters.
*/ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint16_t input[] = { 0x0149 }; + static const uint16_t casefolded[] = { 0x02BC, 0x006E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint16_t input[] = { 0x0390 }; + static const uint16_t casefolded[] = { 0x03B9, 0x0308, 0x0301 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint16_t input[] = { 0x0049 }; + static const uint16_t casefolded[] = { 0x0069 }; + static const uint16_t casefolded_tr[] = { 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint16_t input[] = { 0x0069 }; + static const uint16_t casefolded[] = { 0x0069 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint16_t input[] = { 0x0130 }; + static const uint16_t casefolded[] = { 0x0069, 0x0307 }; + static const uint16_t casefolded_tr[] = { 0x0069 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint16_t input[] = { 0x0131 }; + static const uint16_t casefolded[] = { 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0); + } + { /* "topkapı" */
+ static const uint16_t input[] = + { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + static const uint16_t casefolded[] = + { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Uppercasing can increase the number of Unicode characters. */ + { /* "heiß" */ + static const uint16_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + static const uint16_t casefolded[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint16_t input[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + static const uint16_t casefolded[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mapping can require subsequent normalization.
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint16_t input[] = { 0x01F0, 0x0323 }; + static const uint16_t casefolded[] = { 0x006A, 0x030C, 0x0323 }; + static const uint16_t casefolded_decomposed[] = { 0x006A, 0x0323, 0x030C }; + static const uint16_t casefolded_normalized[] = { 0x01F0, 0x0323 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFD, casefolded_decomposed, SIZEOF (casefolded_decomposed)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casefolded_normalized, SIZEOF (casefolded_normalized)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u16-is-cased.c b/tests/unicase/test-u16-is-cased.c new file mode 100644 index 00000000..a4b9dc39 --- /dev/null +++ b/tests/unicase/test-u16-is-cased.c @@ -0,0 +1,51 @@ +/* Test of test whether case matters for an UTF-16 string. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/ + +#include <config.h> + +#include "unicase.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint16_t +#include "test-is-cased.h" +#undef UNIT + +int +main () +{ + test_ascii (u16_is_cased); + + return 0; +} diff --git a/tests/unicase/test-u16-is-casefolded.c b/tests/unicase/test-u16-is-casefolded.c new file mode 100644 index 00000000..8c0122da --- /dev/null +++ b/tests/unicase/test-u16-is-casefolded.c @@ -0,0 +1,65 @@ +/* Test of test whether an UTF-16 string is already case-folded. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +#include "unicase.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint16_t +#include "test-is-casefolded.h" +#undef UNIT + +static void +test_nonascii (int (*my_is) (const uint16_t *, size_t, const char *, bool *)) +{ + /* Test cases from Unicode 5.1.0.
*/ + { + static const uint16_t input[] = { 0x00DF }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } +} + +int +main () +{ + test_ascii (u16_is_casefolded); + test_nonascii (u16_is_casefolded); + + return 0; +} diff --git a/tests/unicase/test-u16-is-lowercase.c b/tests/unicase/test-u16-is-lowercase.c new file mode 100644 index 00000000..f7bea495 --- /dev/null +++ b/tests/unicase/test-u16-is-lowercase.c @@ -0,0 +1,93 @@ +/* Test of test whether an UTF-16 string is entirely lower case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +#include "unicase.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint16_t +#include "test-is-lowercase.h" +#undef UNIT + +static void +test_nonascii (int (*my_is) (const uint16_t *, size_t, const char *, bool *)) +{ + /* Test cases from Unicode 5.1.0.
*/ + { + static const uint16_t input[] = { 0x24D7 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const uint16_t input[] = { 0x24BD }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const uint16_t input[] = { 0x02B0 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const uint16_t input[] = { 0x1D34 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const uint16_t input[] = { 0x02BD }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } +} + +int +main () +{ + test_ascii (u16_is_lowercase); + test_nonascii (u16_is_lowercase); + + return 0; +} diff --git a/tests/unicase/test-u16-is-titlecase.c b/tests/unicase/test-u16-is-titlecase.c new file mode 100644 index 00000000..6b1c6d10 --- /dev/null +++ b/tests/unicase/test-u16-is-titlecase.c @@ -0,0 +1,51 @@ +/* Test of test whether an UTF-16 string is entirely title case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint16_t +#include "test-is-titlecase.h" +#undef UNIT + +int +main () +{ + test_ascii (u16_is_titlecase); + + return 0; +} diff --git a/tests/unicase/test-u16-is-uppercase.c b/tests/unicase/test-u16-is-uppercase.c new file mode 100644 index 00000000..1acf7055 --- /dev/null +++ b/tests/unicase/test-u16-is-uppercase.c @@ -0,0 +1,93 @@ +/* Test of test whether an UTF-16 string is entirely upper case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. */ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint16_t +#include "test-is-uppercase.h" +#undef UNIT + +static void +test_nonascii (int (*my_is) (const uint16_t *, size_t, const char *, bool *)) +{ + /* Test cases from Unicode 5.1.0. 
*/ + { + static const uint16_t input[] = { 0x24D7 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { + static const uint16_t input[] = { 0x24BD }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const uint16_t input[] = { 0x02B0 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const uint16_t input[] = { 0x1D34 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { + static const uint16_t input[] = { 0x02BD }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } +} + +int +main () +{ + test_ascii (u16_is_uppercase); + test_nonascii (u16_is_uppercase); + + return 0; +} diff --git a/tests/unicase/test-u16-tolower.c b/tests/unicase/test-u16-tolower.c new file mode 100644 index 00000000..5ec764c4 --- /dev/null +++ b/tests/unicase/test-u16-tolower.c @@ -0,0 +1,251 @@ +/* Test of lowercase mapping for UTF-16 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +check (const uint16_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint16_t *expected, size_t expected_length) +{ + size_t length; + uint16_t *result; + + /* Test return conventions with resultbuf == NULL. */ + result = u16_tolower (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small. */ + if (expected_length > 0) + { + uint16_t *preallocated; + + length = expected_length - 1; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_tolower (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. 
*/ + { + uint16_t *preallocated; + + length = expected_length; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_tolower (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint16_t input[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint16_t casemapped[] = + { 'g', 'r', 0x00FC, 0x00DF, ' ', 'g', 'o', 't', 't', '.', ' ', + 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint16_t input[] = { 0x0049 }; + static const uint16_t casemapped[] = { 0x0069 }; + static const uint16_t casemapped_tr[] = { 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF 
(casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint16_t input[] = { 0x0069 }; + static const uint16_t casemapped[] = { 0x0069 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint16_t input[] = { 0x0130 }; + static const uint16_t casemapped[] = { 0x0069, 0x0307 }; + static const uint16_t casemapped_tr[] = { 0x0069 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint16_t input[] = { 0x0131 }; + static const uint16_t casemapped[] = { 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "TOPKAPI" */ + static const uint16_t input[] = + { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 }; + static const uint16_t casemapped[] = + { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Lowercasing leaves U+00DF unchanged, so the length stays the same. */ + { /* "HEIß" */ + static const uint16_t input[] = { 0x0048, 0x0045, 0x0049, 0x00DF }; + static const uint16_t casemapped[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. 
*/ + { /* "ΠΕΡΙΣΣΌΤΕΡΕΣ ΠΛΗΡΟΦΟΡΊΕΣ" */ + static const uint16_t input[] = + { + 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4, + 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397, + 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3 + }; + static const uint16_t casemapped[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Σ" -> "σ" */ + static const uint16_t input[] = { 0x03A3 }; + static const uint16_t casemapped[] = { 0x03C3 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ" -> "ας" */ + static const uint16_t input[] = { 0x0391, 0x03A3 }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if not followed by a case-ignorable sequence and + then a cased letter. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* "ΑΣΑ" -> "ασα" */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x0391 }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x03B1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:" -> "ας:" */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C2, 0x003A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:Α" -> "ασ:α" */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x0391 }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x03B1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ" -> "ασ:ͺ" */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ " -> "ασ:ͺ " */ + static const uint16_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A, 0x0020 }; + static const uint16_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A, 0x0020 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if preceded by a case-ignorable sequence and + a cased letter before it. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* ":Σ" -> ":σ" */ + static const uint16_t input[] = { 0x003A, 0x03A3 }; + static const uint16_t casemapped[] = { 0x003A, 0x03C3 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Α:Σ" -> "α:ς" */ + static const uint16_t input[] = { 0x0391, 0x003A, 0x03A3 }; + static const uint16_t casemapped[] = { 0x03B1, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ͺ:Σ" -> "ͺ:ς" */ + static const uint16_t input[] = { 0x037A, 0x003A, 0x03A3 }; + static const uint16_t casemapped[] = { 0x037A, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* " ͺ:Σ" -> " ͺ:ς" */ + static const uint16_t input[] = { 0x0020, 0x037A, 0x003A, 0x03A3 }; + static const uint16_t casemapped[] = { 0x0020, 0x037A, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u16-totitle.c b/tests/unicase/test-u16-totitle.c new file mode 100644 index 00000000..3839c907 --- /dev/null +++ b/tests/unicase/test-u16-totitle.c @@ -0,0 +1,211 @@ +/* Test of titlecase mapping for UTF-16 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +check (const uint16_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint16_t *expected, size_t expected_length) +{ + size_t length; + uint16_t *result; + + /* Test return conventions with resultbuf == NULL. */ + result = u16_totitle (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small. */ + if (expected_length > 0) + { + uint16_t *preallocated; + + length = expected_length - 1; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_totitle (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. 
*/ + { + uint16_t *preallocated; + + length = expected_length; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_totitle (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "GRÜß GOTT. ЗДРАВСТВУЙТЕ! X=(-B±SQRT(B²-4AC))/(2A) 日本語,中文,한글" */ + static const uint16_t input[] = + { 'G', 'R', 0x00DC, 0x00DF, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423, + 0x0419, 0x0422, 0x0415, '!', ' ', + 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2, + '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint16_t casemapped[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'q', 'r', 't', '(', 'B', 0x00B2, + '-', '4', 'A', 'c', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can increase the number of Unicode characters. 
*/ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint16_t input[] = { 0x0149 }; + static const uint16_t casemapped[] = { 0x02BC, 0x004E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint16_t input[] = { 0x0390 }; + static const uint16_t casemapped[] = { 0x0399, 0x0308, 0x0301 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint16_t input[] = { 0x0049 }; + static const uint16_t casemapped[] = { 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint16_t input[] = { 0x0069 }; + static const uint16_t casemapped[] = { 0x0049 }; + static const uint16_t casemapped_tr[] = { 0x0130 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint16_t input[] = { 0x0130 }; + static const uint16_t casemapped[] = { 0x0130 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint16_t input[] = { 0x0131 }; + static const uint16_t casemapped[] = { 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "topkapı" */ + static const uint16_t input[] = + { 0x0074, 0x006F, 0x0070, 
0x006B, 0x0061, 0x0070, 0x0131 }; + static const uint16_t casemapped[] = + { 0x0054, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Titlecasing changes only the first letter here; the later U+00DF is left unchanged. */ + { /* "heiß" */ + static const uint16_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + static const uint16_t casemapped[] = { 0x0048, 0x0065, 0x0069, 0x00DF }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint16_t input[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + static const uint16_t casemapped[] = + { + 0x03A0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03A0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint16_t input[] = { 0x01F0, 0x0323 }; + static const uint16_t casemapped[] = { 0x004A, 0x030C, 0x0323 }; + static const uint16_t casemapped_normalized[] = { 0x004A, 0x0323, 0x030C }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casemapped_normalized, SIZEOF (casemapped_normalized)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u16-toupper.c b/tests/unicase/test-u16-toupper.c new file mode 100644 index 00000000..ed9fd6b1 --- /dev/null +++ b/tests/unicase/test-u16-toupper.c @@ -0,0 +1,211 @@ +/* Test of uppercase mapping for UTF-16 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +check (const uint16_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint16_t *expected, size_t expected_length) +{ + size_t length; + uint16_t *result; + + /* Test return conventions with resultbuf == NULL. */ + result = u16_toupper (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small. */ + if (expected_length > 0) + { + uint16_t *preallocated; + + length = expected_length - 1; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_toupper (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. 
*/ + { + uint16_t *preallocated; + + length = expected_length; + preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); + result = u16_toupper (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u16_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint16_t input[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint16_t casemapped[] = + { 'G', 'R', 0x00DC, 0x0053, 0x0053, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423, + 0x0419, 0x0422, 0x0415, '!', ' ', + 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2, + '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can increase the number of Unicode characters. 
*/ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint16_t input[] = { 0x0149 }; + static const uint16_t casemapped[] = { 0x02BC, 0x004E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint16_t input[] = { 0x0390 }; + static const uint16_t casemapped[] = { 0x0399, 0x0308, 0x0301 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint16_t input[] = { 0x0049 }; + static const uint16_t casemapped[] = { 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint16_t input[] = { 0x0069 }; + static const uint16_t casemapped[] = { 0x0049 }; + static const uint16_t casemapped_tr[] = { 0x0130 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint16_t input[] = { 0x0130 }; + static const uint16_t casemapped[] = { 0x0130 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint16_t input[] = { 0x0131 }; + static const uint16_t casemapped[] = { 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "topkapı" */ + static const uint16_t input[] = + { 0x0074, 0x006F, 0x0070, 
0x006B, 0x0061, 0x0070, 0x0131 }; + static const uint16_t casemapped[] = + { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Uppercasing can increase the number of Unicode characters. */ + { /* "heiß" */ + static const uint16_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + static const uint16_t casemapped[] = { 0x0048, 0x0045, 0x0049, 0x0053, 0x0053 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint16_t input[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + static const uint16_t casemapped[] = + { + 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4, + 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397, + 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint16_t input[] = { 0x01F0, 0x0323 }; + static const uint16_t casemapped[] = { 0x004A, 0x030C, 0x0323 }; + static const uint16_t casemapped_normalized[] = { 0x004A, 0x0323, 0x030C }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casemapped_normalized, SIZEOF (casemapped_normalized)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u32-casecmp.c b/tests/unicase/test-u32-casecmp.c new file mode 100644 index 00000000..8d58bc76 --- /dev/null +++ b/tests/unicase/test-u32-casecmp.c @@ -0,0 +1,336 @@ +/* Test of case and normalization insensitive comparison of UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint32_t +#include "test-casecmp.h" +#undef UNIT + +static void +test_nonascii (int (*my_casecmp) (const uint32_t *, size_t, const uint32_t *, size_t, const char *, uninorm_t, int *)) +{ /* Non-ASCII cases; every my_casecmp call must return 0 and store the expected ordering in cmp. */ + /* Normalization effects. */ + { /* "Höhle" precomposed vs. decomposed with mixed case; "Höhlen" and "Hurz" must compare greater. */ + static const uint32_t input1[] = { 'H', 0x00F6, 'h', 'l', 'e' }; + static const uint32_t input2[] = { 'H', 'O', 0x0308, 'h', 'L', 'e' }; + static const uint32_t input3[] = { 'H', 0x00F6, 'h', 'l', 'e', 'n' }; + static const uint32_t input4[] = { 'H', 'O', 0x0308, 'h', 'L', 'e', 'n' }; + static const uint32_t input5[] = { 'H', 'u', 'r', 'z' }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input3, SIZEOF (input3), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input4, SIZEOF (input4), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + } + { /* LATIN CAPITAL LETTER A WITH 
DIAERESIS */ + static const uint32_t input1[] = { 0x00C4 }; + static const uint32_t input2[] = { 0x0041, 0x0308 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ + static const uint32_t input1[] = { 0x01DE }; + static const uint32_t input2[] = { 0x0041, 0x0308, 0x0304 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* GREEK DIALYTIKA AND PERISPOMENI */ + static const uint32_t input1[] = { 0x1FC1 }; + static const uint32_t input2[] = { 0x00A8, 0x0342 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* HANGUL SYLLABLE GEUL */ + static const uint32_t input1[] = { 0xAE00 }; + static const uint32_t input2[] = { 0xADF8, 0x11AF }; + static const uint32_t input3[] = { 0x1100, 0x1173, 0x11AF }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* HANGUL SYLLABLE GEU */ + static const uint32_t input1[] = { 0xADF8 }; + static const uint32_t input2[] = { 0x1100, 0x1173 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! 
x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint32_t input1[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint32_t input2[] = + { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ', + 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint32_t input3[] = + { 'G', 'R', 0x00DC, 0x0053, 0x0053, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423, + 0x0419, 0x0422, 0x0415, '!', ' ', + 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2, + '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, 
SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mapping can increase the number of Unicode characters. */ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint32_t input1[] = { 0x0149 }; + static const uint32_t input2[] = { 0x02BC, 0x006E }; + static const uint32_t input3[] = { 0x02BC, 0x004E }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint32_t input1[] = { 0x0390 }; + static const uint32_t input2[] = { 0x03B9, 0x0308, 0x0301 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint32_t input[] = { 0x0049 }; + static const uint32_t casefolded[] = { 0x0069 }; + static const uint32_t casefolded_tr[] = { 0x0131 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint32_t input[] = { 0x0069 }; + static const uint32_t casefolded[] = { 0x0049 }; + static const uint32_t casefolded_tr[] = { 0x0130 }; + int 
cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint32_t input[] = { 0x0130 }; + static const uint32_t casefolded[] = { 0x0069, 0x0307 }; + static const uint32_t casefolded_tr[] = { 0x0069 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint32_t input[] = { 0x0131 }; + static const uint32_t casefolded[] = { 0x0049 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 1); /* distinct unless the "tr" rules are requested */ + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* "topkapı" */ + static const uint32_t input[] = + { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 }; + static const uint32_t casefolded[] = + { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == -1); /* differs unless the "tr" rules are requested */ + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case folding can increase the number of Unicode characters. 
*/ + { /* "heiß" */ + static const uint32_t input1[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + static const uint32_t input2[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint32_t input1[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + static const uint32_t input2[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3 + }; + static const uint32_t input3[] = + { + 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4, + 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397, + 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3 + }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint32_t input[] = { 0x01F0, 0x0323 }; + static const uint32_t casefolded[] = { 0x006A, 0x030C, 0x0323 }; + static const uint32_t casefolded_decomposed[] = { 0x006A, 0x0323, 0x030C }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, NULL, &cmp) == 0); + ASSERT (cmp != 0); /* combining marks are in a different order when no normalization is applied */ + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } +} + +int +main () +{ + test_ascii (u32_casecmp, UNINORM_NFD); + test_nonascii (u32_casecmp); + + return 0; +} diff --git a/tests/unicase/test-u32-casecoll.c b/tests/unicase/test-u32-casecoll.c new file mode 100644 index 00000000..23edfbdf --- /dev/null +++ b/tests/unicase/test-u32-casecoll.c @@ -0,0 +1,54 @@ +/* Test of locale dependent, case and normalization insensitive comparison of + UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint32_t +#include "test-casecmp.h" +#undef UNIT + +int +main () +{ + /* In the "C" locale, strcoll is equivalent to strcmp, therefore u32_casecoll + on ASCII strings should behave like strcasecmp. */ + test_ascii (u32_casecoll, UNINORM_NFC); + + return 0; +} diff --git a/tests/unicase/test-u32-casefold.c b/tests/unicase/test-u32-casefold.c new file mode 100644 index 00000000..d48e8dc8 --- /dev/null +++ b/tests/unicase/test-u32-casefold.c @@ -0,0 +1,214 @@ +/* Test of casefolding mapping for UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +check (const uint32_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint32_t *expected, size_t expected_length) +{ /* Returns 0 when u32_casefold produces EXPECTED under all three resultbuf conventions, otherwise the number (1 to 11) of the first failed check. */ + size_t length; + uint32_t *result; + + /* Test return conventions with resultbuf == NULL. */ + result = u32_casefold (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small. */ + if (expected_length > 0) + { + uint32_t *preallocated; + + length = expected_length - 1; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_casefold (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. NOTE(review): if preallocated is NULL (malloc may return NULL for size 0), result is presumably a separate allocation that is not freed below - confirm. 
*/ + { + uint32_t *preallocated; + + length = expected_length; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_casefold (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint32_t input[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint32_t casefolded[] = + { 'g', 'r', 0x00FC, 0x0073, 0x0073, ' ', 'g', 'o', 't', 't', '.', ' ', + 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mapping can increase the number of Unicode characters. 
*/ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint32_t input[] = { 0x0149 }; + static const uint32_t casefolded[] = { 0x02BC, 0x006E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint32_t input[] = { 0x0390 }; + static const uint32_t casefolded[] = { 0x03B9, 0x0308, 0x0301 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint32_t input[] = { 0x0049 }; + static const uint32_t casefolded[] = { 0x0069 }; + static const uint32_t casefolded_tr[] = { 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint32_t input[] = { 0x0069 }; + static const uint32_t casefolded[] = { 0x0069 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint32_t input[] = { 0x0130 }; + static const uint32_t casefolded[] = { 0x0069, 0x0307 }; + static const uint32_t casefolded_tr[] = { 0x0069 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint32_t input[] = { 0x0131 }; + static const uint32_t casefolded[] = { 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0); + } + { /* "topkapı" */ 
+ static const uint32_t input[] = + { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + static const uint32_t casefolded[] = + { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case folding can increase the number of Unicode characters. */ + { /* "heiß" */ + static const uint32_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + static const uint32_t casefolded[] = { 0x0068, 0x0065, 0x0069, 0x0073, 0x0073 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint32_t input[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + static const uint32_t casefolded[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C3, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C3 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint32_t input[] = { 0x01F0, 0x0323 }; + static const uint32_t casefolded[] = { 0x006A, 0x030C, 0x0323 }; + static const uint32_t casefolded_decomposed[] = { 0x006A, 0x0323, 0x030C }; + static const uint32_t casefolded_normalized[] = { 0x01F0, 0x0323 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFD, casefolded_decomposed, SIZEOF (casefolded_decomposed)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casefolded_normalized, SIZEOF (casefolded_normalized)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u32-is-cased.c b/tests/unicase/test-u32-is-cased.c new file mode 100644 index 00000000..d8353fc4 --- /dev/null +++ b/tests/unicase/test-u32-is-cased.c @@ -0,0 +1,51 @@ +/* Test of test whether case matters for an UTF-32 string. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint32_t +#include "test-is-cased.h" +#undef UNIT + +int +main () +{ + test_ascii (u32_is_cased); + + return 0; +} diff --git a/tests/unicase/test-u32-is-casefolded.c b/tests/unicase/test-u32-is-casefolded.c new file mode 100644 index 00000000..d79241d7 --- /dev/null +++ b/tests/unicase/test-u32-is-casefolded.c @@ -0,0 +1,65 @@ +/* Test of test whether an UTF-32 string is already case-folded. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. */ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint32_t +#include "test-is-casefolded.h" +#undef UNIT + +static void +test_nonascii (int (*my_is) (const uint32_t *, size_t, const char *, bool *)) +{ /* Every my_is call must return 0 and store the expected verdict in result. */ + /* Test cases from Unicode 5.1.0. 
*/ + { /* LATIN SMALL LETTER SHARP S */ + static const uint32_t input[] = { 0x00DF }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } +} + +int +main () +{ + test_ascii (u32_is_casefolded); + test_nonascii (u32_is_casefolded); + + return 0; +} diff --git a/tests/unicase/test-u32-is-lowercase.c b/tests/unicase/test-u32-is-lowercase.c new file mode 100644 index 00000000..2cff3925 --- /dev/null +++ b/tests/unicase/test-u32-is-lowercase.c @@ -0,0 +1,93 @@ +/* Test of test whether an UTF-32 string is entirely lower case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. */ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint32_t +#include "test-is-lowercase.h" +#undef UNIT + +static void +test_nonascii (int (*my_is) (const uint32_t *, size_t, const char *, bool *)) +{ /* Every my_is call must return 0 and store the expected verdict in result. */ + /* Test cases from Unicode 5.1.0. 
*/ + { /* CIRCLED LATIN SMALL LETTER H */ + static const uint32_t input[] = { 0x24D7 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { /* CIRCLED LATIN CAPITAL LETTER H */ + static const uint32_t input[] = { 0x24BD }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { /* MODIFIER LETTER SMALL H */ + static const uint32_t input[] = { 0x02B0 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { /* MODIFIER LETTER CAPITAL H */ + static const uint32_t input[] = { 0x1D34 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { /* MODIFIER LETTER REVERSED COMMA */ + static const uint32_t input[] = { 0x02BD }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } +} + +int +main () +{ + test_ascii (u32_is_lowercase); + test_nonascii (u32_is_lowercase); + + return 0; +} diff --git a/tests/unicase/test-u32-is-titlecase.c b/tests/unicase/test-u32-is-titlecase.c new file mode 100644 index 00000000..2c287e98 --- /dev/null +++ b/tests/unicase/test-u32-is-titlecase.c @@ -0,0 +1,51 @@ +/* Test of test whether an UTF-32 string is entirely title case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint32_t +#include "test-is-titlecase.h" +#undef UNIT + +int +main () +{ + test_ascii (u32_is_titlecase); + + return 0; +} diff --git a/tests/unicase/test-u32-is-uppercase.c b/tests/unicase/test-u32-is-uppercase.c new file mode 100644 index 00000000..0044ec1d --- /dev/null +++ b/tests/unicase/test-u32-is-uppercase.c @@ -0,0 +1,93 @@ +/* Test of test whether an UTF-32 string is entirely upper case. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. */ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint32_t +#include "test-is-uppercase.h" +#undef UNIT + +static void +test_nonascii (int (*my_is) (const uint32_t *, size_t, const char *, bool *)) +{ /* Every my_is call must return 0 and store the expected verdict in result. */ + /* Test cases from Unicode 5.1.0. 
*/ + { /* CIRCLED LATIN SMALL LETTER H */ + static const uint32_t input[] = { 0x24D7 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == false); + } + { /* CIRCLED LATIN CAPITAL LETTER H */ + static const uint32_t input[] = { 0x24BD }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { /* MODIFIER LETTER SMALL H */ + static const uint32_t input[] = { 0x02B0 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { /* MODIFIER LETTER CAPITAL H */ + static const uint32_t input[] = { 0x1D34 }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } + { /* MODIFIER LETTER REVERSED COMMA */ + static const uint32_t input[] = { 0x02BD }; + bool result; + + ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0); + ASSERT (result == true); + } +} + +int +main () +{ + test_ascii (u32_is_uppercase); + test_nonascii (u32_is_uppercase); + + return 0; +} diff --git a/tests/unicase/test-u32-tolower.c b/tests/unicase/test-u32-tolower.c new file mode 100644 index 00000000..7f348da4 --- /dev/null +++ b/tests/unicase/test-u32-tolower.c @@ -0,0 +1,251 @@ +/* Test of lowercase mapping for UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + /* NOTE(review): the bare #include directives below carry no header name in this copy, so the names were presumably lost in extraction - restore them from upstream before applying. */ +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + /* SIZEOF gives the element count of a real array; ASSERT reports file and line on stderr and aborts when expr is false. */ +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + /* Calls u32_tolower on input up to three times - with no result buffer, with a buffer one unit too short (skipped when expected_length is 0), and with a buffer of exactly expected_length units - and compares length and contents with expected each time. Returns 0 when every expectation holds, otherwise a code from 1 to 11 identifying the first expectation that failed. Buffers are not released on the failure returns. */ +static int +check (const uint32_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint32_t *expected, size_t expected_length) +{ + size_t length; + uint32_t *result; + + /* Test return conventions with resultbuf == NULL: a non-NULL result of the expected length and contents is required, and it is freed here. */ + result = u32_tolower (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small: the result must be a different pointer than the undersized buffer, and both are freed. */ + if (expected_length > 0) + { + uint32_t *preallocated; + + length = expected_length - 1; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_tolower (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. 
*/ + { + uint32_t *preallocated; + + length = expected_length; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_tolower (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; /* a NULL buffer from malloc is exempt from the same-pointer requirement. NOTE(review): in that case result is a separate pointer that is not freed below - confirm whether that is intended. */ + if (!(length == expected_length)) + return 10; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + /* Feeds check with the empty string (with and without NFC), a mixed-script sentence, the Turkish i letters with and without the tr language code, a German word and Greek sigma in several contexts. Any nonzero check result aborts through ASSERT. */ +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint32_t input[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint32_t casemapped[] = + { 'g', 'r', 0x00FC, 0x00DF, ' ', 'g', 'o', 't', 't', '.', ' ', + 0x0437, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint32_t input[] = { 0x0049 }; + static const uint32_t casemapped[] = { 0x0069 }; + static const uint32_t casemapped_tr[] = { 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF 
(casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint32_t input[] = { 0x0069 }; + static const uint32_t casemapped[] = { 0x0069 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint32_t input[] = { 0x0130 }; + static const uint32_t casemapped[] = { 0x0069, 0x0307 }; + static const uint32_t casemapped_tr[] = { 0x0069 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint32_t input[] = { 0x0131 }; + static const uint32_t casemapped[] = { 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "TOPKAPI" */ + static const uint32_t input[] = + { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 }; + static const uint32_t casemapped[] = + { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Lowercasing keeps U+00DF as is; input and expected output have the same length. */ + { /* "HEIß" */ + static const uint32_t input[] = { 0x0048, 0x0045, 0x0049, 0x00DF }; + static const uint32_t casemapped[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. 
*/ + { /* "ΠΕΡΙΣΣΌΤΕΡΕΣ ΠΛΗΡΟΦΟΡΊΕΣ" */ + static const uint32_t input[] = + { + 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4, + 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397, + 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3 + }; + static const uint32_t casemapped[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Σ" -> "σ" */ + static const uint32_t input[] = { 0x03A3 }; + static const uint32_t casemapped[] = { 0x03C3 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ" -> "ας" */ + static const uint32_t input[] = { 0x0391, 0x03A3 }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if not followed by a case-ignorable sequence and + then a cased letter. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* "ΑΣΑ" -> "ασα" */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x0391 }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x03B1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:" -> "ας:" */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C2, 0x003A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:Α" -> "ασ:α" */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x0391 }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x03B1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ" -> "ασ:ͺ" */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ " -> "ασ:ͺ " */ + static const uint32_t input[] = { 0x0391, 0x03A3, 0x003A, 0x037A, 0x0020 }; + static const uint32_t casemapped[] = { 0x03B1, 0x03C3, 0x003A, 0x037A, 0x0020 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if preceded by a case-ignorable sequence and + a cased letter before it. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* ":Σ" -> ":σ" */ + static const uint32_t input[] = { 0x003A, 0x03A3 }; + static const uint32_t casemapped[] = { 0x003A, 0x03C3 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Α:Σ" -> "α:ς" */ + static const uint32_t input[] = { 0x0391, 0x003A, 0x03A3 }; + static const uint32_t casemapped[] = { 0x03B1, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ͺ:Σ" -> "ͺ:ς" */ + static const uint32_t input[] = { 0x037A, 0x003A, 0x03A3 }; + static const uint32_t casemapped[] = { 0x037A, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* " ͺ:Σ" -> " ͺ:ς" */ + static const uint32_t input[] = { 0x0020, 0x037A, 0x003A, 0x03A3 }; + static const uint32_t casemapped[] = { 0x0020, 0x037A, 0x003A, 0x03C2 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u32-totitle.c b/tests/unicase/test-u32-totitle.c new file mode 100644 index 00000000..82c4b0f1 --- /dev/null +++ b/tests/unicase/test-u32-totitle.c @@ -0,0 +1,211 @@ +/* Test of titlecase mapping for UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + /* NOTE(review): the bare #include directives below carry no header name in this copy, so the names were presumably lost in extraction - restore them from upstream before applying. */ +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + /* SIZEOF gives the element count of a real array; ASSERT reports file and line on stderr and aborts when expr is false. */ +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + /* Calls u32_totitle on input up to three times - with no result buffer, with a buffer one unit too short (skipped when expected_length is 0), and with a buffer of exactly expected_length units - and compares length and contents with expected each time. Returns 0 when every expectation holds, otherwise a code from 1 to 11 identifying the first expectation that failed. Buffers are not released on the failure returns. */ +static int +check (const uint32_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint32_t *expected, size_t expected_length) +{ + size_t length; + uint32_t *result; + + /* Test return conventions with resultbuf == NULL: a non-NULL result of the expected length and contents is required, and it is freed here. */ + result = u32_totitle (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small: the result must be a different pointer than the undersized buffer, and both are freed. */ + if (expected_length > 0) + { + uint32_t *preallocated; + + length = expected_length - 1; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_totitle (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. 
*/ + { + uint32_t *preallocated; + + length = expected_length; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_totitle (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; /* a NULL buffer from malloc is exempt from the same-pointer requirement. NOTE(review): in that case result is a separate pointer that is not freed below - confirm whether that is intended. */ + if (!(length == expected_length)) + return 10; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + /* Feeds check with the empty string (with and without NFC), an upper-case mixed-script sentence, two characters whose mapping expands to several code points, the Turkish i letters with and without the tr language code, a German and a Greek phrase, and one input whose expected output differs under NFC. Any nonzero check result aborts through ASSERT. */ +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "GRÜß GOTT. ЗДРАВСТВУЙТЕ! X=(-B±SQRT(B²-4AC))/(2A) 日本語,中文,한글" */ + static const uint32_t input[] = + { 'G', 'R', 0x00DC, 0x00DF, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423, + 0x0419, 0x0422, 0x0415, '!', ' ', + 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2, + '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint32_t casemapped[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'q', 'r', 't', '(', 'B', 0x00B2, + '-', '4', 'A', 'c', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can increase the number of Unicode characters. 
*/ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint32_t input[] = { 0x0149 }; + static const uint32_t casemapped[] = { 0x02BC, 0x004E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint32_t input[] = { 0x0390 }; + static const uint32_t casemapped[] = { 0x0399, 0x0308, 0x0301 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint32_t input[] = { 0x0049 }; + static const uint32_t casemapped[] = { 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint32_t input[] = { 0x0069 }; + static const uint32_t casemapped[] = { 0x0049 }; + static const uint32_t casemapped_tr[] = { 0x0130 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint32_t input[] = { 0x0130 }; + static const uint32_t casemapped[] = { 0x0130 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint32_t input[] = { 0x0131 }; + static const uint32_t casemapped[] = { 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "topkapı" */ + static const uint32_t input[] = + { 0x0074, 0x006F, 0x0070, 
0x006B, 0x0061, 0x0070, 0x0131 }; + static const uint32_t casemapped[] = + { 0x0054, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Only the first letter is titlecased here; U+00DF is kept as is, so the length does not change. */ + { /* "heiß" */ + static const uint32_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + static const uint32_t casemapped[] = { 0x0048, 0x0065, 0x0069, 0x00DF }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint32_t input[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + static const uint32_t casemapped[] = + { + 0x03A0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03A0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint32_t input[] = { 0x01F0, 0x0323 }; + static const uint32_t casemapped[] = { 0x004A, 0x030C, 0x0323 }; + static const uint32_t casemapped_normalized[] = { 0x004A, 0x0323, 0x030C }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casemapped_normalized, SIZEOF (casemapped_normalized)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u32-toupper.c b/tests/unicase/test-u32-toupper.c new file mode 100644 index 00000000..dbc1619b --- /dev/null +++ b/tests/unicase/test-u32-toupper.c @@ -0,0 +1,211 @@ +/* Test of uppercase mapping for UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + /* NOTE(review): the bare #include directives below carry no header name in this copy, so the names were presumably lost in extraction - restore them from upstream before applying. */ +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + /* SIZEOF gives the element count of a real array; ASSERT reports file and line on stderr and aborts when expr is false. */ +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + /* Calls u32_toupper on input up to three times - with no result buffer, with a buffer one unit too short (skipped when expected_length is 0), and with a buffer of exactly expected_length units - and compares length and contents with expected each time. Returns 0 when every expectation holds, otherwise a code from 1 to 11 identifying the first expectation that failed. Buffers are not released on the failure returns. */ +static int +check (const uint32_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint32_t *expected, size_t expected_length) +{ + size_t length; + uint32_t *result; + + /* Test return conventions with resultbuf == NULL: a non-NULL result of the expected length and contents is required, and it is freed here. */ + result = u32_toupper (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small: the result must be a different pointer than the undersized buffer, and both are freed. */ + if (expected_length > 0) + { + uint32_t *preallocated; + + length = expected_length - 1; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_toupper (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. 
*/ + { + uint32_t *preallocated; + + length = expected_length; + preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); + result = u32_toupper (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; /* a NULL buffer from malloc is exempt from the same-pointer requirement. NOTE(review): in that case result is a separate pointer that is not freed below - confirm whether that is intended. */ + if (!(length == expected_length)) + return 10; + if (!(u32_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + /* Feeds check with the empty string (with and without NFC), a mixed-script sentence, two characters whose mapping expands to several code points, the Turkish i letters with and without the tr language code, a German and a Greek phrase, and one input whose expected output differs under NFC. Any nonzero check result aborts through ASSERT. */ +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint32_t input[] = + { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', + 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, + 0x0439, 0x0442, 0x0435, '!', ' ', + 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, + '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + static const uint32_t casemapped[] = + { 'G', 'R', 0x00DC, 0x0053, 0x0053, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423, + 0x0419, 0x0422, 0x0415, '!', ' ', + 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2, + '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ', + 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can increase the number of Unicode characters. 
*/ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint32_t input[] = { 0x0149 }; + static const uint32_t casemapped[] = { 0x02BC, 0x004E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint32_t input[] = { 0x0390 }; + static const uint32_t casemapped[] = { 0x0399, 0x0308, 0x0301 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint32_t input[] = { 0x0049 }; + static const uint32_t casemapped[] = { 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint32_t input[] = { 0x0069 }; + static const uint32_t casemapped[] = { 0x0049 }; + static const uint32_t casemapped_tr[] = { 0x0130 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint32_t input[] = { 0x0130 }; + static const uint32_t casemapped[] = { 0x0130 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint32_t input[] = { 0x0131 }; + static const uint32_t casemapped[] = { 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "topkapı" */ + static const uint32_t input[] = + { 0x0074, 0x006F, 0x0070, 
0x006B, 0x0061, 0x0070, 0x0131 }; + static const uint32_t casemapped[] = + { 0x0054, 0x004F, 0x0050, 0x004B, 0x0041, 0x0050, 0x0049 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Uppercasing can increase the number of Unicode characters. */ + { /* "heiß" */ + static const uint32_t input[] = { 0x0068, 0x0065, 0x0069, 0x00DF }; + static const uint32_t casemapped[] = { 0x0048, 0x0045, 0x0049, 0x0053, 0x0053 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint32_t input[] = + { + 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4, + 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7, + 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2 + }; + static const uint32_t casemapped[] = + { + 0x03A0, 0x0395, 0x03A1, 0x0399, 0x03A3, 0x03A3, 0x038C, 0x03A4, + 0x0395, 0x03A1, 0x0395, 0x03A3, 0x0020, 0x03A0, 0x039B, 0x0397, + 0x03A1, 0x039F, 0x03A6, 0x039F, 0x03A1, 0x038A, 0x0395, 0x03A3 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint32_t input[] = { 0x01F0, 0x0323 }; + static const uint32_t casemapped[] = { 0x004A, 0x030C, 0x0323 }; + static const uint32_t casemapped_normalized[] = { 0x004A, 0x0323, 0x030C }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casemapped_normalized, SIZEOF (casemapped_normalized)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u8-casecmp.c b/tests/unicase/test-u8-casecmp.c new file mode 100644 index 00000000..dbcc9d76 --- /dev/null +++ b/tests/unicase/test-u8-casecmp.c @@ -0,0 +1,345 @@ +/* Test of case and normalization insensitive comparison of UTF-8 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT uint8_t +#include "test-casecmp.h" +#undef UNIT + +static void +test_nonascii (int (*my_casecmp) (const uint8_t *, size_t, const uint8_t *, size_t, const char *, uninorm_t, int *)) +{ + /* Normalization effects. */ + { + static const uint8_t input1[] = { 'H', 0xC3, 0xB6, 'h', 'l', 'e' }; + static const uint8_t input2[] = { 'H', 'O', 0xCC, 0x88, 'h', 'L', 'e' }; + static const uint8_t input3[] = { 'H', 0xC3, 0xB6, 'h', 'l', 'e', 'n' }; + static const uint8_t input4[] = { 'H', 'O', 0xCC, 0x88, 'h', 'L', 'e', 'n' }; + static const uint8_t input5[] = { 'H', 'u', 'r', 'z' }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input3, SIZEOF (input3), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input4, SIZEOF (input4), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + } + { /* LATIN CAPITAL LETTER A WITH 
DIAERESIS */ + static const uint8_t input1[] = { 0xC3, 0x84 }; + static const uint8_t input2[] = { 0x41, 0xCC, 0x88 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ + static const uint8_t input1[] = { 0xC7, 0x9E }; + static const uint8_t input2[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* GREEK DIALYTIKA AND PERISPOMENI */ + static const uint8_t input1[] = { 0xE1, 0xBF, 0x81 }; + static const uint8_t input2[] = { 0xC2, 0xA8, 0xCD, 0x82 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* HANGUL SYLLABLE GEUL */ + static const uint8_t input1[] = { 0xEA, 0xB8, 0x80 }; + static const uint8_t input2[] = { 0xEA, 0xB7, 0xB8, 0xE1, 0x86, 0xAF }; + static const uint8_t input3[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* HANGUL SYLLABLE GEU */ + static const uint8_t input1[] = { 0xEA, 0xB7, 0xB8 }; + static const uint8_t input2[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! 
x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint8_t input1[] = + { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', ' ', + 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + static const uint8_t input2[] = + { 'g', 'r', 0xC3, 0xBC, 0x73, 0x73, ' ', 'g', 'o', 't', 't', '.', ' ', + 0xD0, 0xB7, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + static const uint8_t input3[] = + { 'G', 'R', 0xC3, 0x9C, 0x53, 0x53, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0xD0, 0x97, 0xD0, 0x94, 0xD0, 0xA0, 0xD0, 0x90, 0xD0, 0x92, 0xD0, 0xA1, + 0xD0, 0xA2, 0xD0, 0x92, 0xD0, 0xA3, 0xD0, 0x99, 0xD0, 0xA2, 0xD0, 0x95, + '!', ' ', 'X', '=', '(', '-', 'B', 0xC2, 0xB1, 'S', 'Q', 'R', 'T', '(', + 'B', 0xC2, 0xB2, '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + 
ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mapping can increase the number of Unicode characters. */ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint8_t input1[] = { 0xC5, 0x89 }; + static const uint8_t input2[] = { 0xCA, 0xBC, 0x6E }; + static const uint8_t input3[] = { 0xCA, 0xBC, 0x4E }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint8_t input1[] = { 0xCE, 0x90 }; + static const uint8_t input2[] = { 0xCE, 0xB9, 0xCC, 0x88, 0xCC, 0x81 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint8_t input[] = { 0x49 }; + static const uint8_t casefolded[] = { 0x69 }; + static const uint8_t casefolded_tr[] = { 0xC4, 0xB1 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF 
(input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint8_t input[] = { 0x69 }; + static const uint8_t casefolded[] = { 0x49 }; + static const uint8_t casefolded_tr[] = { 0xC4, 0xB0 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint8_t input[] = { 0xC4, 0xB0 }; + static const uint8_t casefolded[] = { 0x69, 0xCC, 0x87 }; + static const uint8_t casefolded_tr[] = { 0x69 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint8_t input[] = { 0xC4, 0xB1 }; + static const uint8_t casefolded[] = { 0x49 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 1); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* "topkapı" */ + static const uint8_t input[] = + { 0x54, 0x4F, 0x50, 0x4B, 0x41, 0x50, 0x49 }; + static const uint8_t casefolded[] = + { 0x74, 0x6F, 0x70, 0x6B, 0x61, 0x70, 0xC4, 0xB1 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0); + 
ASSERT (cmp == 0); + } + + /* Uppercasing can increase the number of Unicode characters. */ + { /* "heiß" */ + static const uint8_t input1[] = { 0x68, 0x65, 0x69, 0xC3, 0x9F }; + static const uint8_t input2[] = { 0x68, 0x65, 0x69, 0x73, 0x73 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint8_t input1[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x82, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x82 + }; + static const uint8_t input2[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x83, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x83 + }; + static const uint8_t input3[] = + { + 0xCE, 0xA0, 0xCE, 0x95, 0xCE, 0xA1, 0xCE, 0x99, 0xCE, 0xA3, 0xCE, 0xA3, + 0xCE, 0x8C, 0xCE, 0xA4, 0xCE, 0x95, 0xCE, 0xA1, 0xCE, 0x95, 0xCE, 0xA3, + ' ', 0xCE, 0xA0, 0xCE, 0x9B, 0xCE, 0x97, 0xCE, 0xA1, 0xCE, 0x9F, + 0xCE, 0xA6, 0xCE, 0x9F, 0xCE, 0xA1, 0xCE, 0x8A, 0xCE, 0x95, 0xCE, 0xA3 + }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mapping can require subsequent normalization. 
*/
+  { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
+    static const uint8_t input[] = { 0xC7, 0xB0, 0xCC, 0xA3 };
+    static const uint8_t casefolded[] = { 0x6A, 0xCC, 0x8C, 0xCC, 0xA3 };
+    static const uint8_t casefolded_decomposed[] = { 0x6A, 0xCC, 0xA3, 0xCC, 0x8C };
+    int cmp;
+
+    ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0);
+    ASSERT (cmp == 0);
+
+    ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, NULL, &cmp) == 0);
+    ASSERT (cmp != 0);
+
+    ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, UNINORM_NFD, &cmp) == 0);
+    ASSERT (cmp == 0);
+
+    ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, UNINORM_NFD, &cmp) == 0);
+    ASSERT (cmp == 0);
+  }
+}
+
+int
+main ()
+{
+  test_ascii (u8_casecmp, UNINORM_NFD);
+  test_nonascii (u8_casecmp);
+
+  return 0;
+}
diff --git a/tests/unicase/test-u8-casecoll.c b/tests/unicase/test-u8-casecoll.c
new file mode 100644
index 00000000..e1b0c57c
--- /dev/null
+++ b/tests/unicase/test-u8-casecoll.c
@@ -0,0 +1,54 @@
+/* Test of locale dependent, case and normalization insensitive comparison of
+   UTF-8 strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/
+
+#include <config.h>
+
+#include "unicase.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "uninorm.h"
+
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+#define UNIT uint8_t
+#include "test-casecmp.h"
+#undef UNIT
+
+int
+main ()
+{
+  /* In the "C" locale, strcoll is equivalent to strcmp, therefore u8_casecoll
+     on ASCII strings should behave like strcasecmp.  */
+  test_ascii (u8_casecoll, UNINORM_NFC);
+
+  return 0;
+}
diff --git a/tests/unicase/test-u8-casefold.c b/tests/unicase/test-u8-casefold.c
new file mode 100644
index 00000000..225b9979
--- /dev/null
+++ b/tests/unicase/test-u8-casefold.c
@@ -0,0 +1,220 @@
+/* Test of casefolding mapping for UTF-8 strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/
+
+#include <config.h>
+
+#include "unicase.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "unistr.h"
+#include "uninorm.h"
+
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+static int
+check (const uint8_t *input, size_t input_length,
+       const char *iso639_language, uninorm_t nf,
+       const uint8_t *expected, size_t expected_length)
+{
+  size_t length;
+  uint8_t *result;
+
+  /* Test return conventions with resultbuf == NULL.  */
+  result = u8_casefold (input, input_length, iso639_language, nf, NULL, &length);
+  if (!(result != NULL))
+    return 1;
+  if (!(length == expected_length))
+    return 2;
+  if (!(u8_cmp (result, expected, expected_length) == 0))
+    return 3;
+  free (result);
+
+  /* Test return conventions with resultbuf too small.  */
+  if (expected_length > 0)
+    {
+      uint8_t *preallocated;
+
+      length = expected_length - 1;
+      preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
+      result = u8_casefold (input, input_length, iso639_language, nf, preallocated, &length);
+      if (!(result != NULL))
+        return 4;
+      if (!(result != preallocated))
+        return 5;
+      if (!(length == expected_length))
+        return 6;
+      if (!(u8_cmp (result, expected, expected_length) == 0))
+        return 7;
+      free (result);
+      free (preallocated);
+    }
+
+  /* Test return conventions with resultbuf large enough.
*/ + { + uint8_t *preallocated; + + length = expected_length; + preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); + result = u8_casefold (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u8_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint8_t input[] = + { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', ' ', + 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + static const uint8_t casefolded[] = + { 'g', 'r', 0xC3, 0xBC, 0x73, 0x73, ' ', 'g', 'o', 't', 't', '.', ' ', + 0xD0, 0xB7, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mapping can 
increase the number of Unicode characters. */ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint8_t input[] = { 0xC5, 0x89 }; + static const uint8_t casefolded[] = { 0xCA, 0xBC, 0x6E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint8_t input[] = { 0xCE, 0x90 }; + static const uint8_t casefolded[] = { 0xCE, 0xB9, 0xCC, 0x88, 0xCC, 0x81 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint8_t input[] = { 0x49 }; + static const uint8_t casefolded[] = { 0x69 }; + static const uint8_t casefolded_tr[] = { 0xC4, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint8_t input[] = { 0x69 }; + static const uint8_t casefolded[] = { 0x69 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint8_t input[] = { 0xC4, 0xB0 }; + static const uint8_t casefolded[] = { 0x69, 0xCC, 0x87 }; + static const uint8_t casefolded_tr[] = { 0x69 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded_tr, SIZEOF (casefolded_tr)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint8_t input[] = { 0xC4, 0xB1 }; + static const uint8_t casefolded[] = { 0xC4, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, 
casefolded, SIZEOF (casefolded)) == 0); + } + { /* "topkapı" */ + static const uint8_t input[] = + { 0x74, 0x6F, 0x70, 0x6B, 0x61, 0x70, 0xC4, 0xB1 }; + static const uint8_t casefolded[] = + { 0x74, 0x6F, 0x70, 0x6B, 0x61, 0x70, 0xC4, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Uppercasing can increase the number of Unicode characters. */ + { /* "heiß" */ + static const uint8_t input[] = { 0x68, 0x65, 0x69, 0xC3, 0x9F }; + static const uint8_t casefolded[] = { 0x68, 0x65, 0x69, 0x73, 0x73 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint8_t input[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x82, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x82 + }; + static const uint8_t casefolded[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x83, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x83 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0); + } + + /* Case mapping can require subsequent normalization. 
*/
+  { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
+    static const uint8_t input[] = { 0xC7, 0xB0, 0xCC, 0xA3 };
+    static const uint8_t casefolded[] = { 0x6A, 0xCC, 0x8C, 0xCC, 0xA3 };
+    static const uint8_t casefolded_decomposed[] = { 0x6A, 0xCC, 0xA3, 0xCC, 0x8C };
+    static const uint8_t casefolded_normalized[] = { 0xC7, 0xB0, 0xCC, 0xA3 };
+    ASSERT (check (input, SIZEOF (input), NULL, NULL, casefolded, SIZEOF (casefolded)) == 0);
+    ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFD, casefolded_decomposed, SIZEOF (casefolded_decomposed)) == 0);
+    ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casefolded_normalized, SIZEOF (casefolded_normalized)) == 0);
+  }
+
+  return 0;
+}
diff --git a/tests/unicase/test-u8-is-cased.c b/tests/unicase/test-u8-is-cased.c
new file mode 100644
index 00000000..984f81b6
--- /dev/null
+++ b/tests/unicase/test-u8-is-cased.c
@@ -0,0 +1,51 @@
+/* Test of test whether case matters for an UTF-8 string.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/
+
+#include <config.h>
+
+#include "unicase.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "unistr.h"
+
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+#define UNIT uint8_t
+#include "test-is-cased.h"
+#undef UNIT
+
+int
+main ()
+{
+  test_ascii (u8_is_cased);
+
+  return 0;
+}
diff --git a/tests/unicase/test-u8-is-casefolded.c b/tests/unicase/test-u8-is-casefolded.c
new file mode 100644
index 00000000..b727f6bb
--- /dev/null
+++ b/tests/unicase/test-u8-is-casefolded.c
@@ -0,0 +1,65 @@
+/* Test of test whether an UTF-8 string is already case-folded.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
+
+#include <config.h>
+
+#include "unicase.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "unistr.h"
+
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+#define UNIT uint8_t
+#include "test-is-casefolded.h"
+#undef UNIT
+
+static void
+test_nonascii (int (*my_is) (const uint8_t *, size_t, const char *, bool *))
+{
+  /* Test cases from Unicode 5.1.0.
*/
+  {
+    static const uint8_t input[] = { 0xC3, 0x9F };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == false);
+  }
+}
+
+int
+main ()
+{
+  test_ascii (u8_is_casefolded);
+  test_nonascii (u8_is_casefolded);
+
+  return 0;
+}
diff --git a/tests/unicase/test-u8-is-lowercase.c b/tests/unicase/test-u8-is-lowercase.c
new file mode 100644
index 00000000..230e77cf
--- /dev/null
+++ b/tests/unicase/test-u8-is-lowercase.c
@@ -0,0 +1,93 @@
+/* Test of test whether an UTF-8 string is entirely lower case.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
+
+#include <config.h>
+
+#include "unicase.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "unistr.h"
+
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+#define UNIT uint8_t
+#include "test-is-lowercase.h"
+#undef UNIT
+
+static void
+test_nonascii (int (*my_is) (const uint8_t *, size_t, const char *, bool *))
+{
+  /* Test cases from Unicode 5.1.0.
*/
+  {
+    static const uint8_t input[] = { 0xE2, 0x93, 0x97 };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == true);
+  }
+  {
+    static const uint8_t input[] = { 0xE2, 0x92, 0xBD };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == false);
+  }
+  {
+    static const uint8_t input[] = { 0xCA, 0xB0 };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == true);
+  }
+  {
+    static const uint8_t input[] = { 0xE1, 0x80, 0xB4 };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == true);
+  }
+  {
+    static const uint8_t input[] = { 0xCA, 0xBD };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == true);
+  }
+}
+
+int
+main ()
+{
+  test_ascii (u8_is_lowercase);
+  test_nonascii (u8_is_lowercase);
+
+  return 0;
+}
diff --git a/tests/unicase/test-u8-is-titlecase.c b/tests/unicase/test-u8-is-titlecase.c
new file mode 100644
index 00000000..fb803ea8
--- /dev/null
+++ b/tests/unicase/test-u8-is-titlecase.c
@@ -0,0 +1,51 @@
+/* Test of test whether an UTF-8 string is entirely title case.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/
+
+#include <config.h>
+
+#include "unicase.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "unistr.h"
+
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+#define UNIT uint8_t
+#include "test-is-titlecase.h"
+#undef UNIT
+
+int
+main ()
+{
+  test_ascii (u8_is_titlecase);
+
+  return 0;
+}
diff --git a/tests/unicase/test-u8-is-uppercase.c b/tests/unicase/test-u8-is-uppercase.c
new file mode 100644
index 00000000..c66bf393
--- /dev/null
+++ b/tests/unicase/test-u8-is-uppercase.c
@@ -0,0 +1,93 @@
+/* Test of test whether an UTF-8 string is entirely upper case.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
+
+#include <config.h>
+
+#include "unicase.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "unistr.h"
+
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+#define UNIT uint8_t
+#include "test-is-uppercase.h"
+#undef UNIT
+
+static void
+test_nonascii (int (*my_is) (const uint8_t *, size_t, const char *, bool *))
+{
+  /* Test cases from Unicode 5.1.0.
*/
+  {
+    static const uint8_t input[] = { 0xE2, 0x93, 0x97 };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == false);
+  }
+  {
+    static const uint8_t input[] = { 0xE2, 0x92, 0xBD };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == true);
+  }
+  {
+    static const uint8_t input[] = { 0xCA, 0xB0 };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == true);
+  }
+  {
+    static const uint8_t input[] = { 0xE1, 0x80, 0xB4 };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == true);
+  }
+  {
+    static const uint8_t input[] = { 0xCA, 0xBD };
+    bool result;
+
+    ASSERT (my_is (input, SIZEOF (input), NULL, &result) == 0);
+    ASSERT (result == true);
+  }
+}
+
+int
+main ()
+{
+  test_ascii (u8_is_uppercase);
+  test_nonascii (u8_is_uppercase);
+
+  return 0;
+}
diff --git a/tests/unicase/test-u8-tolower.c b/tests/unicase/test-u8-tolower.c
new file mode 100644
index 00000000..6c0a5df6
--- /dev/null
+++ b/tests/unicase/test-u8-tolower.c
@@ -0,0 +1,257 @@
+/* Test of lowercase mapping for UTF-8 strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/
+
+#include <config.h>
+
+#include "unicase.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "unistr.h"
+#include "uninorm.h"
+
+#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+static int
+check (const uint8_t *input, size_t input_length,
+       const char *iso639_language, uninorm_t nf,
+       const uint8_t *expected, size_t expected_length)
+{
+  size_t length;
+  uint8_t *result;
+
+  /* Test return conventions with resultbuf == NULL.  */
+  result = u8_tolower (input, input_length, iso639_language, nf, NULL, &length);
+  if (!(result != NULL))
+    return 1;
+  if (!(length == expected_length))
+    return 2;
+  if (!(u8_cmp (result, expected, expected_length) == 0))
+    return 3;
+  free (result);
+
+  /* Test return conventions with resultbuf too small.  */
+  if (expected_length > 0)
+    {
+      uint8_t *preallocated;
+
+      length = expected_length - 1;
+      preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
+      result = u8_tolower (input, input_length, iso639_language, nf, preallocated, &length);
+      if (!(result != NULL))
+        return 4;
+      if (!(result != preallocated))
+        return 5;
+      if (!(length == expected_length))
+        return 6;
+      if (!(u8_cmp (result, expected, expected_length) == 0))
+        return 7;
+      free (result);
+      free (preallocated);
+    }
+
+  /* Test return conventions with resultbuf large enough.
*/ + { + uint8_t *preallocated; + + length = expected_length; + preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); + result = u8_tolower (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u8_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint8_t input[] = + { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', ' ', + 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + static const uint8_t casemapped[] = + { 'g', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'g', 'o', 't', 't', '.', ' ', + 0xD0, 0xB7, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i 
İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint8_t input[] = { 0x49 }; + static const uint8_t casemapped[] = { 0x69 }; + static const uint8_t casemapped_tr[] = { 0xC4, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint8_t input[] = { 0x69 }; + static const uint8_t casemapped[] = { 0x69 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint8_t input[] = { 0xC4, 0xB0 }; + static const uint8_t casemapped[] = { 0x69, 0xCC, 0x87 }; + static const uint8_t casemapped_tr[] = { 0x69 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint8_t input[] = { 0xC4, 0xB1}; + static const uint8_t casemapped[] = { 0xC4, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "TOPKAPI" */ + static const uint8_t input[] = + { 0x54, 0x4F, 0x50, 0x4B, 0x41, 0x50, 0x49 }; + static const uint8_t casemapped[] = + { 0x74, 0x6F, 0x70, 0x6B, 0x61, 0x70, 0xC4, 0xB1 }; + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Uppercasing can increase the number of Unicode characters. 
*/ + { /* "HEIß" */ + static const uint8_t input[] = { 0x48, 0x45, 0x49, 0xC3, 0x9F }; + static const uint8_t casemapped[] = { 0x68, 0x65, 0x69, 0xC3, 0x9F }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "ΠΕΡΙΣΣΌΤΕΡΕΣ ΠΛΗΡΟΦΟΡΊΕΣ" */ + static const uint8_t input[] = + { + 0xCE, 0xA0, 0xCE, 0x95, 0xCE, 0xA1, 0xCE, 0x99, 0xCE, 0xA3, 0xCE, 0xA3, + 0xCE, 0x8C, 0xCE, 0xA4, 0xCE, 0x95, 0xCE, 0xA1, 0xCE, 0x95, 0xCE, 0xA3, + ' ', 0xCE, 0xA0, 0xCE, 0x9B, 0xCE, 0x97, 0xCE, 0xA1, 0xCE, 0x9F, + 0xCE, 0xA6, 0xCE, 0x9F, 0xCE, 0xA1, 0xCE, 0x8A, 0xCE, 0x95, 0xCE, 0xA3 + }; + static const uint8_t casemapped[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x82, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x82 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Σ" -> "σ" */ + static const uint8_t input[] = { 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0xCF, 0x83 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ" -> "ας" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x82 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if not followed by a case-ignorable sequence and + then a cased letter. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* "ΑΣΑ" -> "ασα" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0xCE, 0x91 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0xCE, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:" -> "ας:" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x82, 0x3A }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:Α" -> "ασ:α" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCE, 0x91 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCE, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ" -> "ασ:ͺ" */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCD, 0xBA }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCD, 0xBA }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ΑΣ:ͺ " -> "ασ:ͺ " */ + static const uint8_t input[] = { 0xCE, 0x91, 0xCE, 0xA3, 0x3A, 0xCD, 0xBA, 0x20 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0xCF, 0x83, 0x3A, 0xCD, 0xBA, 0x20 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + /* It's a final sigma only if preceded by a case-ignorable sequence and + a cased letter before it. Note that U+0345 and U+037A are simultaneously + case-ignorable and cased (which is a bit paradoxical). 
*/ + { /* ":Σ" -> ":σ" */ + static const uint8_t input[] = { 0x3A, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0x3A, 0xCF, 0x83 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "Α:Σ" -> "α:ς" */ + static const uint8_t input[] = { 0xCE, 0x91, 0x3A, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0xCE, 0xB1, 0x3A, 0xCF, 0x82 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "ͺ:Σ" -> "ͺ:ς" */ + static const uint8_t input[] = { 0xCD, 0xBA, 0x3A, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0xCD, 0xBA, 0x3A, 0xCF, 0x82 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* " ͺ:Σ" -> " ͺ:ς" */ + static const uint8_t input[] = { 0x20, 0xCD, 0xBA, 0x3A, 0xCE, 0xA3 }; + static const uint8_t casemapped[] = { 0x20, 0xCD, 0xBA, 0x3A, 0xCF, 0x82 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u8-totitle.c b/tests/unicase/test-u8-totitle.c new file mode 100644 index 00000000..c2506ac5 --- /dev/null +++ b/tests/unicase/test-u8-totitle.c @@ -0,0 +1,217 @@ +/* Test of titlecase mapping for UTF-8 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +check (const uint8_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint8_t *expected, size_t expected_length) +{ + size_t length; + uint8_t *result; + + /* Test return conventions with resultbuf == NULL. */ + result = u8_totitle (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u8_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small. */ + if (expected_length > 0) + { + uint8_t *preallocated; + + length = expected_length - 1; + preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); + result = u8_totitle (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u8_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. 
*/ + { + uint8_t *preallocated; + + length = expected_length; + preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); + result = u8_totitle (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u8_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "GRÜß GOTT. ЗДРАВСТВУЙТЕ! X=(-B±SQRT(B²-4AC))/(2A) 日本語,中文,한글" */ + static const uint8_t input[] = + { 'G', 'R', 0xC3, 0x9C, 0xC3, 0x9F, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0xD0, 0x97, 0xD0, 0x94, 0xD0, 0xA0, 0xD0, 0x90, 0xD0, 0x92, 0xD0, 0xA1, + 0xD0, 0xA2, 0xD0, 0x92, 0xD0, 0xA3, 0xD0, 0x99, 0xD0, 0xA2, 0xD0, 0x95, + '!', ' ', 'X', '=', '(', '-', 'B', 0xC2, 0xB1, 'S', 'Q', 'R', 'T', '(', + 'B', 0xC2, 0xB2, '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + static const uint8_t casemapped[] = + { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', ' ', + 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'X', '=', '(', '-', 'B', 0xC2, 0xB1, 'S', 'q', 'r', 't', '(', + 'B', 0xC2, 0xB2, '-', '4', 'A', 'c', ')', ')', '/', '(', '2', 'A', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can 
increase the number of Unicode characters. */ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint8_t input[] = { 0xC5, 0x89 }; + static const uint8_t casemapped[] = { 0xCA, 0xBC, 0x4E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint8_t input[] = { 0xCE, 0x90 }; + static const uint8_t casemapped[] = { 0xCE, 0x99, 0xCC, 0x88, 0xCC, 0x81 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint8_t input[] = { 0x49 }; + static const uint8_t casemapped[] = { 0x49 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint8_t input[] = { 0x69 }; + static const uint8_t casemapped[] = { 0x49 }; + static const uint8_t casemapped_tr[] = { 0xC4, 0xB0 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint8_t input[] = { 0xC4, 0xB0 }; + static const uint8_t casemapped[] = { 0xC4, 0xB0 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint8_t input[] = { 0xC4, 0xB1 }; + static const uint8_t casemapped[] = { 0x49 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "topkapı" */ + static 
const uint8_t input[] = + { 0x74, 0x6F, 0x70, 0x6B, 0x61, 0x70, 0xC4, 0xB1 }; + static const uint8_t casemapped[] = + { 0x54, 0x6F, 0x70, 0x6B, 0x61, 0x70, 0xC4, 0xB1 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Uppercasing can increase the number of Unicode characters. */ + { /* "heiß" */ + static const uint8_t input[] = { 0x68, 0x65, 0x69, 0xC3, 0x9F }; + static const uint8_t casemapped[] = { 0x48, 0x65, 0x69, 0xC3, 0x9F }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint8_t input[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x82, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x82 + }; + static const uint8_t casemapped[] = + { + 0xCE, 0xA0, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x82, + ' ', 0xCE, 0xA0, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x82 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint8_t input[] = { 0xC7, 0xB0, 0xCC, 0xA3 }; + static const uint8_t casemapped[] = { 0x4A, 0xCC, 0x8C, 0xCC, 0xA3 }; + static const uint8_t casemapped_normalized[] = { 0x4A, 0xCC, 0xA3, 0xCC, 0x8C }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casemapped_normalized, SIZEOF (casemapped_normalized)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-u8-toupper.c b/tests/unicase/test-u8-toupper.c new file mode 100644 index 00000000..41e536bd --- /dev/null +++ b/tests/unicase/test-u8-toupper.c @@ -0,0 +1,217 @@ +/* Test of uppercase mapping for UTF-8 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2009. 
*/ + +#include + +#include "unicase.h" + +#include +#include + +#include "unistr.h" +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +check (const uint8_t *input, size_t input_length, + const char *iso639_language, uninorm_t nf, + const uint8_t *expected, size_t expected_length) +{ + size_t length; + uint8_t *result; + + /* Test return conventions with resultbuf == NULL. */ + result = u8_toupper (input, input_length, iso639_language, nf, NULL, &length); + if (!(result != NULL)) + return 1; + if (!(length == expected_length)) + return 2; + if (!(u8_cmp (result, expected, expected_length) == 0)) + return 3; + free (result); + + /* Test return conventions with resultbuf too small. */ + if (expected_length > 0) + { + uint8_t *preallocated; + + length = expected_length - 1; + preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); + result = u8_toupper (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 4; + if (!(result != preallocated)) + return 5; + if (!(length == expected_length)) + return 6; + if (!(u8_cmp (result, expected, expected_length) == 0)) + return 7; + free (result); + free (preallocated); + } + + /* Test return conventions with resultbuf large enough. 
*/ + { + uint8_t *preallocated; + + length = expected_length; + preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); + result = u8_toupper (input, input_length, iso639_language, nf, preallocated, &length); + if (!(result != NULL)) + return 8; + if (!(preallocated == NULL || result == preallocated)) + return 9; + if (!(length == expected_length)) + return 10; + if (!(u8_cmp (result, expected, expected_length) == 0)) + return 11; + free (preallocated); + } + + return 0; +} + +int +main () +{ + { /* Empty string. */ + ASSERT (check (NULL, 0, NULL, NULL, NULL, 0) == 0); + ASSERT (check (NULL, 0, NULL, UNINORM_NFC, NULL, 0) == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const uint8_t input[] = + { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', ' ', + 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + static const uint8_t casemapped[] = + { 'G', 'R', 0xC3, 0x9C, 0x53, 0x53, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0xD0, 0x97, 0xD0, 0x94, 0xD0, 0xA0, 0xD0, 0x90, 0xD0, 0x92, 0xD0, 0xA1, + 0xD0, 0xA2, 0xD0, 0x92, 0xD0, 0xA3, 0xD0, 0x99, 0xD0, 0xA2, 0xD0, 0x95, + '!', ' ', 'X', '=', '(', '-', 'B', 0xC2, 0xB1, 'S', 'Q', 'R', 'T', '(', + 'B', 0xC2, 0xB2, '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can 
increase the number of Unicode characters. */ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const uint8_t input[] = { 0xC5, 0x89 }; + static const uint8_t casemapped[] = { 0xCA, 0xBC, 0x4E }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const uint8_t input[] = { 0xCE, 0x90 }; + static const uint8_t casemapped[] = { 0xCE, 0x99, 0xCC, 0x88, 0xCC, 0x81 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const uint8_t input[] = { 0x49 }; + static const uint8_t casemapped[] = { 0x49 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER I */ + static const uint8_t input[] = { 0x69 }; + static const uint8_t casemapped[] = { 0x49 }; + static const uint8_t casemapped_tr[] = { 0xC4, 0xB0 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped_tr, SIZEOF (casemapped_tr)) == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const uint8_t input[] = { 0xC4, 0xB0 }; + static const uint8_t casemapped[] = { 0xC4, 0xB0 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const uint8_t input[] = { 0xC4, 0xB1 }; + static const uint8_t casemapped[] = { 0x49 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + { /* "topkapı" */ + static 
const uint8_t input[] = + { 0x74, 0x6F, 0x70, 0x6B, 0x61, 0x70, 0xC4, 0xB1 }; + static const uint8_t casemapped[] = + { 0x54, 0x4F, 0x50, 0x4B, 0x41, 0x50, 0x49 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), "tr", NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Uppercasing can increase the number of Unicode characters. */ + { /* "heiß" */ + static const uint8_t input[] = { 0x68, 0x65, 0x69, 0xC3, 0x9F }; + static const uint8_t casemapped[] = { 0x48, 0x45, 0x49, 0x53, 0x53 }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const uint8_t input[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x82, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x82 + }; + static const uint8_t casemapped[] = + { + 0xCE, 0xA0, 0xCE, 0x95, 0xCE, 0xA1, 0xCE, 0x99, 0xCE, 0xA3, 0xCE, 0xA3, + 0xCE, 0x8C, 0xCE, 0xA4, 0xCE, 0x95, 0xCE, 0xA1, 0xCE, 0x95, 0xCE, 0xA3, + ' ', 0xCE, 0xA0, 0xCE, 0x9B, 0xCE, 0x97, 0xCE, 0xA1, 0xCE, 0x9F, + 0xCE, 0xA6, 0xCE, 0x9F, 0xCE, 0xA1, 0xCE, 0x8A, 0xCE, 0x95, 0xCE, 0xA3 + }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const uint8_t input[] = { 0xC7, 0xB0, 0xCC, 0xA3 }; + static const uint8_t casemapped[] = { 0x4A, 0xCC, 0x8C, 0xCC, 0xA3 }; + static const uint8_t casemapped_normalized[] = { 0x4A, 0xCC, 0xA3, 0xCC, 0x8C }; + ASSERT (check (input, SIZEOF (input), NULL, NULL, casemapped, SIZEOF (casemapped)) == 0); + ASSERT (check (input, SIZEOF (input), NULL, UNINORM_NFC, casemapped_normalized, SIZEOF (casemapped_normalized)) == 0); + } + + return 0; +} diff --git a/tests/unicase/test-uc_tolower.c b/tests/unicase/test-uc_tolower.c new file mode 100644 index 00000000..302e26b8 --- /dev/null +++ b/tests/unicase/test-uc_tolower.c @@ -0,0 +1,1047 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Test the Unicode character mapping functions. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Generated automatically by gen-case.c for Unicode 5.1.0. 
*/ + +#include "test-mapping-part1.h" + + { 0x0041, 0x0061 }, + { 0x0042, 0x0062 }, + { 0x0043, 0x0063 }, + { 0x0044, 0x0064 }, + { 0x0045, 0x0065 }, + { 0x0046, 0x0066 }, + { 0x0047, 0x0067 }, + { 0x0048, 0x0068 }, + { 0x0049, 0x0069 }, + { 0x004A, 0x006A }, + { 0x004B, 0x006B }, + { 0x004C, 0x006C }, + { 0x004D, 0x006D }, + { 0x004E, 0x006E }, + { 0x004F, 0x006F }, + { 0x0050, 0x0070 }, + { 0x0051, 0x0071 }, + { 0x0052, 0x0072 }, + { 0x0053, 0x0073 }, + { 0x0054, 0x0074 }, + { 0x0055, 0x0075 }, + { 0x0056, 0x0076 }, + { 0x0057, 0x0077 }, + { 0x0058, 0x0078 }, + { 0x0059, 0x0079 }, + { 0x005A, 0x007A }, + { 0x00C0, 0x00E0 }, + { 0x00C1, 0x00E1 }, + { 0x00C2, 0x00E2 }, + { 0x00C3, 0x00E3 }, + { 0x00C4, 0x00E4 }, + { 0x00C5, 0x00E5 }, + { 0x00C6, 0x00E6 }, + { 0x00C7, 0x00E7 }, + { 0x00C8, 0x00E8 }, + { 0x00C9, 0x00E9 }, + { 0x00CA, 0x00EA }, + { 0x00CB, 0x00EB }, + { 0x00CC, 0x00EC }, + { 0x00CD, 0x00ED }, + { 0x00CE, 0x00EE }, + { 0x00CF, 0x00EF }, + { 0x00D0, 0x00F0 }, + { 0x00D1, 0x00F1 }, + { 0x00D2, 0x00F2 }, + { 0x00D3, 0x00F3 }, + { 0x00D4, 0x00F4 }, + { 0x00D5, 0x00F5 }, + { 0x00D6, 0x00F6 }, + { 0x00D8, 0x00F8 }, + { 0x00D9, 0x00F9 }, + { 0x00DA, 0x00FA }, + { 0x00DB, 0x00FB }, + { 0x00DC, 0x00FC }, + { 0x00DD, 0x00FD }, + { 0x00DE, 0x00FE }, + { 0x0100, 0x0101 }, + { 0x0102, 0x0103 }, + { 0x0104, 0x0105 }, + { 0x0106, 0x0107 }, + { 0x0108, 0x0109 }, + { 0x010A, 0x010B }, + { 0x010C, 0x010D }, + { 0x010E, 0x010F }, + { 0x0110, 0x0111 }, + { 0x0112, 0x0113 }, + { 0x0114, 0x0115 }, + { 0x0116, 0x0117 }, + { 0x0118, 0x0119 }, + { 0x011A, 0x011B }, + { 0x011C, 0x011D }, + { 0x011E, 0x011F }, + { 0x0120, 0x0121 }, + { 0x0122, 0x0123 }, + { 0x0124, 0x0125 }, + { 0x0126, 0x0127 }, + { 0x0128, 0x0129 }, + { 0x012A, 0x012B }, + { 0x012C, 0x012D }, + { 0x012E, 0x012F }, + { 0x0130, 0x0069 }, + { 0x0132, 0x0133 }, + { 0x0134, 0x0135 }, + { 0x0136, 0x0137 }, + { 0x0139, 0x013A }, + { 0x013B, 0x013C }, + { 0x013D, 0x013E }, + { 0x013F, 0x0140 }, + { 0x0141, 0x0142 }, + 
{ 0x0143, 0x0144 }, + { 0x0145, 0x0146 }, + { 0x0147, 0x0148 }, + { 0x014A, 0x014B }, + { 0x014C, 0x014D }, + { 0x014E, 0x014F }, + { 0x0150, 0x0151 }, + { 0x0152, 0x0153 }, + { 0x0154, 0x0155 }, + { 0x0156, 0x0157 }, + { 0x0158, 0x0159 }, + { 0x015A, 0x015B }, + { 0x015C, 0x015D }, + { 0x015E, 0x015F }, + { 0x0160, 0x0161 }, + { 0x0162, 0x0163 }, + { 0x0164, 0x0165 }, + { 0x0166, 0x0167 }, + { 0x0168, 0x0169 }, + { 0x016A, 0x016B }, + { 0x016C, 0x016D }, + { 0x016E, 0x016F }, + { 0x0170, 0x0171 }, + { 0x0172, 0x0173 }, + { 0x0174, 0x0175 }, + { 0x0176, 0x0177 }, + { 0x0178, 0x00FF }, + { 0x0179, 0x017A }, + { 0x017B, 0x017C }, + { 0x017D, 0x017E }, + { 0x0181, 0x0253 }, + { 0x0182, 0x0183 }, + { 0x0184, 0x0185 }, + { 0x0186, 0x0254 }, + { 0x0187, 0x0188 }, + { 0x0189, 0x0256 }, + { 0x018A, 0x0257 }, + { 0x018B, 0x018C }, + { 0x018E, 0x01DD }, + { 0x018F, 0x0259 }, + { 0x0190, 0x025B }, + { 0x0191, 0x0192 }, + { 0x0193, 0x0260 }, + { 0x0194, 0x0263 }, + { 0x0196, 0x0269 }, + { 0x0197, 0x0268 }, + { 0x0198, 0x0199 }, + { 0x019C, 0x026F }, + { 0x019D, 0x0272 }, + { 0x019F, 0x0275 }, + { 0x01A0, 0x01A1 }, + { 0x01A2, 0x01A3 }, + { 0x01A4, 0x01A5 }, + { 0x01A6, 0x0280 }, + { 0x01A7, 0x01A8 }, + { 0x01A9, 0x0283 }, + { 0x01AC, 0x01AD }, + { 0x01AE, 0x0288 }, + { 0x01AF, 0x01B0 }, + { 0x01B1, 0x028A }, + { 0x01B2, 0x028B }, + { 0x01B3, 0x01B4 }, + { 0x01B5, 0x01B6 }, + { 0x01B7, 0x0292 }, + { 0x01B8, 0x01B9 }, + { 0x01BC, 0x01BD }, + { 0x01C4, 0x01C6 }, + { 0x01C5, 0x01C6 }, + { 0x01C7, 0x01C9 }, + { 0x01C8, 0x01C9 }, + { 0x01CA, 0x01CC }, + { 0x01CB, 0x01CC }, + { 0x01CD, 0x01CE }, + { 0x01CF, 0x01D0 }, + { 0x01D1, 0x01D2 }, + { 0x01D3, 0x01D4 }, + { 0x01D5, 0x01D6 }, + { 0x01D7, 0x01D8 }, + { 0x01D9, 0x01DA }, + { 0x01DB, 0x01DC }, + { 0x01DE, 0x01DF }, + { 0x01E0, 0x01E1 }, + { 0x01E2, 0x01E3 }, + { 0x01E4, 0x01E5 }, + { 0x01E6, 0x01E7 }, + { 0x01E8, 0x01E9 }, + { 0x01EA, 0x01EB }, + { 0x01EC, 0x01ED }, + { 0x01EE, 0x01EF }, + { 0x01F1, 0x01F3 }, + { 0x01F2, 0x01F3 }, 
+ { 0x01F4, 0x01F5 }, + { 0x01F6, 0x0195 }, + { 0x01F7, 0x01BF }, + { 0x01F8, 0x01F9 }, + { 0x01FA, 0x01FB }, + { 0x01FC, 0x01FD }, + { 0x01FE, 0x01FF }, + { 0x0200, 0x0201 }, + { 0x0202, 0x0203 }, + { 0x0204, 0x0205 }, + { 0x0206, 0x0207 }, + { 0x0208, 0x0209 }, + { 0x020A, 0x020B }, + { 0x020C, 0x020D }, + { 0x020E, 0x020F }, + { 0x0210, 0x0211 }, + { 0x0212, 0x0213 }, + { 0x0214, 0x0215 }, + { 0x0216, 0x0217 }, + { 0x0218, 0x0219 }, + { 0x021A, 0x021B }, + { 0x021C, 0x021D }, + { 0x021E, 0x021F }, + { 0x0220, 0x019E }, + { 0x0222, 0x0223 }, + { 0x0224, 0x0225 }, + { 0x0226, 0x0227 }, + { 0x0228, 0x0229 }, + { 0x022A, 0x022B }, + { 0x022C, 0x022D }, + { 0x022E, 0x022F }, + { 0x0230, 0x0231 }, + { 0x0232, 0x0233 }, + { 0x023A, 0x2C65 }, + { 0x023B, 0x023C }, + { 0x023D, 0x019A }, + { 0x023E, 0x2C66 }, + { 0x0241, 0x0242 }, + { 0x0243, 0x0180 }, + { 0x0244, 0x0289 }, + { 0x0245, 0x028C }, + { 0x0246, 0x0247 }, + { 0x0248, 0x0249 }, + { 0x024A, 0x024B }, + { 0x024C, 0x024D }, + { 0x024E, 0x024F }, + { 0x0370, 0x0371 }, + { 0x0372, 0x0373 }, + { 0x0376, 0x0377 }, + { 0x0386, 0x03AC }, + { 0x0388, 0x03AD }, + { 0x0389, 0x03AE }, + { 0x038A, 0x03AF }, + { 0x038C, 0x03CC }, + { 0x038E, 0x03CD }, + { 0x038F, 0x03CE }, + { 0x0391, 0x03B1 }, + { 0x0392, 0x03B2 }, + { 0x0393, 0x03B3 }, + { 0x0394, 0x03B4 }, + { 0x0395, 0x03B5 }, + { 0x0396, 0x03B6 }, + { 0x0397, 0x03B7 }, + { 0x0398, 0x03B8 }, + { 0x0399, 0x03B9 }, + { 0x039A, 0x03BA }, + { 0x039B, 0x03BB }, + { 0x039C, 0x03BC }, + { 0x039D, 0x03BD }, + { 0x039E, 0x03BE }, + { 0x039F, 0x03BF }, + { 0x03A0, 0x03C0 }, + { 0x03A1, 0x03C1 }, + { 0x03A3, 0x03C3 }, + { 0x03A4, 0x03C4 }, + { 0x03A5, 0x03C5 }, + { 0x03A6, 0x03C6 }, + { 0x03A7, 0x03C7 }, + { 0x03A8, 0x03C8 }, + { 0x03A9, 0x03C9 }, + { 0x03AA, 0x03CA }, + { 0x03AB, 0x03CB }, + { 0x03CF, 0x03D7 }, + { 0x03D8, 0x03D9 }, + { 0x03DA, 0x03DB }, + { 0x03DC, 0x03DD }, + { 0x03DE, 0x03DF }, + { 0x03E0, 0x03E1 }, + { 0x03E2, 0x03E3 }, + { 0x03E4, 0x03E5 }, + { 0x03E6, 0x03E7 
}, + { 0x03E8, 0x03E9 }, + { 0x03EA, 0x03EB }, + { 0x03EC, 0x03ED }, + { 0x03EE, 0x03EF }, + { 0x03F4, 0x03B8 }, + { 0x03F7, 0x03F8 }, + { 0x03F9, 0x03F2 }, + { 0x03FA, 0x03FB }, + { 0x03FD, 0x037B }, + { 0x03FE, 0x037C }, + { 0x03FF, 0x037D }, + { 0x0400, 0x0450 }, + { 0x0401, 0x0451 }, + { 0x0402, 0x0452 }, + { 0x0403, 0x0453 }, + { 0x0404, 0x0454 }, + { 0x0405, 0x0455 }, + { 0x0406, 0x0456 }, + { 0x0407, 0x0457 }, + { 0x0408, 0x0458 }, + { 0x0409, 0x0459 }, + { 0x040A, 0x045A }, + { 0x040B, 0x045B }, + { 0x040C, 0x045C }, + { 0x040D, 0x045D }, + { 0x040E, 0x045E }, + { 0x040F, 0x045F }, + { 0x0410, 0x0430 }, + { 0x0411, 0x0431 }, + { 0x0412, 0x0432 }, + { 0x0413, 0x0433 }, + { 0x0414, 0x0434 }, + { 0x0415, 0x0435 }, + { 0x0416, 0x0436 }, + { 0x0417, 0x0437 }, + { 0x0418, 0x0438 }, + { 0x0419, 0x0439 }, + { 0x041A, 0x043A }, + { 0x041B, 0x043B }, + { 0x041C, 0x043C }, + { 0x041D, 0x043D }, + { 0x041E, 0x043E }, + { 0x041F, 0x043F }, + { 0x0420, 0x0440 }, + { 0x0421, 0x0441 }, + { 0x0422, 0x0442 }, + { 0x0423, 0x0443 }, + { 0x0424, 0x0444 }, + { 0x0425, 0x0445 }, + { 0x0426, 0x0446 }, + { 0x0427, 0x0447 }, + { 0x0428, 0x0448 }, + { 0x0429, 0x0449 }, + { 0x042A, 0x044A }, + { 0x042B, 0x044B }, + { 0x042C, 0x044C }, + { 0x042D, 0x044D }, + { 0x042E, 0x044E }, + { 0x042F, 0x044F }, + { 0x0460, 0x0461 }, + { 0x0462, 0x0463 }, + { 0x0464, 0x0465 }, + { 0x0466, 0x0467 }, + { 0x0468, 0x0469 }, + { 0x046A, 0x046B }, + { 0x046C, 0x046D }, + { 0x046E, 0x046F }, + { 0x0470, 0x0471 }, + { 0x0472, 0x0473 }, + { 0x0474, 0x0475 }, + { 0x0476, 0x0477 }, + { 0x0478, 0x0479 }, + { 0x047A, 0x047B }, + { 0x047C, 0x047D }, + { 0x047E, 0x047F }, + { 0x0480, 0x0481 }, + { 0x048A, 0x048B }, + { 0x048C, 0x048D }, + { 0x048E, 0x048F }, + { 0x0490, 0x0491 }, + { 0x0492, 0x0493 }, + { 0x0494, 0x0495 }, + { 0x0496, 0x0497 }, + { 0x0498, 0x0499 }, + { 0x049A, 0x049B }, + { 0x049C, 0x049D }, + { 0x049E, 0x049F }, + { 0x04A0, 0x04A1 }, + { 0x04A2, 0x04A3 }, + { 0x04A4, 0x04A5 }, + { 0x04A6, 
0x04A7 }, + { 0x04A8, 0x04A9 }, + { 0x04AA, 0x04AB }, + { 0x04AC, 0x04AD }, + { 0x04AE, 0x04AF }, + { 0x04B0, 0x04B1 }, + { 0x04B2, 0x04B3 }, + { 0x04B4, 0x04B5 }, + { 0x04B6, 0x04B7 }, + { 0x04B8, 0x04B9 }, + { 0x04BA, 0x04BB }, + { 0x04BC, 0x04BD }, + { 0x04BE, 0x04BF }, + { 0x04C0, 0x04CF }, + { 0x04C1, 0x04C2 }, + { 0x04C3, 0x04C4 }, + { 0x04C5, 0x04C6 }, + { 0x04C7, 0x04C8 }, + { 0x04C9, 0x04CA }, + { 0x04CB, 0x04CC }, + { 0x04CD, 0x04CE }, + { 0x04D0, 0x04D1 }, + { 0x04D2, 0x04D3 }, + { 0x04D4, 0x04D5 }, + { 0x04D6, 0x04D7 }, + { 0x04D8, 0x04D9 }, + { 0x04DA, 0x04DB }, + { 0x04DC, 0x04DD }, + { 0x04DE, 0x04DF }, + { 0x04E0, 0x04E1 }, + { 0x04E2, 0x04E3 }, + { 0x04E4, 0x04E5 }, + { 0x04E6, 0x04E7 }, + { 0x04E8, 0x04E9 }, + { 0x04EA, 0x04EB }, + { 0x04EC, 0x04ED }, + { 0x04EE, 0x04EF }, + { 0x04F0, 0x04F1 }, + { 0x04F2, 0x04F3 }, + { 0x04F4, 0x04F5 }, + { 0x04F6, 0x04F7 }, + { 0x04F8, 0x04F9 }, + { 0x04FA, 0x04FB }, + { 0x04FC, 0x04FD }, + { 0x04FE, 0x04FF }, + { 0x0500, 0x0501 }, + { 0x0502, 0x0503 }, + { 0x0504, 0x0505 }, + { 0x0506, 0x0507 }, + { 0x0508, 0x0509 }, + { 0x050A, 0x050B }, + { 0x050C, 0x050D }, + { 0x050E, 0x050F }, + { 0x0510, 0x0511 }, + { 0x0512, 0x0513 }, + { 0x0514, 0x0515 }, + { 0x0516, 0x0517 }, + { 0x0518, 0x0519 }, + { 0x051A, 0x051B }, + { 0x051C, 0x051D }, + { 0x051E, 0x051F }, + { 0x0520, 0x0521 }, + { 0x0522, 0x0523 }, + { 0x0531, 0x0561 }, + { 0x0532, 0x0562 }, + { 0x0533, 0x0563 }, + { 0x0534, 0x0564 }, + { 0x0535, 0x0565 }, + { 0x0536, 0x0566 }, + { 0x0537, 0x0567 }, + { 0x0538, 0x0568 }, + { 0x0539, 0x0569 }, + { 0x053A, 0x056A }, + { 0x053B, 0x056B }, + { 0x053C, 0x056C }, + { 0x053D, 0x056D }, + { 0x053E, 0x056E }, + { 0x053F, 0x056F }, + { 0x0540, 0x0570 }, + { 0x0541, 0x0571 }, + { 0x0542, 0x0572 }, + { 0x0543, 0x0573 }, + { 0x0544, 0x0574 }, + { 0x0545, 0x0575 }, + { 0x0546, 0x0576 }, + { 0x0547, 0x0577 }, + { 0x0548, 0x0578 }, + { 0x0549, 0x0579 }, + { 0x054A, 0x057A }, + { 0x054B, 0x057B }, + { 0x054C, 0x057C }, + { 
0x054D, 0x057D }, + { 0x054E, 0x057E }, + { 0x054F, 0x057F }, + { 0x0550, 0x0580 }, + { 0x0551, 0x0581 }, + { 0x0552, 0x0582 }, + { 0x0553, 0x0583 }, + { 0x0554, 0x0584 }, + { 0x0555, 0x0585 }, + { 0x0556, 0x0586 }, + { 0x10A0, 0x2D00 }, + { 0x10A1, 0x2D01 }, + { 0x10A2, 0x2D02 }, + { 0x10A3, 0x2D03 }, + { 0x10A4, 0x2D04 }, + { 0x10A5, 0x2D05 }, + { 0x10A6, 0x2D06 }, + { 0x10A7, 0x2D07 }, + { 0x10A8, 0x2D08 }, + { 0x10A9, 0x2D09 }, + { 0x10AA, 0x2D0A }, + { 0x10AB, 0x2D0B }, + { 0x10AC, 0x2D0C }, + { 0x10AD, 0x2D0D }, + { 0x10AE, 0x2D0E }, + { 0x10AF, 0x2D0F }, + { 0x10B0, 0x2D10 }, + { 0x10B1, 0x2D11 }, + { 0x10B2, 0x2D12 }, + { 0x10B3, 0x2D13 }, + { 0x10B4, 0x2D14 }, + { 0x10B5, 0x2D15 }, + { 0x10B6, 0x2D16 }, + { 0x10B7, 0x2D17 }, + { 0x10B8, 0x2D18 }, + { 0x10B9, 0x2D19 }, + { 0x10BA, 0x2D1A }, + { 0x10BB, 0x2D1B }, + { 0x10BC, 0x2D1C }, + { 0x10BD, 0x2D1D }, + { 0x10BE, 0x2D1E }, + { 0x10BF, 0x2D1F }, + { 0x10C0, 0x2D20 }, + { 0x10C1, 0x2D21 }, + { 0x10C2, 0x2D22 }, + { 0x10C3, 0x2D23 }, + { 0x10C4, 0x2D24 }, + { 0x10C5, 0x2D25 }, + { 0x1E00, 0x1E01 }, + { 0x1E02, 0x1E03 }, + { 0x1E04, 0x1E05 }, + { 0x1E06, 0x1E07 }, + { 0x1E08, 0x1E09 }, + { 0x1E0A, 0x1E0B }, + { 0x1E0C, 0x1E0D }, + { 0x1E0E, 0x1E0F }, + { 0x1E10, 0x1E11 }, + { 0x1E12, 0x1E13 }, + { 0x1E14, 0x1E15 }, + { 0x1E16, 0x1E17 }, + { 0x1E18, 0x1E19 }, + { 0x1E1A, 0x1E1B }, + { 0x1E1C, 0x1E1D }, + { 0x1E1E, 0x1E1F }, + { 0x1E20, 0x1E21 }, + { 0x1E22, 0x1E23 }, + { 0x1E24, 0x1E25 }, + { 0x1E26, 0x1E27 }, + { 0x1E28, 0x1E29 }, + { 0x1E2A, 0x1E2B }, + { 0x1E2C, 0x1E2D }, + { 0x1E2E, 0x1E2F }, + { 0x1E30, 0x1E31 }, + { 0x1E32, 0x1E33 }, + { 0x1E34, 0x1E35 }, + { 0x1E36, 0x1E37 }, + { 0x1E38, 0x1E39 }, + { 0x1E3A, 0x1E3B }, + { 0x1E3C, 0x1E3D }, + { 0x1E3E, 0x1E3F }, + { 0x1E40, 0x1E41 }, + { 0x1E42, 0x1E43 }, + { 0x1E44, 0x1E45 }, + { 0x1E46, 0x1E47 }, + { 0x1E48, 0x1E49 }, + { 0x1E4A, 0x1E4B }, + { 0x1E4C, 0x1E4D }, + { 0x1E4E, 0x1E4F }, + { 0x1E50, 0x1E51 }, + { 0x1E52, 0x1E53 }, + { 0x1E54, 0x1E55 }, + 
{ 0x1E56, 0x1E57 }, + { 0x1E58, 0x1E59 }, + { 0x1E5A, 0x1E5B }, + { 0x1E5C, 0x1E5D }, + { 0x1E5E, 0x1E5F }, + { 0x1E60, 0x1E61 }, + { 0x1E62, 0x1E63 }, + { 0x1E64, 0x1E65 }, + { 0x1E66, 0x1E67 }, + { 0x1E68, 0x1E69 }, + { 0x1E6A, 0x1E6B }, + { 0x1E6C, 0x1E6D }, + { 0x1E6E, 0x1E6F }, + { 0x1E70, 0x1E71 }, + { 0x1E72, 0x1E73 }, + { 0x1E74, 0x1E75 }, + { 0x1E76, 0x1E77 }, + { 0x1E78, 0x1E79 }, + { 0x1E7A, 0x1E7B }, + { 0x1E7C, 0x1E7D }, + { 0x1E7E, 0x1E7F }, + { 0x1E80, 0x1E81 }, + { 0x1E82, 0x1E83 }, + { 0x1E84, 0x1E85 }, + { 0x1E86, 0x1E87 }, + { 0x1E88, 0x1E89 }, + { 0x1E8A, 0x1E8B }, + { 0x1E8C, 0x1E8D }, + { 0x1E8E, 0x1E8F }, + { 0x1E90, 0x1E91 }, + { 0x1E92, 0x1E93 }, + { 0x1E94, 0x1E95 }, + { 0x1E9E, 0x00DF }, + { 0x1EA0, 0x1EA1 }, + { 0x1EA2, 0x1EA3 }, + { 0x1EA4, 0x1EA5 }, + { 0x1EA6, 0x1EA7 }, + { 0x1EA8, 0x1EA9 }, + { 0x1EAA, 0x1EAB }, + { 0x1EAC, 0x1EAD }, + { 0x1EAE, 0x1EAF }, + { 0x1EB0, 0x1EB1 }, + { 0x1EB2, 0x1EB3 }, + { 0x1EB4, 0x1EB5 }, + { 0x1EB6, 0x1EB7 }, + { 0x1EB8, 0x1EB9 }, + { 0x1EBA, 0x1EBB }, + { 0x1EBC, 0x1EBD }, + { 0x1EBE, 0x1EBF }, + { 0x1EC0, 0x1EC1 }, + { 0x1EC2, 0x1EC3 }, + { 0x1EC4, 0x1EC5 }, + { 0x1EC6, 0x1EC7 }, + { 0x1EC8, 0x1EC9 }, + { 0x1ECA, 0x1ECB }, + { 0x1ECC, 0x1ECD }, + { 0x1ECE, 0x1ECF }, + { 0x1ED0, 0x1ED1 }, + { 0x1ED2, 0x1ED3 }, + { 0x1ED4, 0x1ED5 }, + { 0x1ED6, 0x1ED7 }, + { 0x1ED8, 0x1ED9 }, + { 0x1EDA, 0x1EDB }, + { 0x1EDC, 0x1EDD }, + { 0x1EDE, 0x1EDF }, + { 0x1EE0, 0x1EE1 }, + { 0x1EE2, 0x1EE3 }, + { 0x1EE4, 0x1EE5 }, + { 0x1EE6, 0x1EE7 }, + { 0x1EE8, 0x1EE9 }, + { 0x1EEA, 0x1EEB }, + { 0x1EEC, 0x1EED }, + { 0x1EEE, 0x1EEF }, + { 0x1EF0, 0x1EF1 }, + { 0x1EF2, 0x1EF3 }, + { 0x1EF4, 0x1EF5 }, + { 0x1EF6, 0x1EF7 }, + { 0x1EF8, 0x1EF9 }, + { 0x1EFA, 0x1EFB }, + { 0x1EFC, 0x1EFD }, + { 0x1EFE, 0x1EFF }, + { 0x1F08, 0x1F00 }, + { 0x1F09, 0x1F01 }, + { 0x1F0A, 0x1F02 }, + { 0x1F0B, 0x1F03 }, + { 0x1F0C, 0x1F04 }, + { 0x1F0D, 0x1F05 }, + { 0x1F0E, 0x1F06 }, + { 0x1F0F, 0x1F07 }, + { 0x1F18, 0x1F10 }, + { 0x1F19, 0x1F11 }, 
+ { 0x1F1A, 0x1F12 }, + { 0x1F1B, 0x1F13 }, + { 0x1F1C, 0x1F14 }, + { 0x1F1D, 0x1F15 }, + { 0x1F28, 0x1F20 }, + { 0x1F29, 0x1F21 }, + { 0x1F2A, 0x1F22 }, + { 0x1F2B, 0x1F23 }, + { 0x1F2C, 0x1F24 }, + { 0x1F2D, 0x1F25 }, + { 0x1F2E, 0x1F26 }, + { 0x1F2F, 0x1F27 }, + { 0x1F38, 0x1F30 }, + { 0x1F39, 0x1F31 }, + { 0x1F3A, 0x1F32 }, + { 0x1F3B, 0x1F33 }, + { 0x1F3C, 0x1F34 }, + { 0x1F3D, 0x1F35 }, + { 0x1F3E, 0x1F36 }, + { 0x1F3F, 0x1F37 }, + { 0x1F48, 0x1F40 }, + { 0x1F49, 0x1F41 }, + { 0x1F4A, 0x1F42 }, + { 0x1F4B, 0x1F43 }, + { 0x1F4C, 0x1F44 }, + { 0x1F4D, 0x1F45 }, + { 0x1F59, 0x1F51 }, + { 0x1F5B, 0x1F53 }, + { 0x1F5D, 0x1F55 }, + { 0x1F5F, 0x1F57 }, + { 0x1F68, 0x1F60 }, + { 0x1F69, 0x1F61 }, + { 0x1F6A, 0x1F62 }, + { 0x1F6B, 0x1F63 }, + { 0x1F6C, 0x1F64 }, + { 0x1F6D, 0x1F65 }, + { 0x1F6E, 0x1F66 }, + { 0x1F6F, 0x1F67 }, + { 0x1F88, 0x1F80 }, + { 0x1F89, 0x1F81 }, + { 0x1F8A, 0x1F82 }, + { 0x1F8B, 0x1F83 }, + { 0x1F8C, 0x1F84 }, + { 0x1F8D, 0x1F85 }, + { 0x1F8E, 0x1F86 }, + { 0x1F8F, 0x1F87 }, + { 0x1F98, 0x1F90 }, + { 0x1F99, 0x1F91 }, + { 0x1F9A, 0x1F92 }, + { 0x1F9B, 0x1F93 }, + { 0x1F9C, 0x1F94 }, + { 0x1F9D, 0x1F95 }, + { 0x1F9E, 0x1F96 }, + { 0x1F9F, 0x1F97 }, + { 0x1FA8, 0x1FA0 }, + { 0x1FA9, 0x1FA1 }, + { 0x1FAA, 0x1FA2 }, + { 0x1FAB, 0x1FA3 }, + { 0x1FAC, 0x1FA4 }, + { 0x1FAD, 0x1FA5 }, + { 0x1FAE, 0x1FA6 }, + { 0x1FAF, 0x1FA7 }, + { 0x1FB8, 0x1FB0 }, + { 0x1FB9, 0x1FB1 }, + { 0x1FBA, 0x1F70 }, + { 0x1FBB, 0x1F71 }, + { 0x1FBC, 0x1FB3 }, + { 0x1FC8, 0x1F72 }, + { 0x1FC9, 0x1F73 }, + { 0x1FCA, 0x1F74 }, + { 0x1FCB, 0x1F75 }, + { 0x1FCC, 0x1FC3 }, + { 0x1FD8, 0x1FD0 }, + { 0x1FD9, 0x1FD1 }, + { 0x1FDA, 0x1F76 }, + { 0x1FDB, 0x1F77 }, + { 0x1FE8, 0x1FE0 }, + { 0x1FE9, 0x1FE1 }, + { 0x1FEA, 0x1F7A }, + { 0x1FEB, 0x1F7B }, + { 0x1FEC, 0x1FE5 }, + { 0x1FF8, 0x1F78 }, + { 0x1FF9, 0x1F79 }, + { 0x1FFA, 0x1F7C }, + { 0x1FFB, 0x1F7D }, + { 0x1FFC, 0x1FF3 }, + { 0x2126, 0x03C9 }, + { 0x212A, 0x006B }, + { 0x212B, 0x00E5 }, + { 0x2132, 0x214E }, + { 0x2160, 0x2170 
}, + { 0x2161, 0x2171 }, + { 0x2162, 0x2172 }, + { 0x2163, 0x2173 }, + { 0x2164, 0x2174 }, + { 0x2165, 0x2175 }, + { 0x2166, 0x2176 }, + { 0x2167, 0x2177 }, + { 0x2168, 0x2178 }, + { 0x2169, 0x2179 }, + { 0x216A, 0x217A }, + { 0x216B, 0x217B }, + { 0x216C, 0x217C }, + { 0x216D, 0x217D }, + { 0x216E, 0x217E }, + { 0x216F, 0x217F }, + { 0x2183, 0x2184 }, + { 0x24B6, 0x24D0 }, + { 0x24B7, 0x24D1 }, + { 0x24B8, 0x24D2 }, + { 0x24B9, 0x24D3 }, + { 0x24BA, 0x24D4 }, + { 0x24BB, 0x24D5 }, + { 0x24BC, 0x24D6 }, + { 0x24BD, 0x24D7 }, + { 0x24BE, 0x24D8 }, + { 0x24BF, 0x24D9 }, + { 0x24C0, 0x24DA }, + { 0x24C1, 0x24DB }, + { 0x24C2, 0x24DC }, + { 0x24C3, 0x24DD }, + { 0x24C4, 0x24DE }, + { 0x24C5, 0x24DF }, + { 0x24C6, 0x24E0 }, + { 0x24C7, 0x24E1 }, + { 0x24C8, 0x24E2 }, + { 0x24C9, 0x24E3 }, + { 0x24CA, 0x24E4 }, + { 0x24CB, 0x24E5 }, + { 0x24CC, 0x24E6 }, + { 0x24CD, 0x24E7 }, + { 0x24CE, 0x24E8 }, + { 0x24CF, 0x24E9 }, + { 0x2C00, 0x2C30 }, + { 0x2C01, 0x2C31 }, + { 0x2C02, 0x2C32 }, + { 0x2C03, 0x2C33 }, + { 0x2C04, 0x2C34 }, + { 0x2C05, 0x2C35 }, + { 0x2C06, 0x2C36 }, + { 0x2C07, 0x2C37 }, + { 0x2C08, 0x2C38 }, + { 0x2C09, 0x2C39 }, + { 0x2C0A, 0x2C3A }, + { 0x2C0B, 0x2C3B }, + { 0x2C0C, 0x2C3C }, + { 0x2C0D, 0x2C3D }, + { 0x2C0E, 0x2C3E }, + { 0x2C0F, 0x2C3F }, + { 0x2C10, 0x2C40 }, + { 0x2C11, 0x2C41 }, + { 0x2C12, 0x2C42 }, + { 0x2C13, 0x2C43 }, + { 0x2C14, 0x2C44 }, + { 0x2C15, 0x2C45 }, + { 0x2C16, 0x2C46 }, + { 0x2C17, 0x2C47 }, + { 0x2C18, 0x2C48 }, + { 0x2C19, 0x2C49 }, + { 0x2C1A, 0x2C4A }, + { 0x2C1B, 0x2C4B }, + { 0x2C1C, 0x2C4C }, + { 0x2C1D, 0x2C4D }, + { 0x2C1E, 0x2C4E }, + { 0x2C1F, 0x2C4F }, + { 0x2C20, 0x2C50 }, + { 0x2C21, 0x2C51 }, + { 0x2C22, 0x2C52 }, + { 0x2C23, 0x2C53 }, + { 0x2C24, 0x2C54 }, + { 0x2C25, 0x2C55 }, + { 0x2C26, 0x2C56 }, + { 0x2C27, 0x2C57 }, + { 0x2C28, 0x2C58 }, + { 0x2C29, 0x2C59 }, + { 0x2C2A, 0x2C5A }, + { 0x2C2B, 0x2C5B }, + { 0x2C2C, 0x2C5C }, + { 0x2C2D, 0x2C5D }, + { 0x2C2E, 0x2C5E }, + { 0x2C60, 0x2C61 }, + { 0x2C62, 
0x026B }, + { 0x2C63, 0x1D7D }, + { 0x2C64, 0x027D }, + { 0x2C67, 0x2C68 }, + { 0x2C69, 0x2C6A }, + { 0x2C6B, 0x2C6C }, + { 0x2C6D, 0x0251 }, + { 0x2C6E, 0x0271 }, + { 0x2C6F, 0x0250 }, + { 0x2C72, 0x2C73 }, + { 0x2C75, 0x2C76 }, + { 0x2C80, 0x2C81 }, + { 0x2C82, 0x2C83 }, + { 0x2C84, 0x2C85 }, + { 0x2C86, 0x2C87 }, + { 0x2C88, 0x2C89 }, + { 0x2C8A, 0x2C8B }, + { 0x2C8C, 0x2C8D }, + { 0x2C8E, 0x2C8F }, + { 0x2C90, 0x2C91 }, + { 0x2C92, 0x2C93 }, + { 0x2C94, 0x2C95 }, + { 0x2C96, 0x2C97 }, + { 0x2C98, 0x2C99 }, + { 0x2C9A, 0x2C9B }, + { 0x2C9C, 0x2C9D }, + { 0x2C9E, 0x2C9F }, + { 0x2CA0, 0x2CA1 }, + { 0x2CA2, 0x2CA3 }, + { 0x2CA4, 0x2CA5 }, + { 0x2CA6, 0x2CA7 }, + { 0x2CA8, 0x2CA9 }, + { 0x2CAA, 0x2CAB }, + { 0x2CAC, 0x2CAD }, + { 0x2CAE, 0x2CAF }, + { 0x2CB0, 0x2CB1 }, + { 0x2CB2, 0x2CB3 }, + { 0x2CB4, 0x2CB5 }, + { 0x2CB6, 0x2CB7 }, + { 0x2CB8, 0x2CB9 }, + { 0x2CBA, 0x2CBB }, + { 0x2CBC, 0x2CBD }, + { 0x2CBE, 0x2CBF }, + { 0x2CC0, 0x2CC1 }, + { 0x2CC2, 0x2CC3 }, + { 0x2CC4, 0x2CC5 }, + { 0x2CC6, 0x2CC7 }, + { 0x2CC8, 0x2CC9 }, + { 0x2CCA, 0x2CCB }, + { 0x2CCC, 0x2CCD }, + { 0x2CCE, 0x2CCF }, + { 0x2CD0, 0x2CD1 }, + { 0x2CD2, 0x2CD3 }, + { 0x2CD4, 0x2CD5 }, + { 0x2CD6, 0x2CD7 }, + { 0x2CD8, 0x2CD9 }, + { 0x2CDA, 0x2CDB }, + { 0x2CDC, 0x2CDD }, + { 0x2CDE, 0x2CDF }, + { 0x2CE0, 0x2CE1 }, + { 0x2CE2, 0x2CE3 }, + { 0xA640, 0xA641 }, + { 0xA642, 0xA643 }, + { 0xA644, 0xA645 }, + { 0xA646, 0xA647 }, + { 0xA648, 0xA649 }, + { 0xA64A, 0xA64B }, + { 0xA64C, 0xA64D }, + { 0xA64E, 0xA64F }, + { 0xA650, 0xA651 }, + { 0xA652, 0xA653 }, + { 0xA654, 0xA655 }, + { 0xA656, 0xA657 }, + { 0xA658, 0xA659 }, + { 0xA65A, 0xA65B }, + { 0xA65C, 0xA65D }, + { 0xA65E, 0xA65F }, + { 0xA662, 0xA663 }, + { 0xA664, 0xA665 }, + { 0xA666, 0xA667 }, + { 0xA668, 0xA669 }, + { 0xA66A, 0xA66B }, + { 0xA66C, 0xA66D }, + { 0xA680, 0xA681 }, + { 0xA682, 0xA683 }, + { 0xA684, 0xA685 }, + { 0xA686, 0xA687 }, + { 0xA688, 0xA689 }, + { 0xA68A, 0xA68B }, + { 0xA68C, 0xA68D }, + { 0xA68E, 0xA68F }, + { 
0xA690, 0xA691 }, + { 0xA692, 0xA693 }, + { 0xA694, 0xA695 }, + { 0xA696, 0xA697 }, + { 0xA722, 0xA723 }, + { 0xA724, 0xA725 }, + { 0xA726, 0xA727 }, + { 0xA728, 0xA729 }, + { 0xA72A, 0xA72B }, + { 0xA72C, 0xA72D }, + { 0xA72E, 0xA72F }, + { 0xA732, 0xA733 }, + { 0xA734, 0xA735 }, + { 0xA736, 0xA737 }, + { 0xA738, 0xA739 }, + { 0xA73A, 0xA73B }, + { 0xA73C, 0xA73D }, + { 0xA73E, 0xA73F }, + { 0xA740, 0xA741 }, + { 0xA742, 0xA743 }, + { 0xA744, 0xA745 }, + { 0xA746, 0xA747 }, + { 0xA748, 0xA749 }, + { 0xA74A, 0xA74B }, + { 0xA74C, 0xA74D }, + { 0xA74E, 0xA74F }, + { 0xA750, 0xA751 }, + { 0xA752, 0xA753 }, + { 0xA754, 0xA755 }, + { 0xA756, 0xA757 }, + { 0xA758, 0xA759 }, + { 0xA75A, 0xA75B }, + { 0xA75C, 0xA75D }, + { 0xA75E, 0xA75F }, + { 0xA760, 0xA761 }, + { 0xA762, 0xA763 }, + { 0xA764, 0xA765 }, + { 0xA766, 0xA767 }, + { 0xA768, 0xA769 }, + { 0xA76A, 0xA76B }, + { 0xA76C, 0xA76D }, + { 0xA76E, 0xA76F }, + { 0xA779, 0xA77A }, + { 0xA77B, 0xA77C }, + { 0xA77D, 0x1D79 }, + { 0xA77E, 0xA77F }, + { 0xA780, 0xA781 }, + { 0xA782, 0xA783 }, + { 0xA784, 0xA785 }, + { 0xA786, 0xA787 }, + { 0xA78B, 0xA78C }, + { 0xFF21, 0xFF41 }, + { 0xFF22, 0xFF42 }, + { 0xFF23, 0xFF43 }, + { 0xFF24, 0xFF44 }, + { 0xFF25, 0xFF45 }, + { 0xFF26, 0xFF46 }, + { 0xFF27, 0xFF47 }, + { 0xFF28, 0xFF48 }, + { 0xFF29, 0xFF49 }, + { 0xFF2A, 0xFF4A }, + { 0xFF2B, 0xFF4B }, + { 0xFF2C, 0xFF4C }, + { 0xFF2D, 0xFF4D }, + { 0xFF2E, 0xFF4E }, + { 0xFF2F, 0xFF4F }, + { 0xFF30, 0xFF50 }, + { 0xFF31, 0xFF51 }, + { 0xFF32, 0xFF52 }, + { 0xFF33, 0xFF53 }, + { 0xFF34, 0xFF54 }, + { 0xFF35, 0xFF55 }, + { 0xFF36, 0xFF56 }, + { 0xFF37, 0xFF57 }, + { 0xFF38, 0xFF58 }, + { 0xFF39, 0xFF59 }, + { 0xFF3A, 0xFF5A }, + { 0x10400, 0x10428 }, + { 0x10401, 0x10429 }, + { 0x10402, 0x1042A }, + { 0x10403, 0x1042B }, + { 0x10404, 0x1042C }, + { 0x10405, 0x1042D }, + { 0x10406, 0x1042E }, + { 0x10407, 0x1042F }, + { 0x10408, 0x10430 }, + { 0x10409, 0x10431 }, + { 0x1040A, 0x10432 }, + { 0x1040B, 0x10433 }, + { 0x1040C, 0x10434 
}, + { 0x1040D, 0x10435 }, + { 0x1040E, 0x10436 }, + { 0x1040F, 0x10437 }, + { 0x10410, 0x10438 }, + { 0x10411, 0x10439 }, + { 0x10412, 0x1043A }, + { 0x10413, 0x1043B }, + { 0x10414, 0x1043C }, + { 0x10415, 0x1043D }, + { 0x10416, 0x1043E }, + { 0x10417, 0x1043F }, + { 0x10418, 0x10440 }, + { 0x10419, 0x10441 }, + { 0x1041A, 0x10442 }, + { 0x1041B, 0x10443 }, + { 0x1041C, 0x10444 }, + { 0x1041D, 0x10445 }, + { 0x1041E, 0x10446 }, + { 0x1041F, 0x10447 }, + { 0x10420, 0x10448 }, + { 0x10421, 0x10449 }, + { 0x10422, 0x1044A }, + { 0x10423, 0x1044B }, + { 0x10424, 0x1044C }, + { 0x10425, 0x1044D }, + { 0x10426, 0x1044E }, + { 0x10427, 0x1044F } + +#define MAP(c) uc_tolower (c) +#include "test-mapping-part2.h" diff --git a/tests/unicase/test-uc_totitle.c b/tests/unicase/test-uc_totitle.c new file mode 100644 index 00000000..9064a41a --- /dev/null +++ b/tests/unicase/test-uc_totitle.c @@ -0,0 +1,1055 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Test the Unicode character mapping functions. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Generated automatically by gen-case.c for Unicode 5.1.0. 
*/ + +#include "test-mapping-part1.h" + + { 0x0061, 0x0041 }, + { 0x0062, 0x0042 }, + { 0x0063, 0x0043 }, + { 0x0064, 0x0044 }, + { 0x0065, 0x0045 }, + { 0x0066, 0x0046 }, + { 0x0067, 0x0047 }, + { 0x0068, 0x0048 }, + { 0x0069, 0x0049 }, + { 0x006A, 0x004A }, + { 0x006B, 0x004B }, + { 0x006C, 0x004C }, + { 0x006D, 0x004D }, + { 0x006E, 0x004E }, + { 0x006F, 0x004F }, + { 0x0070, 0x0050 }, + { 0x0071, 0x0051 }, + { 0x0072, 0x0052 }, + { 0x0073, 0x0053 }, + { 0x0074, 0x0054 }, + { 0x0075, 0x0055 }, + { 0x0076, 0x0056 }, + { 0x0077, 0x0057 }, + { 0x0078, 0x0058 }, + { 0x0079, 0x0059 }, + { 0x007A, 0x005A }, + { 0x00B5, 0x039C }, + { 0x00E0, 0x00C0 }, + { 0x00E1, 0x00C1 }, + { 0x00E2, 0x00C2 }, + { 0x00E3, 0x00C3 }, + { 0x00E4, 0x00C4 }, + { 0x00E5, 0x00C5 }, + { 0x00E6, 0x00C6 }, + { 0x00E7, 0x00C7 }, + { 0x00E8, 0x00C8 }, + { 0x00E9, 0x00C9 }, + { 0x00EA, 0x00CA }, + { 0x00EB, 0x00CB }, + { 0x00EC, 0x00CC }, + { 0x00ED, 0x00CD }, + { 0x00EE, 0x00CE }, + { 0x00EF, 0x00CF }, + { 0x00F0, 0x00D0 }, + { 0x00F1, 0x00D1 }, + { 0x00F2, 0x00D2 }, + { 0x00F3, 0x00D3 }, + { 0x00F4, 0x00D4 }, + { 0x00F5, 0x00D5 }, + { 0x00F6, 0x00D6 }, + { 0x00F8, 0x00D8 }, + { 0x00F9, 0x00D9 }, + { 0x00FA, 0x00DA }, + { 0x00FB, 0x00DB }, + { 0x00FC, 0x00DC }, + { 0x00FD, 0x00DD }, + { 0x00FE, 0x00DE }, + { 0x00FF, 0x0178 }, + { 0x0101, 0x0100 }, + { 0x0103, 0x0102 }, + { 0x0105, 0x0104 }, + { 0x0107, 0x0106 }, + { 0x0109, 0x0108 }, + { 0x010B, 0x010A }, + { 0x010D, 0x010C }, + { 0x010F, 0x010E }, + { 0x0111, 0x0110 }, + { 0x0113, 0x0112 }, + { 0x0115, 0x0114 }, + { 0x0117, 0x0116 }, + { 0x0119, 0x0118 }, + { 0x011B, 0x011A }, + { 0x011D, 0x011C }, + { 0x011F, 0x011E }, + { 0x0121, 0x0120 }, + { 0x0123, 0x0122 }, + { 0x0125, 0x0124 }, + { 0x0127, 0x0126 }, + { 0x0129, 0x0128 }, + { 0x012B, 0x012A }, + { 0x012D, 0x012C }, + { 0x012F, 0x012E }, + { 0x0131, 0x0049 }, + { 0x0133, 0x0132 }, + { 0x0135, 0x0134 }, + { 0x0137, 0x0136 }, + { 0x013A, 0x0139 }, + { 0x013C, 0x013B }, + { 0x013E, 0x013D }, + 
{ 0x0140, 0x013F }, + { 0x0142, 0x0141 }, + { 0x0144, 0x0143 }, + { 0x0146, 0x0145 }, + { 0x0148, 0x0147 }, + { 0x014B, 0x014A }, + { 0x014D, 0x014C }, + { 0x014F, 0x014E }, + { 0x0151, 0x0150 }, + { 0x0153, 0x0152 }, + { 0x0155, 0x0154 }, + { 0x0157, 0x0156 }, + { 0x0159, 0x0158 }, + { 0x015B, 0x015A }, + { 0x015D, 0x015C }, + { 0x015F, 0x015E }, + { 0x0161, 0x0160 }, + { 0x0163, 0x0162 }, + { 0x0165, 0x0164 }, + { 0x0167, 0x0166 }, + { 0x0169, 0x0168 }, + { 0x016B, 0x016A }, + { 0x016D, 0x016C }, + { 0x016F, 0x016E }, + { 0x0171, 0x0170 }, + { 0x0173, 0x0172 }, + { 0x0175, 0x0174 }, + { 0x0177, 0x0176 }, + { 0x017A, 0x0179 }, + { 0x017C, 0x017B }, + { 0x017E, 0x017D }, + { 0x017F, 0x0053 }, + { 0x0180, 0x0243 }, + { 0x0183, 0x0182 }, + { 0x0185, 0x0184 }, + { 0x0188, 0x0187 }, + { 0x018C, 0x018B }, + { 0x0192, 0x0191 }, + { 0x0195, 0x01F6 }, + { 0x0199, 0x0198 }, + { 0x019A, 0x023D }, + { 0x019E, 0x0220 }, + { 0x01A1, 0x01A0 }, + { 0x01A3, 0x01A2 }, + { 0x01A5, 0x01A4 }, + { 0x01A8, 0x01A7 }, + { 0x01AD, 0x01AC }, + { 0x01B0, 0x01AF }, + { 0x01B4, 0x01B3 }, + { 0x01B6, 0x01B5 }, + { 0x01B9, 0x01B8 }, + { 0x01BD, 0x01BC }, + { 0x01BF, 0x01F7 }, + { 0x01C4, 0x01C5 }, + { 0x01C6, 0x01C5 }, + { 0x01C7, 0x01C8 }, + { 0x01C9, 0x01C8 }, + { 0x01CA, 0x01CB }, + { 0x01CC, 0x01CB }, + { 0x01CE, 0x01CD }, + { 0x01D0, 0x01CF }, + { 0x01D2, 0x01D1 }, + { 0x01D4, 0x01D3 }, + { 0x01D6, 0x01D5 }, + { 0x01D8, 0x01D7 }, + { 0x01DA, 0x01D9 }, + { 0x01DC, 0x01DB }, + { 0x01DD, 0x018E }, + { 0x01DF, 0x01DE }, + { 0x01E1, 0x01E0 }, + { 0x01E3, 0x01E2 }, + { 0x01E5, 0x01E4 }, + { 0x01E7, 0x01E6 }, + { 0x01E9, 0x01E8 }, + { 0x01EB, 0x01EA }, + { 0x01ED, 0x01EC }, + { 0x01EF, 0x01EE }, + { 0x01F1, 0x01F2 }, + { 0x01F3, 0x01F2 }, + { 0x01F5, 0x01F4 }, + { 0x01F9, 0x01F8 }, + { 0x01FB, 0x01FA }, + { 0x01FD, 0x01FC }, + { 0x01FF, 0x01FE }, + { 0x0201, 0x0200 }, + { 0x0203, 0x0202 }, + { 0x0205, 0x0204 }, + { 0x0207, 0x0206 }, + { 0x0209, 0x0208 }, + { 0x020B, 0x020A }, + { 0x020D, 0x020C }, 
+ { 0x020F, 0x020E }, + { 0x0211, 0x0210 }, + { 0x0213, 0x0212 }, + { 0x0215, 0x0214 }, + { 0x0217, 0x0216 }, + { 0x0219, 0x0218 }, + { 0x021B, 0x021A }, + { 0x021D, 0x021C }, + { 0x021F, 0x021E }, + { 0x0223, 0x0222 }, + { 0x0225, 0x0224 }, + { 0x0227, 0x0226 }, + { 0x0229, 0x0228 }, + { 0x022B, 0x022A }, + { 0x022D, 0x022C }, + { 0x022F, 0x022E }, + { 0x0231, 0x0230 }, + { 0x0233, 0x0232 }, + { 0x023C, 0x023B }, + { 0x0242, 0x0241 }, + { 0x0247, 0x0246 }, + { 0x0249, 0x0248 }, + { 0x024B, 0x024A }, + { 0x024D, 0x024C }, + { 0x024F, 0x024E }, + { 0x0250, 0x2C6F }, + { 0x0251, 0x2C6D }, + { 0x0253, 0x0181 }, + { 0x0254, 0x0186 }, + { 0x0256, 0x0189 }, + { 0x0257, 0x018A }, + { 0x0259, 0x018F }, + { 0x025B, 0x0190 }, + { 0x0260, 0x0193 }, + { 0x0263, 0x0194 }, + { 0x0268, 0x0197 }, + { 0x0269, 0x0196 }, + { 0x026B, 0x2C62 }, + { 0x026F, 0x019C }, + { 0x0271, 0x2C6E }, + { 0x0272, 0x019D }, + { 0x0275, 0x019F }, + { 0x027D, 0x2C64 }, + { 0x0280, 0x01A6 }, + { 0x0283, 0x01A9 }, + { 0x0288, 0x01AE }, + { 0x0289, 0x0244 }, + { 0x028A, 0x01B1 }, + { 0x028B, 0x01B2 }, + { 0x028C, 0x0245 }, + { 0x0292, 0x01B7 }, + { 0x0345, 0x0399 }, + { 0x0371, 0x0370 }, + { 0x0373, 0x0372 }, + { 0x0377, 0x0376 }, + { 0x037B, 0x03FD }, + { 0x037C, 0x03FE }, + { 0x037D, 0x03FF }, + { 0x03AC, 0x0386 }, + { 0x03AD, 0x0388 }, + { 0x03AE, 0x0389 }, + { 0x03AF, 0x038A }, + { 0x03B1, 0x0391 }, + { 0x03B2, 0x0392 }, + { 0x03B3, 0x0393 }, + { 0x03B4, 0x0394 }, + { 0x03B5, 0x0395 }, + { 0x03B6, 0x0396 }, + { 0x03B7, 0x0397 }, + { 0x03B8, 0x0398 }, + { 0x03B9, 0x0399 }, + { 0x03BA, 0x039A }, + { 0x03BB, 0x039B }, + { 0x03BC, 0x039C }, + { 0x03BD, 0x039D }, + { 0x03BE, 0x039E }, + { 0x03BF, 0x039F }, + { 0x03C0, 0x03A0 }, + { 0x03C1, 0x03A1 }, + { 0x03C2, 0x03A3 }, + { 0x03C3, 0x03A3 }, + { 0x03C4, 0x03A4 }, + { 0x03C5, 0x03A5 }, + { 0x03C6, 0x03A6 }, + { 0x03C7, 0x03A7 }, + { 0x03C8, 0x03A8 }, + { 0x03C9, 0x03A9 }, + { 0x03CA, 0x03AA }, + { 0x03CB, 0x03AB }, + { 0x03CC, 0x038C }, + { 0x03CD, 0x038E 
}, + { 0x03CE, 0x038F }, + { 0x03D0, 0x0392 }, + { 0x03D1, 0x0398 }, + { 0x03D5, 0x03A6 }, + { 0x03D6, 0x03A0 }, + { 0x03D7, 0x03CF }, + { 0x03D9, 0x03D8 }, + { 0x03DB, 0x03DA }, + { 0x03DD, 0x03DC }, + { 0x03DF, 0x03DE }, + { 0x03E1, 0x03E0 }, + { 0x03E3, 0x03E2 }, + { 0x03E5, 0x03E4 }, + { 0x03E7, 0x03E6 }, + { 0x03E9, 0x03E8 }, + { 0x03EB, 0x03EA }, + { 0x03ED, 0x03EC }, + { 0x03EF, 0x03EE }, + { 0x03F0, 0x039A }, + { 0x03F1, 0x03A1 }, + { 0x03F2, 0x03F9 }, + { 0x03F5, 0x0395 }, + { 0x03F8, 0x03F7 }, + { 0x03FB, 0x03FA }, + { 0x0430, 0x0410 }, + { 0x0431, 0x0411 }, + { 0x0432, 0x0412 }, + { 0x0433, 0x0413 }, + { 0x0434, 0x0414 }, + { 0x0435, 0x0415 }, + { 0x0436, 0x0416 }, + { 0x0437, 0x0417 }, + { 0x0438, 0x0418 }, + { 0x0439, 0x0419 }, + { 0x043A, 0x041A }, + { 0x043B, 0x041B }, + { 0x043C, 0x041C }, + { 0x043D, 0x041D }, + { 0x043E, 0x041E }, + { 0x043F, 0x041F }, + { 0x0440, 0x0420 }, + { 0x0441, 0x0421 }, + { 0x0442, 0x0422 }, + { 0x0443, 0x0423 }, + { 0x0444, 0x0424 }, + { 0x0445, 0x0425 }, + { 0x0446, 0x0426 }, + { 0x0447, 0x0427 }, + { 0x0448, 0x0428 }, + { 0x0449, 0x0429 }, + { 0x044A, 0x042A }, + { 0x044B, 0x042B }, + { 0x044C, 0x042C }, + { 0x044D, 0x042D }, + { 0x044E, 0x042E }, + { 0x044F, 0x042F }, + { 0x0450, 0x0400 }, + { 0x0451, 0x0401 }, + { 0x0452, 0x0402 }, + { 0x0453, 0x0403 }, + { 0x0454, 0x0404 }, + { 0x0455, 0x0405 }, + { 0x0456, 0x0406 }, + { 0x0457, 0x0407 }, + { 0x0458, 0x0408 }, + { 0x0459, 0x0409 }, + { 0x045A, 0x040A }, + { 0x045B, 0x040B }, + { 0x045C, 0x040C }, + { 0x045D, 0x040D }, + { 0x045E, 0x040E }, + { 0x045F, 0x040F }, + { 0x0461, 0x0460 }, + { 0x0463, 0x0462 }, + { 0x0465, 0x0464 }, + { 0x0467, 0x0466 }, + { 0x0469, 0x0468 }, + { 0x046B, 0x046A }, + { 0x046D, 0x046C }, + { 0x046F, 0x046E }, + { 0x0471, 0x0470 }, + { 0x0473, 0x0472 }, + { 0x0475, 0x0474 }, + { 0x0477, 0x0476 }, + { 0x0479, 0x0478 }, + { 0x047B, 0x047A }, + { 0x047D, 0x047C }, + { 0x047F, 0x047E }, + { 0x0481, 0x0480 }, + { 0x048B, 0x048A }, + { 0x048D, 
0x048C }, + { 0x048F, 0x048E }, + { 0x0491, 0x0490 }, + { 0x0493, 0x0492 }, + { 0x0495, 0x0494 }, + { 0x0497, 0x0496 }, + { 0x0499, 0x0498 }, + { 0x049B, 0x049A }, + { 0x049D, 0x049C }, + { 0x049F, 0x049E }, + { 0x04A1, 0x04A0 }, + { 0x04A3, 0x04A2 }, + { 0x04A5, 0x04A4 }, + { 0x04A7, 0x04A6 }, + { 0x04A9, 0x04A8 }, + { 0x04AB, 0x04AA }, + { 0x04AD, 0x04AC }, + { 0x04AF, 0x04AE }, + { 0x04B1, 0x04B0 }, + { 0x04B3, 0x04B2 }, + { 0x04B5, 0x04B4 }, + { 0x04B7, 0x04B6 }, + { 0x04B9, 0x04B8 }, + { 0x04BB, 0x04BA }, + { 0x04BD, 0x04BC }, + { 0x04BF, 0x04BE }, + { 0x04C2, 0x04C1 }, + { 0x04C4, 0x04C3 }, + { 0x04C6, 0x04C5 }, + { 0x04C8, 0x04C7 }, + { 0x04CA, 0x04C9 }, + { 0x04CC, 0x04CB }, + { 0x04CE, 0x04CD }, + { 0x04CF, 0x04C0 }, + { 0x04D1, 0x04D0 }, + { 0x04D3, 0x04D2 }, + { 0x04D5, 0x04D4 }, + { 0x04D7, 0x04D6 }, + { 0x04D9, 0x04D8 }, + { 0x04DB, 0x04DA }, + { 0x04DD, 0x04DC }, + { 0x04DF, 0x04DE }, + { 0x04E1, 0x04E0 }, + { 0x04E3, 0x04E2 }, + { 0x04E5, 0x04E4 }, + { 0x04E7, 0x04E6 }, + { 0x04E9, 0x04E8 }, + { 0x04EB, 0x04EA }, + { 0x04ED, 0x04EC }, + { 0x04EF, 0x04EE }, + { 0x04F1, 0x04F0 }, + { 0x04F3, 0x04F2 }, + { 0x04F5, 0x04F4 }, + { 0x04F7, 0x04F6 }, + { 0x04F9, 0x04F8 }, + { 0x04FB, 0x04FA }, + { 0x04FD, 0x04FC }, + { 0x04FF, 0x04FE }, + { 0x0501, 0x0500 }, + { 0x0503, 0x0502 }, + { 0x0505, 0x0504 }, + { 0x0507, 0x0506 }, + { 0x0509, 0x0508 }, + { 0x050B, 0x050A }, + { 0x050D, 0x050C }, + { 0x050F, 0x050E }, + { 0x0511, 0x0510 }, + { 0x0513, 0x0512 }, + { 0x0515, 0x0514 }, + { 0x0517, 0x0516 }, + { 0x0519, 0x0518 }, + { 0x051B, 0x051A }, + { 0x051D, 0x051C }, + { 0x051F, 0x051E }, + { 0x0521, 0x0520 }, + { 0x0523, 0x0522 }, + { 0x0561, 0x0531 }, + { 0x0562, 0x0532 }, + { 0x0563, 0x0533 }, + { 0x0564, 0x0534 }, + { 0x0565, 0x0535 }, + { 0x0566, 0x0536 }, + { 0x0567, 0x0537 }, + { 0x0568, 0x0538 }, + { 0x0569, 0x0539 }, + { 0x056A, 0x053A }, + { 0x056B, 0x053B }, + { 0x056C, 0x053C }, + { 0x056D, 0x053D }, + { 0x056E, 0x053E }, + { 0x056F, 0x053F }, + { 
0x0570, 0x0540 }, + { 0x0571, 0x0541 }, + { 0x0572, 0x0542 }, + { 0x0573, 0x0543 }, + { 0x0574, 0x0544 }, + { 0x0575, 0x0545 }, + { 0x0576, 0x0546 }, + { 0x0577, 0x0547 }, + { 0x0578, 0x0548 }, + { 0x0579, 0x0549 }, + { 0x057A, 0x054A }, + { 0x057B, 0x054B }, + { 0x057C, 0x054C }, + { 0x057D, 0x054D }, + { 0x057E, 0x054E }, + { 0x057F, 0x054F }, + { 0x0580, 0x0550 }, + { 0x0581, 0x0551 }, + { 0x0582, 0x0552 }, + { 0x0583, 0x0553 }, + { 0x0584, 0x0554 }, + { 0x0585, 0x0555 }, + { 0x0586, 0x0556 }, + { 0x1D79, 0xA77D }, + { 0x1D7D, 0x2C63 }, + { 0x1E01, 0x1E00 }, + { 0x1E03, 0x1E02 }, + { 0x1E05, 0x1E04 }, + { 0x1E07, 0x1E06 }, + { 0x1E09, 0x1E08 }, + { 0x1E0B, 0x1E0A }, + { 0x1E0D, 0x1E0C }, + { 0x1E0F, 0x1E0E }, + { 0x1E11, 0x1E10 }, + { 0x1E13, 0x1E12 }, + { 0x1E15, 0x1E14 }, + { 0x1E17, 0x1E16 }, + { 0x1E19, 0x1E18 }, + { 0x1E1B, 0x1E1A }, + { 0x1E1D, 0x1E1C }, + { 0x1E1F, 0x1E1E }, + { 0x1E21, 0x1E20 }, + { 0x1E23, 0x1E22 }, + { 0x1E25, 0x1E24 }, + { 0x1E27, 0x1E26 }, + { 0x1E29, 0x1E28 }, + { 0x1E2B, 0x1E2A }, + { 0x1E2D, 0x1E2C }, + { 0x1E2F, 0x1E2E }, + { 0x1E31, 0x1E30 }, + { 0x1E33, 0x1E32 }, + { 0x1E35, 0x1E34 }, + { 0x1E37, 0x1E36 }, + { 0x1E39, 0x1E38 }, + { 0x1E3B, 0x1E3A }, + { 0x1E3D, 0x1E3C }, + { 0x1E3F, 0x1E3E }, + { 0x1E41, 0x1E40 }, + { 0x1E43, 0x1E42 }, + { 0x1E45, 0x1E44 }, + { 0x1E47, 0x1E46 }, + { 0x1E49, 0x1E48 }, + { 0x1E4B, 0x1E4A }, + { 0x1E4D, 0x1E4C }, + { 0x1E4F, 0x1E4E }, + { 0x1E51, 0x1E50 }, + { 0x1E53, 0x1E52 }, + { 0x1E55, 0x1E54 }, + { 0x1E57, 0x1E56 }, + { 0x1E59, 0x1E58 }, + { 0x1E5B, 0x1E5A }, + { 0x1E5D, 0x1E5C }, + { 0x1E5F, 0x1E5E }, + { 0x1E61, 0x1E60 }, + { 0x1E63, 0x1E62 }, + { 0x1E65, 0x1E64 }, + { 0x1E67, 0x1E66 }, + { 0x1E69, 0x1E68 }, + { 0x1E6B, 0x1E6A }, + { 0x1E6D, 0x1E6C }, + { 0x1E6F, 0x1E6E }, + { 0x1E71, 0x1E70 }, + { 0x1E73, 0x1E72 }, + { 0x1E75, 0x1E74 }, + { 0x1E77, 0x1E76 }, + { 0x1E79, 0x1E78 }, + { 0x1E7B, 0x1E7A }, + { 0x1E7D, 0x1E7C }, + { 0x1E7F, 0x1E7E }, + { 0x1E81, 0x1E80 }, + { 0x1E83, 0x1E82 }, + 
{ 0x1E85, 0x1E84 }, + { 0x1E87, 0x1E86 }, + { 0x1E89, 0x1E88 }, + { 0x1E8B, 0x1E8A }, + { 0x1E8D, 0x1E8C }, + { 0x1E8F, 0x1E8E }, + { 0x1E91, 0x1E90 }, + { 0x1E93, 0x1E92 }, + { 0x1E95, 0x1E94 }, + { 0x1E9B, 0x1E60 }, + { 0x1EA1, 0x1EA0 }, + { 0x1EA3, 0x1EA2 }, + { 0x1EA5, 0x1EA4 }, + { 0x1EA7, 0x1EA6 }, + { 0x1EA9, 0x1EA8 }, + { 0x1EAB, 0x1EAA }, + { 0x1EAD, 0x1EAC }, + { 0x1EAF, 0x1EAE }, + { 0x1EB1, 0x1EB0 }, + { 0x1EB3, 0x1EB2 }, + { 0x1EB5, 0x1EB4 }, + { 0x1EB7, 0x1EB6 }, + { 0x1EB9, 0x1EB8 }, + { 0x1EBB, 0x1EBA }, + { 0x1EBD, 0x1EBC }, + { 0x1EBF, 0x1EBE }, + { 0x1EC1, 0x1EC0 }, + { 0x1EC3, 0x1EC2 }, + { 0x1EC5, 0x1EC4 }, + { 0x1EC7, 0x1EC6 }, + { 0x1EC9, 0x1EC8 }, + { 0x1ECB, 0x1ECA }, + { 0x1ECD, 0x1ECC }, + { 0x1ECF, 0x1ECE }, + { 0x1ED1, 0x1ED0 }, + { 0x1ED3, 0x1ED2 }, + { 0x1ED5, 0x1ED4 }, + { 0x1ED7, 0x1ED6 }, + { 0x1ED9, 0x1ED8 }, + { 0x1EDB, 0x1EDA }, + { 0x1EDD, 0x1EDC }, + { 0x1EDF, 0x1EDE }, + { 0x1EE1, 0x1EE0 }, + { 0x1EE3, 0x1EE2 }, + { 0x1EE5, 0x1EE4 }, + { 0x1EE7, 0x1EE6 }, + { 0x1EE9, 0x1EE8 }, + { 0x1EEB, 0x1EEA }, + { 0x1EED, 0x1EEC }, + { 0x1EEF, 0x1EEE }, + { 0x1EF1, 0x1EF0 }, + { 0x1EF3, 0x1EF2 }, + { 0x1EF5, 0x1EF4 }, + { 0x1EF7, 0x1EF6 }, + { 0x1EF9, 0x1EF8 }, + { 0x1EFB, 0x1EFA }, + { 0x1EFD, 0x1EFC }, + { 0x1EFF, 0x1EFE }, + { 0x1F00, 0x1F08 }, + { 0x1F01, 0x1F09 }, + { 0x1F02, 0x1F0A }, + { 0x1F03, 0x1F0B }, + { 0x1F04, 0x1F0C }, + { 0x1F05, 0x1F0D }, + { 0x1F06, 0x1F0E }, + { 0x1F07, 0x1F0F }, + { 0x1F10, 0x1F18 }, + { 0x1F11, 0x1F19 }, + { 0x1F12, 0x1F1A }, + { 0x1F13, 0x1F1B }, + { 0x1F14, 0x1F1C }, + { 0x1F15, 0x1F1D }, + { 0x1F20, 0x1F28 }, + { 0x1F21, 0x1F29 }, + { 0x1F22, 0x1F2A }, + { 0x1F23, 0x1F2B }, + { 0x1F24, 0x1F2C }, + { 0x1F25, 0x1F2D }, + { 0x1F26, 0x1F2E }, + { 0x1F27, 0x1F2F }, + { 0x1F30, 0x1F38 }, + { 0x1F31, 0x1F39 }, + { 0x1F32, 0x1F3A }, + { 0x1F33, 0x1F3B }, + { 0x1F34, 0x1F3C }, + { 0x1F35, 0x1F3D }, + { 0x1F36, 0x1F3E }, + { 0x1F37, 0x1F3F }, + { 0x1F40, 0x1F48 }, + { 0x1F41, 0x1F49 }, + { 0x1F42, 0x1F4A }, 
+ { 0x1F43, 0x1F4B }, + { 0x1F44, 0x1F4C }, + { 0x1F45, 0x1F4D }, + { 0x1F51, 0x1F59 }, + { 0x1F53, 0x1F5B }, + { 0x1F55, 0x1F5D }, + { 0x1F57, 0x1F5F }, + { 0x1F60, 0x1F68 }, + { 0x1F61, 0x1F69 }, + { 0x1F62, 0x1F6A }, + { 0x1F63, 0x1F6B }, + { 0x1F64, 0x1F6C }, + { 0x1F65, 0x1F6D }, + { 0x1F66, 0x1F6E }, + { 0x1F67, 0x1F6F }, + { 0x1F70, 0x1FBA }, + { 0x1F71, 0x1FBB }, + { 0x1F72, 0x1FC8 }, + { 0x1F73, 0x1FC9 }, + { 0x1F74, 0x1FCA }, + { 0x1F75, 0x1FCB }, + { 0x1F76, 0x1FDA }, + { 0x1F77, 0x1FDB }, + { 0x1F78, 0x1FF8 }, + { 0x1F79, 0x1FF9 }, + { 0x1F7A, 0x1FEA }, + { 0x1F7B, 0x1FEB }, + { 0x1F7C, 0x1FFA }, + { 0x1F7D, 0x1FFB }, + { 0x1F80, 0x1F88 }, + { 0x1F81, 0x1F89 }, + { 0x1F82, 0x1F8A }, + { 0x1F83, 0x1F8B }, + { 0x1F84, 0x1F8C }, + { 0x1F85, 0x1F8D }, + { 0x1F86, 0x1F8E }, + { 0x1F87, 0x1F8F }, + { 0x1F90, 0x1F98 }, + { 0x1F91, 0x1F99 }, + { 0x1F92, 0x1F9A }, + { 0x1F93, 0x1F9B }, + { 0x1F94, 0x1F9C }, + { 0x1F95, 0x1F9D }, + { 0x1F96, 0x1F9E }, + { 0x1F97, 0x1F9F }, + { 0x1FA0, 0x1FA8 }, + { 0x1FA1, 0x1FA9 }, + { 0x1FA2, 0x1FAA }, + { 0x1FA3, 0x1FAB }, + { 0x1FA4, 0x1FAC }, + { 0x1FA5, 0x1FAD }, + { 0x1FA6, 0x1FAE }, + { 0x1FA7, 0x1FAF }, + { 0x1FB0, 0x1FB8 }, + { 0x1FB1, 0x1FB9 }, + { 0x1FB3, 0x1FBC }, + { 0x1FBE, 0x0399 }, + { 0x1FC3, 0x1FCC }, + { 0x1FD0, 0x1FD8 }, + { 0x1FD1, 0x1FD9 }, + { 0x1FE0, 0x1FE8 }, + { 0x1FE1, 0x1FE9 }, + { 0x1FE5, 0x1FEC }, + { 0x1FF3, 0x1FFC }, + { 0x214E, 0x2132 }, + { 0x2170, 0x2160 }, + { 0x2171, 0x2161 }, + { 0x2172, 0x2162 }, + { 0x2173, 0x2163 }, + { 0x2174, 0x2164 }, + { 0x2175, 0x2165 }, + { 0x2176, 0x2166 }, + { 0x2177, 0x2167 }, + { 0x2178, 0x2168 }, + { 0x2179, 0x2169 }, + { 0x217A, 0x216A }, + { 0x217B, 0x216B }, + { 0x217C, 0x216C }, + { 0x217D, 0x216D }, + { 0x217E, 0x216E }, + { 0x217F, 0x216F }, + { 0x2184, 0x2183 }, + { 0x24D0, 0x24B6 }, + { 0x24D1, 0x24B7 }, + { 0x24D2, 0x24B8 }, + { 0x24D3, 0x24B9 }, + { 0x24D4, 0x24BA }, + { 0x24D5, 0x24BB }, + { 0x24D6, 0x24BC }, + { 0x24D7, 0x24BD }, + { 0x24D8, 0x24BE 
}, + { 0x24D9, 0x24BF }, + { 0x24DA, 0x24C0 }, + { 0x24DB, 0x24C1 }, + { 0x24DC, 0x24C2 }, + { 0x24DD, 0x24C3 }, + { 0x24DE, 0x24C4 }, + { 0x24DF, 0x24C5 }, + { 0x24E0, 0x24C6 }, + { 0x24E1, 0x24C7 }, + { 0x24E2, 0x24C8 }, + { 0x24E3, 0x24C9 }, + { 0x24E4, 0x24CA }, + { 0x24E5, 0x24CB }, + { 0x24E6, 0x24CC }, + { 0x24E7, 0x24CD }, + { 0x24E8, 0x24CE }, + { 0x24E9, 0x24CF }, + { 0x2C30, 0x2C00 }, + { 0x2C31, 0x2C01 }, + { 0x2C32, 0x2C02 }, + { 0x2C33, 0x2C03 }, + { 0x2C34, 0x2C04 }, + { 0x2C35, 0x2C05 }, + { 0x2C36, 0x2C06 }, + { 0x2C37, 0x2C07 }, + { 0x2C38, 0x2C08 }, + { 0x2C39, 0x2C09 }, + { 0x2C3A, 0x2C0A }, + { 0x2C3B, 0x2C0B }, + { 0x2C3C, 0x2C0C }, + { 0x2C3D, 0x2C0D }, + { 0x2C3E, 0x2C0E }, + { 0x2C3F, 0x2C0F }, + { 0x2C40, 0x2C10 }, + { 0x2C41, 0x2C11 }, + { 0x2C42, 0x2C12 }, + { 0x2C43, 0x2C13 }, + { 0x2C44, 0x2C14 }, + { 0x2C45, 0x2C15 }, + { 0x2C46, 0x2C16 }, + { 0x2C47, 0x2C17 }, + { 0x2C48, 0x2C18 }, + { 0x2C49, 0x2C19 }, + { 0x2C4A, 0x2C1A }, + { 0x2C4B, 0x2C1B }, + { 0x2C4C, 0x2C1C }, + { 0x2C4D, 0x2C1D }, + { 0x2C4E, 0x2C1E }, + { 0x2C4F, 0x2C1F }, + { 0x2C50, 0x2C20 }, + { 0x2C51, 0x2C21 }, + { 0x2C52, 0x2C22 }, + { 0x2C53, 0x2C23 }, + { 0x2C54, 0x2C24 }, + { 0x2C55, 0x2C25 }, + { 0x2C56, 0x2C26 }, + { 0x2C57, 0x2C27 }, + { 0x2C58, 0x2C28 }, + { 0x2C59, 0x2C29 }, + { 0x2C5A, 0x2C2A }, + { 0x2C5B, 0x2C2B }, + { 0x2C5C, 0x2C2C }, + { 0x2C5D, 0x2C2D }, + { 0x2C5E, 0x2C2E }, + { 0x2C61, 0x2C60 }, + { 0x2C65, 0x023A }, + { 0x2C66, 0x023E }, + { 0x2C68, 0x2C67 }, + { 0x2C6A, 0x2C69 }, + { 0x2C6C, 0x2C6B }, + { 0x2C73, 0x2C72 }, + { 0x2C76, 0x2C75 }, + { 0x2C81, 0x2C80 }, + { 0x2C83, 0x2C82 }, + { 0x2C85, 0x2C84 }, + { 0x2C87, 0x2C86 }, + { 0x2C89, 0x2C88 }, + { 0x2C8B, 0x2C8A }, + { 0x2C8D, 0x2C8C }, + { 0x2C8F, 0x2C8E }, + { 0x2C91, 0x2C90 }, + { 0x2C93, 0x2C92 }, + { 0x2C95, 0x2C94 }, + { 0x2C97, 0x2C96 }, + { 0x2C99, 0x2C98 }, + { 0x2C9B, 0x2C9A }, + { 0x2C9D, 0x2C9C }, + { 0x2C9F, 0x2C9E }, + { 0x2CA1, 0x2CA0 }, + { 0x2CA3, 0x2CA2 }, + { 0x2CA5, 
0x2CA4 }, + { 0x2CA7, 0x2CA6 }, + { 0x2CA9, 0x2CA8 }, + { 0x2CAB, 0x2CAA }, + { 0x2CAD, 0x2CAC }, + { 0x2CAF, 0x2CAE }, + { 0x2CB1, 0x2CB0 }, + { 0x2CB3, 0x2CB2 }, + { 0x2CB5, 0x2CB4 }, + { 0x2CB7, 0x2CB6 }, + { 0x2CB9, 0x2CB8 }, + { 0x2CBB, 0x2CBA }, + { 0x2CBD, 0x2CBC }, + { 0x2CBF, 0x2CBE }, + { 0x2CC1, 0x2CC0 }, + { 0x2CC3, 0x2CC2 }, + { 0x2CC5, 0x2CC4 }, + { 0x2CC7, 0x2CC6 }, + { 0x2CC9, 0x2CC8 }, + { 0x2CCB, 0x2CCA }, + { 0x2CCD, 0x2CCC }, + { 0x2CCF, 0x2CCE }, + { 0x2CD1, 0x2CD0 }, + { 0x2CD3, 0x2CD2 }, + { 0x2CD5, 0x2CD4 }, + { 0x2CD7, 0x2CD6 }, + { 0x2CD9, 0x2CD8 }, + { 0x2CDB, 0x2CDA }, + { 0x2CDD, 0x2CDC }, + { 0x2CDF, 0x2CDE }, + { 0x2CE1, 0x2CE0 }, + { 0x2CE3, 0x2CE2 }, + { 0x2D00, 0x10A0 }, + { 0x2D01, 0x10A1 }, + { 0x2D02, 0x10A2 }, + { 0x2D03, 0x10A3 }, + { 0x2D04, 0x10A4 }, + { 0x2D05, 0x10A5 }, + { 0x2D06, 0x10A6 }, + { 0x2D07, 0x10A7 }, + { 0x2D08, 0x10A8 }, + { 0x2D09, 0x10A9 }, + { 0x2D0A, 0x10AA }, + { 0x2D0B, 0x10AB }, + { 0x2D0C, 0x10AC }, + { 0x2D0D, 0x10AD }, + { 0x2D0E, 0x10AE }, + { 0x2D0F, 0x10AF }, + { 0x2D10, 0x10B0 }, + { 0x2D11, 0x10B1 }, + { 0x2D12, 0x10B2 }, + { 0x2D13, 0x10B3 }, + { 0x2D14, 0x10B4 }, + { 0x2D15, 0x10B5 }, + { 0x2D16, 0x10B6 }, + { 0x2D17, 0x10B7 }, + { 0x2D18, 0x10B8 }, + { 0x2D19, 0x10B9 }, + { 0x2D1A, 0x10BA }, + { 0x2D1B, 0x10BB }, + { 0x2D1C, 0x10BC }, + { 0x2D1D, 0x10BD }, + { 0x2D1E, 0x10BE }, + { 0x2D1F, 0x10BF }, + { 0x2D20, 0x10C0 }, + { 0x2D21, 0x10C1 }, + { 0x2D22, 0x10C2 }, + { 0x2D23, 0x10C3 }, + { 0x2D24, 0x10C4 }, + { 0x2D25, 0x10C5 }, + { 0xA641, 0xA640 }, + { 0xA643, 0xA642 }, + { 0xA645, 0xA644 }, + { 0xA647, 0xA646 }, + { 0xA649, 0xA648 }, + { 0xA64B, 0xA64A }, + { 0xA64D, 0xA64C }, + { 0xA64F, 0xA64E }, + { 0xA651, 0xA650 }, + { 0xA653, 0xA652 }, + { 0xA655, 0xA654 }, + { 0xA657, 0xA656 }, + { 0xA659, 0xA658 }, + { 0xA65B, 0xA65A }, + { 0xA65D, 0xA65C }, + { 0xA65F, 0xA65E }, + { 0xA663, 0xA662 }, + { 0xA665, 0xA664 }, + { 0xA667, 0xA666 }, + { 0xA669, 0xA668 }, + { 0xA66B, 0xA66A }, + { 
0xA66D, 0xA66C }, + { 0xA681, 0xA680 }, + { 0xA683, 0xA682 }, + { 0xA685, 0xA684 }, + { 0xA687, 0xA686 }, + { 0xA689, 0xA688 }, + { 0xA68B, 0xA68A }, + { 0xA68D, 0xA68C }, + { 0xA68F, 0xA68E }, + { 0xA691, 0xA690 }, + { 0xA693, 0xA692 }, + { 0xA695, 0xA694 }, + { 0xA697, 0xA696 }, + { 0xA723, 0xA722 }, + { 0xA725, 0xA724 }, + { 0xA727, 0xA726 }, + { 0xA729, 0xA728 }, + { 0xA72B, 0xA72A }, + { 0xA72D, 0xA72C }, + { 0xA72F, 0xA72E }, + { 0xA733, 0xA732 }, + { 0xA735, 0xA734 }, + { 0xA737, 0xA736 }, + { 0xA739, 0xA738 }, + { 0xA73B, 0xA73A }, + { 0xA73D, 0xA73C }, + { 0xA73F, 0xA73E }, + { 0xA741, 0xA740 }, + { 0xA743, 0xA742 }, + { 0xA745, 0xA744 }, + { 0xA747, 0xA746 }, + { 0xA749, 0xA748 }, + { 0xA74B, 0xA74A }, + { 0xA74D, 0xA74C }, + { 0xA74F, 0xA74E }, + { 0xA751, 0xA750 }, + { 0xA753, 0xA752 }, + { 0xA755, 0xA754 }, + { 0xA757, 0xA756 }, + { 0xA759, 0xA758 }, + { 0xA75B, 0xA75A }, + { 0xA75D, 0xA75C }, + { 0xA75F, 0xA75E }, + { 0xA761, 0xA760 }, + { 0xA763, 0xA762 }, + { 0xA765, 0xA764 }, + { 0xA767, 0xA766 }, + { 0xA769, 0xA768 }, + { 0xA76B, 0xA76A }, + { 0xA76D, 0xA76C }, + { 0xA76F, 0xA76E }, + { 0xA77A, 0xA779 }, + { 0xA77C, 0xA77B }, + { 0xA77F, 0xA77E }, + { 0xA781, 0xA780 }, + { 0xA783, 0xA782 }, + { 0xA785, 0xA784 }, + { 0xA787, 0xA786 }, + { 0xA78C, 0xA78B }, + { 0xFF41, 0xFF21 }, + { 0xFF42, 0xFF22 }, + { 0xFF43, 0xFF23 }, + { 0xFF44, 0xFF24 }, + { 0xFF45, 0xFF25 }, + { 0xFF46, 0xFF26 }, + { 0xFF47, 0xFF27 }, + { 0xFF48, 0xFF28 }, + { 0xFF49, 0xFF29 }, + { 0xFF4A, 0xFF2A }, + { 0xFF4B, 0xFF2B }, + { 0xFF4C, 0xFF2C }, + { 0xFF4D, 0xFF2D }, + { 0xFF4E, 0xFF2E }, + { 0xFF4F, 0xFF2F }, + { 0xFF50, 0xFF30 }, + { 0xFF51, 0xFF31 }, + { 0xFF52, 0xFF32 }, + { 0xFF53, 0xFF33 }, + { 0xFF54, 0xFF34 }, + { 0xFF55, 0xFF35 }, + { 0xFF56, 0xFF36 }, + { 0xFF57, 0xFF37 }, + { 0xFF58, 0xFF38 }, + { 0xFF59, 0xFF39 }, + { 0xFF5A, 0xFF3A }, + { 0x10428, 0x10400 }, + { 0x10429, 0x10401 }, + { 0x1042A, 0x10402 }, + { 0x1042B, 0x10403 }, + { 0x1042C, 0x10404 }, + { 0x1042D, 
0x10405 }, + { 0x1042E, 0x10406 }, + { 0x1042F, 0x10407 }, + { 0x10430, 0x10408 }, + { 0x10431, 0x10409 }, + { 0x10432, 0x1040A }, + { 0x10433, 0x1040B }, + { 0x10434, 0x1040C }, + { 0x10435, 0x1040D }, + { 0x10436, 0x1040E }, + { 0x10437, 0x1040F }, + { 0x10438, 0x10410 }, + { 0x10439, 0x10411 }, + { 0x1043A, 0x10412 }, + { 0x1043B, 0x10413 }, + { 0x1043C, 0x10414 }, + { 0x1043D, 0x10415 }, + { 0x1043E, 0x10416 }, + { 0x1043F, 0x10417 }, + { 0x10440, 0x10418 }, + { 0x10441, 0x10419 }, + { 0x10442, 0x1041A }, + { 0x10443, 0x1041B }, + { 0x10444, 0x1041C }, + { 0x10445, 0x1041D }, + { 0x10446, 0x1041E }, + { 0x10447, 0x1041F }, + { 0x10448, 0x10420 }, + { 0x10449, 0x10421 }, + { 0x1044A, 0x10422 }, + { 0x1044B, 0x10423 }, + { 0x1044C, 0x10424 }, + { 0x1044D, 0x10425 }, + { 0x1044E, 0x10426 }, + { 0x1044F, 0x10427 } + +#define MAP(c) uc_totitle (c) +#include "test-mapping-part2.h" diff --git a/tests/unicase/test-uc_toupper.c b/tests/unicase/test-uc_toupper.c new file mode 100644 index 00000000..ed242a07 --- /dev/null +++ b/tests/unicase/test-uc_toupper.c @@ -0,0 +1,1055 @@ +/* DO NOT EDIT! GENERATED AUTOMATICALLY! */ +/* Test the Unicode character mapping functions. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Generated automatically by gen-case.c for Unicode 5.1.0.
*/ + +#include "test-mapping-part1.h" + + { 0x0061, 0x0041 }, + { 0x0062, 0x0042 }, + { 0x0063, 0x0043 }, + { 0x0064, 0x0044 }, + { 0x0065, 0x0045 }, + { 0x0066, 0x0046 }, + { 0x0067, 0x0047 }, + { 0x0068, 0x0048 }, + { 0x0069, 0x0049 }, + { 0x006A, 0x004A }, + { 0x006B, 0x004B }, + { 0x006C, 0x004C }, + { 0x006D, 0x004D }, + { 0x006E, 0x004E }, + { 0x006F, 0x004F }, + { 0x0070, 0x0050 }, + { 0x0071, 0x0051 }, + { 0x0072, 0x0052 }, + { 0x0073, 0x0053 }, + { 0x0074, 0x0054 }, + { 0x0075, 0x0055 }, + { 0x0076, 0x0056 }, + { 0x0077, 0x0057 }, + { 0x0078, 0x0058 }, + { 0x0079, 0x0059 }, + { 0x007A, 0x005A }, + { 0x00B5, 0x039C }, + { 0x00E0, 0x00C0 }, + { 0x00E1, 0x00C1 }, + { 0x00E2, 0x00C2 }, + { 0x00E3, 0x00C3 }, + { 0x00E4, 0x00C4 }, + { 0x00E5, 0x00C5 }, + { 0x00E6, 0x00C6 }, + { 0x00E7, 0x00C7 }, + { 0x00E8, 0x00C8 }, + { 0x00E9, 0x00C9 }, + { 0x00EA, 0x00CA }, + { 0x00EB, 0x00CB }, + { 0x00EC, 0x00CC }, + { 0x00ED, 0x00CD }, + { 0x00EE, 0x00CE }, + { 0x00EF, 0x00CF }, + { 0x00F0, 0x00D0 }, + { 0x00F1, 0x00D1 }, + { 0x00F2, 0x00D2 }, + { 0x00F3, 0x00D3 }, + { 0x00F4, 0x00D4 }, + { 0x00F5, 0x00D5 }, + { 0x00F6, 0x00D6 }, + { 0x00F8, 0x00D8 }, + { 0x00F9, 0x00D9 }, + { 0x00FA, 0x00DA }, + { 0x00FB, 0x00DB }, + { 0x00FC, 0x00DC }, + { 0x00FD, 0x00DD }, + { 0x00FE, 0x00DE }, + { 0x00FF, 0x0178 }, + { 0x0101, 0x0100 }, + { 0x0103, 0x0102 }, + { 0x0105, 0x0104 }, + { 0x0107, 0x0106 }, + { 0x0109, 0x0108 }, + { 0x010B, 0x010A }, + { 0x010D, 0x010C }, + { 0x010F, 0x010E }, + { 0x0111, 0x0110 }, + { 0x0113, 0x0112 }, + { 0x0115, 0x0114 }, + { 0x0117, 0x0116 }, + { 0x0119, 0x0118 }, + { 0x011B, 0x011A }, + { 0x011D, 0x011C }, + { 0x011F, 0x011E }, + { 0x0121, 0x0120 }, + { 0x0123, 0x0122 }, + { 0x0125, 0x0124 }, + { 0x0127, 0x0126 }, + { 0x0129, 0x0128 }, + { 0x012B, 0x012A }, + { 0x012D, 0x012C }, + { 0x012F, 0x012E }, + { 0x0131, 0x0049 }, + { 0x0133, 0x0132 }, + { 0x0135, 0x0134 }, + { 0x0137, 0x0136 }, + { 0x013A, 0x0139 }, + { 0x013C, 0x013B }, + { 0x013E, 0x013D }, + 
{ 0x0140, 0x013F }, + { 0x0142, 0x0141 }, + { 0x0144, 0x0143 }, + { 0x0146, 0x0145 }, + { 0x0148, 0x0147 }, + { 0x014B, 0x014A }, + { 0x014D, 0x014C }, + { 0x014F, 0x014E }, + { 0x0151, 0x0150 }, + { 0x0153, 0x0152 }, + { 0x0155, 0x0154 }, + { 0x0157, 0x0156 }, + { 0x0159, 0x0158 }, + { 0x015B, 0x015A }, + { 0x015D, 0x015C }, + { 0x015F, 0x015E }, + { 0x0161, 0x0160 }, + { 0x0163, 0x0162 }, + { 0x0165, 0x0164 }, + { 0x0167, 0x0166 }, + { 0x0169, 0x0168 }, + { 0x016B, 0x016A }, + { 0x016D, 0x016C }, + { 0x016F, 0x016E }, + { 0x0171, 0x0170 }, + { 0x0173, 0x0172 }, + { 0x0175, 0x0174 }, + { 0x0177, 0x0176 }, + { 0x017A, 0x0179 }, + { 0x017C, 0x017B }, + { 0x017E, 0x017D }, + { 0x017F, 0x0053 }, + { 0x0180, 0x0243 }, + { 0x0183, 0x0182 }, + { 0x0185, 0x0184 }, + { 0x0188, 0x0187 }, + { 0x018C, 0x018B }, + { 0x0192, 0x0191 }, + { 0x0195, 0x01F6 }, + { 0x0199, 0x0198 }, + { 0x019A, 0x023D }, + { 0x019E, 0x0220 }, + { 0x01A1, 0x01A0 }, + { 0x01A3, 0x01A2 }, + { 0x01A5, 0x01A4 }, + { 0x01A8, 0x01A7 }, + { 0x01AD, 0x01AC }, + { 0x01B0, 0x01AF }, + { 0x01B4, 0x01B3 }, + { 0x01B6, 0x01B5 }, + { 0x01B9, 0x01B8 }, + { 0x01BD, 0x01BC }, + { 0x01BF, 0x01F7 }, + { 0x01C5, 0x01C4 }, + { 0x01C6, 0x01C4 }, + { 0x01C8, 0x01C7 }, + { 0x01C9, 0x01C7 }, + { 0x01CB, 0x01CA }, + { 0x01CC, 0x01CA }, + { 0x01CE, 0x01CD }, + { 0x01D0, 0x01CF }, + { 0x01D2, 0x01D1 }, + { 0x01D4, 0x01D3 }, + { 0x01D6, 0x01D5 }, + { 0x01D8, 0x01D7 }, + { 0x01DA, 0x01D9 }, + { 0x01DC, 0x01DB }, + { 0x01DD, 0x018E }, + { 0x01DF, 0x01DE }, + { 0x01E1, 0x01E0 }, + { 0x01E3, 0x01E2 }, + { 0x01E5, 0x01E4 }, + { 0x01E7, 0x01E6 }, + { 0x01E9, 0x01E8 }, + { 0x01EB, 0x01EA }, + { 0x01ED, 0x01EC }, + { 0x01EF, 0x01EE }, + { 0x01F2, 0x01F1 }, + { 0x01F3, 0x01F1 }, + { 0x01F5, 0x01F4 }, + { 0x01F9, 0x01F8 }, + { 0x01FB, 0x01FA }, + { 0x01FD, 0x01FC }, + { 0x01FF, 0x01FE }, + { 0x0201, 0x0200 }, + { 0x0203, 0x0202 }, + { 0x0205, 0x0204 }, + { 0x0207, 0x0206 }, + { 0x0209, 0x0208 }, + { 0x020B, 0x020A }, + { 0x020D, 0x020C }, 
+ { 0x020F, 0x020E }, + { 0x0211, 0x0210 }, + { 0x0213, 0x0212 }, + { 0x0215, 0x0214 }, + { 0x0217, 0x0216 }, + { 0x0219, 0x0218 }, + { 0x021B, 0x021A }, + { 0x021D, 0x021C }, + { 0x021F, 0x021E }, + { 0x0223, 0x0222 }, + { 0x0225, 0x0224 }, + { 0x0227, 0x0226 }, + { 0x0229, 0x0228 }, + { 0x022B, 0x022A }, + { 0x022D, 0x022C }, + { 0x022F, 0x022E }, + { 0x0231, 0x0230 }, + { 0x0233, 0x0232 }, + { 0x023C, 0x023B }, + { 0x0242, 0x0241 }, + { 0x0247, 0x0246 }, + { 0x0249, 0x0248 }, + { 0x024B, 0x024A }, + { 0x024D, 0x024C }, + { 0x024F, 0x024E }, + { 0x0250, 0x2C6F }, + { 0x0251, 0x2C6D }, + { 0x0253, 0x0181 }, + { 0x0254, 0x0186 }, + { 0x0256, 0x0189 }, + { 0x0257, 0x018A }, + { 0x0259, 0x018F }, + { 0x025B, 0x0190 }, + { 0x0260, 0x0193 }, + { 0x0263, 0x0194 }, + { 0x0268, 0x0197 }, + { 0x0269, 0x0196 }, + { 0x026B, 0x2C62 }, + { 0x026F, 0x019C }, + { 0x0271, 0x2C6E }, + { 0x0272, 0x019D }, + { 0x0275, 0x019F }, + { 0x027D, 0x2C64 }, + { 0x0280, 0x01A6 }, + { 0x0283, 0x01A9 }, + { 0x0288, 0x01AE }, + { 0x0289, 0x0244 }, + { 0x028A, 0x01B1 }, + { 0x028B, 0x01B2 }, + { 0x028C, 0x0245 }, + { 0x0292, 0x01B7 }, + { 0x0345, 0x0399 }, + { 0x0371, 0x0370 }, + { 0x0373, 0x0372 }, + { 0x0377, 0x0376 }, + { 0x037B, 0x03FD }, + { 0x037C, 0x03FE }, + { 0x037D, 0x03FF }, + { 0x03AC, 0x0386 }, + { 0x03AD, 0x0388 }, + { 0x03AE, 0x0389 }, + { 0x03AF, 0x038A }, + { 0x03B1, 0x0391 }, + { 0x03B2, 0x0392 }, + { 0x03B3, 0x0393 }, + { 0x03B4, 0x0394 }, + { 0x03B5, 0x0395 }, + { 0x03B6, 0x0396 }, + { 0x03B7, 0x0397 }, + { 0x03B8, 0x0398 }, + { 0x03B9, 0x0399 }, + { 0x03BA, 0x039A }, + { 0x03BB, 0x039B }, + { 0x03BC, 0x039C }, + { 0x03BD, 0x039D }, + { 0x03BE, 0x039E }, + { 0x03BF, 0x039F }, + { 0x03C0, 0x03A0 }, + { 0x03C1, 0x03A1 }, + { 0x03C2, 0x03A3 }, + { 0x03C3, 0x03A3 }, + { 0x03C4, 0x03A4 }, + { 0x03C5, 0x03A5 }, + { 0x03C6, 0x03A6 }, + { 0x03C7, 0x03A7 }, + { 0x03C8, 0x03A8 }, + { 0x03C9, 0x03A9 }, + { 0x03CA, 0x03AA }, + { 0x03CB, 0x03AB }, + { 0x03CC, 0x038C }, + { 0x03CD, 0x038E 
}, + { 0x03CE, 0x038F }, + { 0x03D0, 0x0392 }, + { 0x03D1, 0x0398 }, + { 0x03D5, 0x03A6 }, + { 0x03D6, 0x03A0 }, + { 0x03D7, 0x03CF }, + { 0x03D9, 0x03D8 }, + { 0x03DB, 0x03DA }, + { 0x03DD, 0x03DC }, + { 0x03DF, 0x03DE }, + { 0x03E1, 0x03E0 }, + { 0x03E3, 0x03E2 }, + { 0x03E5, 0x03E4 }, + { 0x03E7, 0x03E6 }, + { 0x03E9, 0x03E8 }, + { 0x03EB, 0x03EA }, + { 0x03ED, 0x03EC }, + { 0x03EF, 0x03EE }, + { 0x03F0, 0x039A }, + { 0x03F1, 0x03A1 }, + { 0x03F2, 0x03F9 }, + { 0x03F5, 0x0395 }, + { 0x03F8, 0x03F7 }, + { 0x03FB, 0x03FA }, + { 0x0430, 0x0410 }, + { 0x0431, 0x0411 }, + { 0x0432, 0x0412 }, + { 0x0433, 0x0413 }, + { 0x0434, 0x0414 }, + { 0x0435, 0x0415 }, + { 0x0436, 0x0416 }, + { 0x0437, 0x0417 }, + { 0x0438, 0x0418 }, + { 0x0439, 0x0419 }, + { 0x043A, 0x041A }, + { 0x043B, 0x041B }, + { 0x043C, 0x041C }, + { 0x043D, 0x041D }, + { 0x043E, 0x041E }, + { 0x043F, 0x041F }, + { 0x0440, 0x0420 }, + { 0x0441, 0x0421 }, + { 0x0442, 0x0422 }, + { 0x0443, 0x0423 }, + { 0x0444, 0x0424 }, + { 0x0445, 0x0425 }, + { 0x0446, 0x0426 }, + { 0x0447, 0x0427 }, + { 0x0448, 0x0428 }, + { 0x0449, 0x0429 }, + { 0x044A, 0x042A }, + { 0x044B, 0x042B }, + { 0x044C, 0x042C }, + { 0x044D, 0x042D }, + { 0x044E, 0x042E }, + { 0x044F, 0x042F }, + { 0x0450, 0x0400 }, + { 0x0451, 0x0401 }, + { 0x0452, 0x0402 }, + { 0x0453, 0x0403 }, + { 0x0454, 0x0404 }, + { 0x0455, 0x0405 }, + { 0x0456, 0x0406 }, + { 0x0457, 0x0407 }, + { 0x0458, 0x0408 }, + { 0x0459, 0x0409 }, + { 0x045A, 0x040A }, + { 0x045B, 0x040B }, + { 0x045C, 0x040C }, + { 0x045D, 0x040D }, + { 0x045E, 0x040E }, + { 0x045F, 0x040F }, + { 0x0461, 0x0460 }, + { 0x0463, 0x0462 }, + { 0x0465, 0x0464 }, + { 0x0467, 0x0466 }, + { 0x0469, 0x0468 }, + { 0x046B, 0x046A }, + { 0x046D, 0x046C }, + { 0x046F, 0x046E }, + { 0x0471, 0x0470 }, + { 0x0473, 0x0472 }, + { 0x0475, 0x0474 }, + { 0x0477, 0x0476 }, + { 0x0479, 0x0478 }, + { 0x047B, 0x047A }, + { 0x047D, 0x047C }, + { 0x047F, 0x047E }, + { 0x0481, 0x0480 }, + { 0x048B, 0x048A }, + { 0x048D, 
0x048C }, + { 0x048F, 0x048E }, + { 0x0491, 0x0490 }, + { 0x0493, 0x0492 }, + { 0x0495, 0x0494 }, + { 0x0497, 0x0496 }, + { 0x0499, 0x0498 }, + { 0x049B, 0x049A }, + { 0x049D, 0x049C }, + { 0x049F, 0x049E }, + { 0x04A1, 0x04A0 }, + { 0x04A3, 0x04A2 }, + { 0x04A5, 0x04A4 }, + { 0x04A7, 0x04A6 }, + { 0x04A9, 0x04A8 }, + { 0x04AB, 0x04AA }, + { 0x04AD, 0x04AC }, + { 0x04AF, 0x04AE }, + { 0x04B1, 0x04B0 }, + { 0x04B3, 0x04B2 }, + { 0x04B5, 0x04B4 }, + { 0x04B7, 0x04B6 }, + { 0x04B9, 0x04B8 }, + { 0x04BB, 0x04BA }, + { 0x04BD, 0x04BC }, + { 0x04BF, 0x04BE }, + { 0x04C2, 0x04C1 }, + { 0x04C4, 0x04C3 }, + { 0x04C6, 0x04C5 }, + { 0x04C8, 0x04C7 }, + { 0x04CA, 0x04C9 }, + { 0x04CC, 0x04CB }, + { 0x04CE, 0x04CD }, + { 0x04CF, 0x04C0 }, + { 0x04D1, 0x04D0 }, + { 0x04D3, 0x04D2 }, + { 0x04D5, 0x04D4 }, + { 0x04D7, 0x04D6 }, + { 0x04D9, 0x04D8 }, + { 0x04DB, 0x04DA }, + { 0x04DD, 0x04DC }, + { 0x04DF, 0x04DE }, + { 0x04E1, 0x04E0 }, + { 0x04E3, 0x04E2 }, + { 0x04E5, 0x04E4 }, + { 0x04E7, 0x04E6 }, + { 0x04E9, 0x04E8 }, + { 0x04EB, 0x04EA }, + { 0x04ED, 0x04EC }, + { 0x04EF, 0x04EE }, + { 0x04F1, 0x04F0 }, + { 0x04F3, 0x04F2 }, + { 0x04F5, 0x04F4 }, + { 0x04F7, 0x04F6 }, + { 0x04F9, 0x04F8 }, + { 0x04FB, 0x04FA }, + { 0x04FD, 0x04FC }, + { 0x04FF, 0x04FE }, + { 0x0501, 0x0500 }, + { 0x0503, 0x0502 }, + { 0x0505, 0x0504 }, + { 0x0507, 0x0506 }, + { 0x0509, 0x0508 }, + { 0x050B, 0x050A }, + { 0x050D, 0x050C }, + { 0x050F, 0x050E }, + { 0x0511, 0x0510 }, + { 0x0513, 0x0512 }, + { 0x0515, 0x0514 }, + { 0x0517, 0x0516 }, + { 0x0519, 0x0518 }, + { 0x051B, 0x051A }, + { 0x051D, 0x051C }, + { 0x051F, 0x051E }, + { 0x0521, 0x0520 }, + { 0x0523, 0x0522 }, + { 0x0561, 0x0531 }, + { 0x0562, 0x0532 }, + { 0x0563, 0x0533 }, + { 0x0564, 0x0534 }, + { 0x0565, 0x0535 }, + { 0x0566, 0x0536 }, + { 0x0567, 0x0537 }, + { 0x0568, 0x0538 }, + { 0x0569, 0x0539 }, + { 0x056A, 0x053A }, + { 0x056B, 0x053B }, + { 0x056C, 0x053C }, + { 0x056D, 0x053D }, + { 0x056E, 0x053E }, + { 0x056F, 0x053F }, + { 
0x0570, 0x0540 }, + { 0x0571, 0x0541 }, + { 0x0572, 0x0542 }, + { 0x0573, 0x0543 }, + { 0x0574, 0x0544 }, + { 0x0575, 0x0545 }, + { 0x0576, 0x0546 }, + { 0x0577, 0x0547 }, + { 0x0578, 0x0548 }, + { 0x0579, 0x0549 }, + { 0x057A, 0x054A }, + { 0x057B, 0x054B }, + { 0x057C, 0x054C }, + { 0x057D, 0x054D }, + { 0x057E, 0x054E }, + { 0x057F, 0x054F }, + { 0x0580, 0x0550 }, + { 0x0581, 0x0551 }, + { 0x0582, 0x0552 }, + { 0x0583, 0x0553 }, + { 0x0584, 0x0554 }, + { 0x0585, 0x0555 }, + { 0x0586, 0x0556 }, + { 0x1D79, 0xA77D }, + { 0x1D7D, 0x2C63 }, + { 0x1E01, 0x1E00 }, + { 0x1E03, 0x1E02 }, + { 0x1E05, 0x1E04 }, + { 0x1E07, 0x1E06 }, + { 0x1E09, 0x1E08 }, + { 0x1E0B, 0x1E0A }, + { 0x1E0D, 0x1E0C }, + { 0x1E0F, 0x1E0E }, + { 0x1E11, 0x1E10 }, + { 0x1E13, 0x1E12 }, + { 0x1E15, 0x1E14 }, + { 0x1E17, 0x1E16 }, + { 0x1E19, 0x1E18 }, + { 0x1E1B, 0x1E1A }, + { 0x1E1D, 0x1E1C }, + { 0x1E1F, 0x1E1E }, + { 0x1E21, 0x1E20 }, + { 0x1E23, 0x1E22 }, + { 0x1E25, 0x1E24 }, + { 0x1E27, 0x1E26 }, + { 0x1E29, 0x1E28 }, + { 0x1E2B, 0x1E2A }, + { 0x1E2D, 0x1E2C }, + { 0x1E2F, 0x1E2E }, + { 0x1E31, 0x1E30 }, + { 0x1E33, 0x1E32 }, + { 0x1E35, 0x1E34 }, + { 0x1E37, 0x1E36 }, + { 0x1E39, 0x1E38 }, + { 0x1E3B, 0x1E3A }, + { 0x1E3D, 0x1E3C }, + { 0x1E3F, 0x1E3E }, + { 0x1E41, 0x1E40 }, + { 0x1E43, 0x1E42 }, + { 0x1E45, 0x1E44 }, + { 0x1E47, 0x1E46 }, + { 0x1E49, 0x1E48 }, + { 0x1E4B, 0x1E4A }, + { 0x1E4D, 0x1E4C }, + { 0x1E4F, 0x1E4E }, + { 0x1E51, 0x1E50 }, + { 0x1E53, 0x1E52 }, + { 0x1E55, 0x1E54 }, + { 0x1E57, 0x1E56 }, + { 0x1E59, 0x1E58 }, + { 0x1E5B, 0x1E5A }, + { 0x1E5D, 0x1E5C }, + { 0x1E5F, 0x1E5E }, + { 0x1E61, 0x1E60 }, + { 0x1E63, 0x1E62 }, + { 0x1E65, 0x1E64 }, + { 0x1E67, 0x1E66 }, + { 0x1E69, 0x1E68 }, + { 0x1E6B, 0x1E6A }, + { 0x1E6D, 0x1E6C }, + { 0x1E6F, 0x1E6E }, + { 0x1E71, 0x1E70 }, + { 0x1E73, 0x1E72 }, + { 0x1E75, 0x1E74 }, + { 0x1E77, 0x1E76 }, + { 0x1E79, 0x1E78 }, + { 0x1E7B, 0x1E7A }, + { 0x1E7D, 0x1E7C }, + { 0x1E7F, 0x1E7E }, + { 0x1E81, 0x1E80 }, + { 0x1E83, 0x1E82 }, + 
{ 0x1E85, 0x1E84 }, + { 0x1E87, 0x1E86 }, + { 0x1E89, 0x1E88 }, + { 0x1E8B, 0x1E8A }, + { 0x1E8D, 0x1E8C }, + { 0x1E8F, 0x1E8E }, + { 0x1E91, 0x1E90 }, + { 0x1E93, 0x1E92 }, + { 0x1E95, 0x1E94 }, + { 0x1E9B, 0x1E60 }, + { 0x1EA1, 0x1EA0 }, + { 0x1EA3, 0x1EA2 }, + { 0x1EA5, 0x1EA4 }, + { 0x1EA7, 0x1EA6 }, + { 0x1EA9, 0x1EA8 }, + { 0x1EAB, 0x1EAA }, + { 0x1EAD, 0x1EAC }, + { 0x1EAF, 0x1EAE }, + { 0x1EB1, 0x1EB0 }, + { 0x1EB3, 0x1EB2 }, + { 0x1EB5, 0x1EB4 }, + { 0x1EB7, 0x1EB6 }, + { 0x1EB9, 0x1EB8 }, + { 0x1EBB, 0x1EBA }, + { 0x1EBD, 0x1EBC }, + { 0x1EBF, 0x1EBE }, + { 0x1EC1, 0x1EC0 }, + { 0x1EC3, 0x1EC2 }, + { 0x1EC5, 0x1EC4 }, + { 0x1EC7, 0x1EC6 }, + { 0x1EC9, 0x1EC8 }, + { 0x1ECB, 0x1ECA }, + { 0x1ECD, 0x1ECC }, + { 0x1ECF, 0x1ECE }, + { 0x1ED1, 0x1ED0 }, + { 0x1ED3, 0x1ED2 }, + { 0x1ED5, 0x1ED4 }, + { 0x1ED7, 0x1ED6 }, + { 0x1ED9, 0x1ED8 }, + { 0x1EDB, 0x1EDA }, + { 0x1EDD, 0x1EDC }, + { 0x1EDF, 0x1EDE }, + { 0x1EE1, 0x1EE0 }, + { 0x1EE3, 0x1EE2 }, + { 0x1EE5, 0x1EE4 }, + { 0x1EE7, 0x1EE6 }, + { 0x1EE9, 0x1EE8 }, + { 0x1EEB, 0x1EEA }, + { 0x1EED, 0x1EEC }, + { 0x1EEF, 0x1EEE }, + { 0x1EF1, 0x1EF0 }, + { 0x1EF3, 0x1EF2 }, + { 0x1EF5, 0x1EF4 }, + { 0x1EF7, 0x1EF6 }, + { 0x1EF9, 0x1EF8 }, + { 0x1EFB, 0x1EFA }, + { 0x1EFD, 0x1EFC }, + { 0x1EFF, 0x1EFE }, + { 0x1F00, 0x1F08 }, + { 0x1F01, 0x1F09 }, + { 0x1F02, 0x1F0A }, + { 0x1F03, 0x1F0B }, + { 0x1F04, 0x1F0C }, + { 0x1F05, 0x1F0D }, + { 0x1F06, 0x1F0E }, + { 0x1F07, 0x1F0F }, + { 0x1F10, 0x1F18 }, + { 0x1F11, 0x1F19 }, + { 0x1F12, 0x1F1A }, + { 0x1F13, 0x1F1B }, + { 0x1F14, 0x1F1C }, + { 0x1F15, 0x1F1D }, + { 0x1F20, 0x1F28 }, + { 0x1F21, 0x1F29 }, + { 0x1F22, 0x1F2A }, + { 0x1F23, 0x1F2B }, + { 0x1F24, 0x1F2C }, + { 0x1F25, 0x1F2D }, + { 0x1F26, 0x1F2E }, + { 0x1F27, 0x1F2F }, + { 0x1F30, 0x1F38 }, + { 0x1F31, 0x1F39 }, + { 0x1F32, 0x1F3A }, + { 0x1F33, 0x1F3B }, + { 0x1F34, 0x1F3C }, + { 0x1F35, 0x1F3D }, + { 0x1F36, 0x1F3E }, + { 0x1F37, 0x1F3F }, + { 0x1F40, 0x1F48 }, + { 0x1F41, 0x1F49 }, + { 0x1F42, 0x1F4A }, 
+ { 0x1F43, 0x1F4B }, + { 0x1F44, 0x1F4C }, + { 0x1F45, 0x1F4D }, + { 0x1F51, 0x1F59 }, + { 0x1F53, 0x1F5B }, + { 0x1F55, 0x1F5D }, + { 0x1F57, 0x1F5F }, + { 0x1F60, 0x1F68 }, + { 0x1F61, 0x1F69 }, + { 0x1F62, 0x1F6A }, + { 0x1F63, 0x1F6B }, + { 0x1F64, 0x1F6C }, + { 0x1F65, 0x1F6D }, + { 0x1F66, 0x1F6E }, + { 0x1F67, 0x1F6F }, + { 0x1F70, 0x1FBA }, + { 0x1F71, 0x1FBB }, + { 0x1F72, 0x1FC8 }, + { 0x1F73, 0x1FC9 }, + { 0x1F74, 0x1FCA }, + { 0x1F75, 0x1FCB }, + { 0x1F76, 0x1FDA }, + { 0x1F77, 0x1FDB }, + { 0x1F78, 0x1FF8 }, + { 0x1F79, 0x1FF9 }, + { 0x1F7A, 0x1FEA }, + { 0x1F7B, 0x1FEB }, + { 0x1F7C, 0x1FFA }, + { 0x1F7D, 0x1FFB }, + { 0x1F80, 0x1F88 }, + { 0x1F81, 0x1F89 }, + { 0x1F82, 0x1F8A }, + { 0x1F83, 0x1F8B }, + { 0x1F84, 0x1F8C }, + { 0x1F85, 0x1F8D }, + { 0x1F86, 0x1F8E }, + { 0x1F87, 0x1F8F }, + { 0x1F90, 0x1F98 }, + { 0x1F91, 0x1F99 }, + { 0x1F92, 0x1F9A }, + { 0x1F93, 0x1F9B }, + { 0x1F94, 0x1F9C }, + { 0x1F95, 0x1F9D }, + { 0x1F96, 0x1F9E }, + { 0x1F97, 0x1F9F }, + { 0x1FA0, 0x1FA8 }, + { 0x1FA1, 0x1FA9 }, + { 0x1FA2, 0x1FAA }, + { 0x1FA3, 0x1FAB }, + { 0x1FA4, 0x1FAC }, + { 0x1FA5, 0x1FAD }, + { 0x1FA6, 0x1FAE }, + { 0x1FA7, 0x1FAF }, + { 0x1FB0, 0x1FB8 }, + { 0x1FB1, 0x1FB9 }, + { 0x1FB3, 0x1FBC }, + { 0x1FBE, 0x0399 }, + { 0x1FC3, 0x1FCC }, + { 0x1FD0, 0x1FD8 }, + { 0x1FD1, 0x1FD9 }, + { 0x1FE0, 0x1FE8 }, + { 0x1FE1, 0x1FE9 }, + { 0x1FE5, 0x1FEC }, + { 0x1FF3, 0x1FFC }, + { 0x214E, 0x2132 }, + { 0x2170, 0x2160 }, + { 0x2171, 0x2161 }, + { 0x2172, 0x2162 }, + { 0x2173, 0x2163 }, + { 0x2174, 0x2164 }, + { 0x2175, 0x2165 }, + { 0x2176, 0x2166 }, + { 0x2177, 0x2167 }, + { 0x2178, 0x2168 }, + { 0x2179, 0x2169 }, + { 0x217A, 0x216A }, + { 0x217B, 0x216B }, + { 0x217C, 0x216C }, + { 0x217D, 0x216D }, + { 0x217E, 0x216E }, + { 0x217F, 0x216F }, + { 0x2184, 0x2183 }, + { 0x24D0, 0x24B6 }, + { 0x24D1, 0x24B7 }, + { 0x24D2, 0x24B8 }, + { 0x24D3, 0x24B9 }, + { 0x24D4, 0x24BA }, + { 0x24D5, 0x24BB }, + { 0x24D6, 0x24BC }, + { 0x24D7, 0x24BD }, + { 0x24D8, 0x24BE 
}, + { 0x24D9, 0x24BF }, + { 0x24DA, 0x24C0 }, + { 0x24DB, 0x24C1 }, + { 0x24DC, 0x24C2 }, + { 0x24DD, 0x24C3 }, + { 0x24DE, 0x24C4 }, + { 0x24DF, 0x24C5 }, + { 0x24E0, 0x24C6 }, + { 0x24E1, 0x24C7 }, + { 0x24E2, 0x24C8 }, + { 0x24E3, 0x24C9 }, + { 0x24E4, 0x24CA }, + { 0x24E5, 0x24CB }, + { 0x24E6, 0x24CC }, + { 0x24E7, 0x24CD }, + { 0x24E8, 0x24CE }, + { 0x24E9, 0x24CF }, + { 0x2C30, 0x2C00 }, + { 0x2C31, 0x2C01 }, + { 0x2C32, 0x2C02 }, + { 0x2C33, 0x2C03 }, + { 0x2C34, 0x2C04 }, + { 0x2C35, 0x2C05 }, + { 0x2C36, 0x2C06 }, + { 0x2C37, 0x2C07 }, + { 0x2C38, 0x2C08 }, + { 0x2C39, 0x2C09 }, + { 0x2C3A, 0x2C0A }, + { 0x2C3B, 0x2C0B }, + { 0x2C3C, 0x2C0C }, + { 0x2C3D, 0x2C0D }, + { 0x2C3E, 0x2C0E }, + { 0x2C3F, 0x2C0F }, + { 0x2C40, 0x2C10 }, + { 0x2C41, 0x2C11 }, + { 0x2C42, 0x2C12 }, + { 0x2C43, 0x2C13 }, + { 0x2C44, 0x2C14 }, + { 0x2C45, 0x2C15 }, + { 0x2C46, 0x2C16 }, + { 0x2C47, 0x2C17 }, + { 0x2C48, 0x2C18 }, + { 0x2C49, 0x2C19 }, + { 0x2C4A, 0x2C1A }, + { 0x2C4B, 0x2C1B }, + { 0x2C4C, 0x2C1C }, + { 0x2C4D, 0x2C1D }, + { 0x2C4E, 0x2C1E }, + { 0x2C4F, 0x2C1F }, + { 0x2C50, 0x2C20 }, + { 0x2C51, 0x2C21 }, + { 0x2C52, 0x2C22 }, + { 0x2C53, 0x2C23 }, + { 0x2C54, 0x2C24 }, + { 0x2C55, 0x2C25 }, + { 0x2C56, 0x2C26 }, + { 0x2C57, 0x2C27 }, + { 0x2C58, 0x2C28 }, + { 0x2C59, 0x2C29 }, + { 0x2C5A, 0x2C2A }, + { 0x2C5B, 0x2C2B }, + { 0x2C5C, 0x2C2C }, + { 0x2C5D, 0x2C2D }, + { 0x2C5E, 0x2C2E }, + { 0x2C61, 0x2C60 }, + { 0x2C65, 0x023A }, + { 0x2C66, 0x023E }, + { 0x2C68, 0x2C67 }, + { 0x2C6A, 0x2C69 }, + { 0x2C6C, 0x2C6B }, + { 0x2C73, 0x2C72 }, + { 0x2C76, 0x2C75 }, + { 0x2C81, 0x2C80 }, + { 0x2C83, 0x2C82 }, + { 0x2C85, 0x2C84 }, + { 0x2C87, 0x2C86 }, + { 0x2C89, 0x2C88 }, + { 0x2C8B, 0x2C8A }, + { 0x2C8D, 0x2C8C }, + { 0x2C8F, 0x2C8E }, + { 0x2C91, 0x2C90 }, + { 0x2C93, 0x2C92 }, + { 0x2C95, 0x2C94 }, + { 0x2C97, 0x2C96 }, + { 0x2C99, 0x2C98 }, + { 0x2C9B, 0x2C9A }, + { 0x2C9D, 0x2C9C }, + { 0x2C9F, 0x2C9E }, + { 0x2CA1, 0x2CA0 }, + { 0x2CA3, 0x2CA2 }, + { 0x2CA5, 
0x2CA4 }, + { 0x2CA7, 0x2CA6 }, + { 0x2CA9, 0x2CA8 }, + { 0x2CAB, 0x2CAA }, + { 0x2CAD, 0x2CAC }, + { 0x2CAF, 0x2CAE }, + { 0x2CB1, 0x2CB0 }, + { 0x2CB3, 0x2CB2 }, + { 0x2CB5, 0x2CB4 }, + { 0x2CB7, 0x2CB6 }, + { 0x2CB9, 0x2CB8 }, + { 0x2CBB, 0x2CBA }, + { 0x2CBD, 0x2CBC }, + { 0x2CBF, 0x2CBE }, + { 0x2CC1, 0x2CC0 }, + { 0x2CC3, 0x2CC2 }, + { 0x2CC5, 0x2CC4 }, + { 0x2CC7, 0x2CC6 }, + { 0x2CC9, 0x2CC8 }, + { 0x2CCB, 0x2CCA }, + { 0x2CCD, 0x2CCC }, + { 0x2CCF, 0x2CCE }, + { 0x2CD1, 0x2CD0 }, + { 0x2CD3, 0x2CD2 }, + { 0x2CD5, 0x2CD4 }, + { 0x2CD7, 0x2CD6 }, + { 0x2CD9, 0x2CD8 }, + { 0x2CDB, 0x2CDA }, + { 0x2CDD, 0x2CDC }, + { 0x2CDF, 0x2CDE }, + { 0x2CE1, 0x2CE0 }, + { 0x2CE3, 0x2CE2 }, + { 0x2D00, 0x10A0 }, + { 0x2D01, 0x10A1 }, + { 0x2D02, 0x10A2 }, + { 0x2D03, 0x10A3 }, + { 0x2D04, 0x10A4 }, + { 0x2D05, 0x10A5 }, + { 0x2D06, 0x10A6 }, + { 0x2D07, 0x10A7 }, + { 0x2D08, 0x10A8 }, + { 0x2D09, 0x10A9 }, + { 0x2D0A, 0x10AA }, + { 0x2D0B, 0x10AB }, + { 0x2D0C, 0x10AC }, + { 0x2D0D, 0x10AD }, + { 0x2D0E, 0x10AE }, + { 0x2D0F, 0x10AF }, + { 0x2D10, 0x10B0 }, + { 0x2D11, 0x10B1 }, + { 0x2D12, 0x10B2 }, + { 0x2D13, 0x10B3 }, + { 0x2D14, 0x10B4 }, + { 0x2D15, 0x10B5 }, + { 0x2D16, 0x10B6 }, + { 0x2D17, 0x10B7 }, + { 0x2D18, 0x10B8 }, + { 0x2D19, 0x10B9 }, + { 0x2D1A, 0x10BA }, + { 0x2D1B, 0x10BB }, + { 0x2D1C, 0x10BC }, + { 0x2D1D, 0x10BD }, + { 0x2D1E, 0x10BE }, + { 0x2D1F, 0x10BF }, + { 0x2D20, 0x10C0 }, + { 0x2D21, 0x10C1 }, + { 0x2D22, 0x10C2 }, + { 0x2D23, 0x10C3 }, + { 0x2D24, 0x10C4 }, + { 0x2D25, 0x10C5 }, + { 0xA641, 0xA640 }, + { 0xA643, 0xA642 }, + { 0xA645, 0xA644 }, + { 0xA647, 0xA646 }, + { 0xA649, 0xA648 }, + { 0xA64B, 0xA64A }, + { 0xA64D, 0xA64C }, + { 0xA64F, 0xA64E }, + { 0xA651, 0xA650 }, + { 0xA653, 0xA652 }, + { 0xA655, 0xA654 }, + { 0xA657, 0xA656 }, + { 0xA659, 0xA658 }, + { 0xA65B, 0xA65A }, + { 0xA65D, 0xA65C }, + { 0xA65F, 0xA65E }, + { 0xA663, 0xA662 }, + { 0xA665, 0xA664 }, + { 0xA667, 0xA666 }, + { 0xA669, 0xA668 }, + { 0xA66B, 0xA66A }, + { 
0xA66D, 0xA66C }, + { 0xA681, 0xA680 }, + { 0xA683, 0xA682 }, + { 0xA685, 0xA684 }, + { 0xA687, 0xA686 }, + { 0xA689, 0xA688 }, + { 0xA68B, 0xA68A }, + { 0xA68D, 0xA68C }, + { 0xA68F, 0xA68E }, + { 0xA691, 0xA690 }, + { 0xA693, 0xA692 }, + { 0xA695, 0xA694 }, + { 0xA697, 0xA696 }, + { 0xA723, 0xA722 }, + { 0xA725, 0xA724 }, + { 0xA727, 0xA726 }, + { 0xA729, 0xA728 }, + { 0xA72B, 0xA72A }, + { 0xA72D, 0xA72C }, + { 0xA72F, 0xA72E }, + { 0xA733, 0xA732 }, + { 0xA735, 0xA734 }, + { 0xA737, 0xA736 }, + { 0xA739, 0xA738 }, + { 0xA73B, 0xA73A }, + { 0xA73D, 0xA73C }, + { 0xA73F, 0xA73E }, + { 0xA741, 0xA740 }, + { 0xA743, 0xA742 }, + { 0xA745, 0xA744 }, + { 0xA747, 0xA746 }, + { 0xA749, 0xA748 }, + { 0xA74B, 0xA74A }, + { 0xA74D, 0xA74C }, + { 0xA74F, 0xA74E }, + { 0xA751, 0xA750 }, + { 0xA753, 0xA752 }, + { 0xA755, 0xA754 }, + { 0xA757, 0xA756 }, + { 0xA759, 0xA758 }, + { 0xA75B, 0xA75A }, + { 0xA75D, 0xA75C }, + { 0xA75F, 0xA75E }, + { 0xA761, 0xA760 }, + { 0xA763, 0xA762 }, + { 0xA765, 0xA764 }, + { 0xA767, 0xA766 }, + { 0xA769, 0xA768 }, + { 0xA76B, 0xA76A }, + { 0xA76D, 0xA76C }, + { 0xA76F, 0xA76E }, + { 0xA77A, 0xA779 }, + { 0xA77C, 0xA77B }, + { 0xA77F, 0xA77E }, + { 0xA781, 0xA780 }, + { 0xA783, 0xA782 }, + { 0xA785, 0xA784 }, + { 0xA787, 0xA786 }, + { 0xA78C, 0xA78B }, + { 0xFF41, 0xFF21 }, + { 0xFF42, 0xFF22 }, + { 0xFF43, 0xFF23 }, + { 0xFF44, 0xFF24 }, + { 0xFF45, 0xFF25 }, + { 0xFF46, 0xFF26 }, + { 0xFF47, 0xFF27 }, + { 0xFF48, 0xFF28 }, + { 0xFF49, 0xFF29 }, + { 0xFF4A, 0xFF2A }, + { 0xFF4B, 0xFF2B }, + { 0xFF4C, 0xFF2C }, + { 0xFF4D, 0xFF2D }, + { 0xFF4E, 0xFF2E }, + { 0xFF4F, 0xFF2F }, + { 0xFF50, 0xFF30 }, + { 0xFF51, 0xFF31 }, + { 0xFF52, 0xFF32 }, + { 0xFF53, 0xFF33 }, + { 0xFF54, 0xFF34 }, + { 0xFF55, 0xFF35 }, + { 0xFF56, 0xFF36 }, + { 0xFF57, 0xFF37 }, + { 0xFF58, 0xFF38 }, + { 0xFF59, 0xFF39 }, + { 0xFF5A, 0xFF3A }, + { 0x10428, 0x10400 }, + { 0x10429, 0x10401 }, + { 0x1042A, 0x10402 }, + { 0x1042B, 0x10403 }, + { 0x1042C, 0x10404 }, + { 0x1042D, 
0x10405 }, + { 0x1042E, 0x10406 }, + { 0x1042F, 0x10407 }, + { 0x10430, 0x10408 }, + { 0x10431, 0x10409 }, + { 0x10432, 0x1040A }, + { 0x10433, 0x1040B }, + { 0x10434, 0x1040C }, + { 0x10435, 0x1040D }, + { 0x10436, 0x1040E }, + { 0x10437, 0x1040F }, + { 0x10438, 0x10410 }, + { 0x10439, 0x10411 }, + { 0x1043A, 0x10412 }, + { 0x1043B, 0x10413 }, + { 0x1043C, 0x10414 }, + { 0x1043D, 0x10415 }, + { 0x1043E, 0x10416 }, + { 0x1043F, 0x10417 }, + { 0x10440, 0x10418 }, + { 0x10441, 0x10419 }, + { 0x10442, 0x1041A }, + { 0x10443, 0x1041B }, + { 0x10444, 0x1041C }, + { 0x10445, 0x1041D }, + { 0x10446, 0x1041E }, + { 0x10447, 0x1041F }, + { 0x10448, 0x10420 }, + { 0x10449, 0x10421 }, + { 0x1044A, 0x10422 }, + { 0x1044B, 0x10423 }, + { 0x1044C, 0x10424 }, + { 0x1044D, 0x10425 }, + { 0x1044E, 0x10426 }, + { 0x1044F, 0x10427 } + +#define MAP(c) uc_toupper (c) +#include "test-mapping-part2.h" diff --git a/tests/unicase/test-ulc-casecmp.c b/tests/unicase/test-ulc-casecmp.c new file mode 100644 index 00000000..5450ef31 --- /dev/null +++ b/tests/unicase/test-ulc-casecmp.c @@ -0,0 +1,408 @@ +/* Test of case and normalization insensitive comparison of strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009.
*/ + +#include + +#include "unicase.h" + +#include +#include +#include + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) /* Element count of ARRAY; ARRAY must be a real array, not a pointer. */ +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT char +#include "test-casecmp.h" +#undef UNIT + +static void +test_iso_8859_1 (int (*my_casecmp) (const char *, size_t, const char *, size_t, const char *, uninorm_t, int *)) +{ /* Runs MY_CASECMP on byte strings in which 0xF6, 0xD6 and 0xDF denote o-diaeresis, O-diaeresis and sharp s, i.e. ISO-8859-1 text. Every call must return 0 and store the expected ordering in cmp. NOTE(review): presumably only meaningful when the caller has selected an ISO-8859-1 locale -- confirm in main. */ + { /* Strings differing only in letter case must compare equal in both argument orders; a case-insensitive proper prefix, and either spelling compared against "Hurz", must yield cmp == -1. The 5th argument is NULL and the uninorm_t argument is UNINORM_NFD throughout this block. */ + static const char input1[] = { 'H', 0xF6, 'h', 'l', 'e' }; + static const char input2[] = { 'H', 0xD6, 'h', 'L', 'e' }; + static const char input3[] = { 'H', 0xF6, 'h', 'l', 'e', 'n' }; + static const char input4[] = { 'H', 0xD6, 'h', 'L', 'e', 'n' }; + static const char input5[] = { 'H', 'u', 'r', 'z' }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input3, SIZEOF (input3), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input4, SIZEOF (input4), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + } + + /* Uppercasing can increase the number of Unicode characters.
*/ + { /* "heiß" vs. "heiss": the single byte 0xDF must compare equal to the two bytes "ss"; the uninorm_t argument is NULL here. */ + static const char input1[] = { 0x68, 0x65, 0x69, 0xDF }; + static const char input2[] = { 0x68, 0x65, 0x69, 0x73, 0x73 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } +} + +static void +test_utf_8 (int (*my_casecmp) (const char *, size_t, const char *, size_t, const char *, uninorm_t, int *)) +{ + /* Normalization effects. */ + { + static const char input1[] = { 'H', 0xC3, 0xB6, 'h', 'l', 'e' }; + static const char input2[] = { 'H', 'O', 0xCC, 0x88, 'h', 'L', 'e' }; + static const char input3[] = { 'H', 0xC3, 0xB6, 'h', 'l', 'e', 'n' }; + static const char input4[] = { 'H', 'O', 0xCC, 0x88, 'h', 'L', 'e', 'n' }; + static const char input5[] = { 'H', 'u', 'r', 'z' }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input1, SIZEOF (input1), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input3, SIZEOF (input3), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input4, SIZEOF (input4), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input4, SIZEOF (input4), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input5, SIZEOF (input5), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == -1); + } + { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ + static const char input1[] = { 0xC3, 0x84 }; + static const char input2[] = { 0x41, 0xCC, 0x88 }; + int cmp; + + ASSERT
(my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ + static const char input1[] = { 0xC7, 0x9E }; + static const char input2[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* GREEK DIALYTIKA AND PERISPOMENI */ + static const char input1[] = { 0xE1, 0xBF, 0x81 }; + static const char input2[] = { 0xC2, 0xA8, 0xCD, 0x82 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* HANGUL SYLLABLE GEUL */ + static const char input1[] = { 0xEA, 0xB8, 0x80 }; + static const char input2[] = { 0xEA, 0xB7, 0xB8, 0xE1, 0x86, 0xAF }; + static const char input3[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* HANGUL SYLLABLE GEU */ + static const char input1[] = { 0xEA, 0xB7, 0xB8 }; + static const char input2[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Simple string. */ + { /* "Grüß Gott. Здравствуйте! 
x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */ + static const char input1[] = + { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', ' ', + 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + static const char input2[] = + { 'g', 'r', 0xC3, 0xBC, 0x73, 0x73, ' ', 'g', 'o', 't', 't', '.', ' ', + 0xD0, 0xB7, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 0x81, + 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5, + '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 's', 'q', 'r', 't', '(', + 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + static const char input3[] = + { 'G', 'R', 0xC3, 0x9C, 0x53, 0x53, ' ', 'G', 'O', 'T', 'T', '.', ' ', + 0xD0, 0x97, 0xD0, 0x94, 0xD0, 0xA0, 0xD0, 0x90, 0xD0, 0x92, 0xD0, 0xA1, + 0xD0, 0xA2, 0xD0, 0x92, 0xD0, 0xA3, 0xD0, 0x99, 0xD0, 0xA2, 0xD0, 0x95, + '!', ' ', 'X', '=', '(', '-', 'B', 0xC2, 0xB1, 'S', 'Q', 'R', 'T', '(', + 'B', 0xC2, 0xB2, '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', + ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE8, 0xAA, 0x9E, ',', + 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', + 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80, '\n' + }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT 
(my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mapping can increase the number of Unicode characters. */ + { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */ + static const char input1[] = { 0xC5, 0x89 }; + static const char input2[] = { 0xCA, 0xBC, 0x6E }; + static const char input3[] = { 0xCA, 0xBC, 0x4E }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */ + static const char input1[] = { 0xCE, 0x90 }; + static const char input2[] = { 0xCE, 0xB9, 0xCC, 0x88, 0xCC, 0x81 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Turkish letters i İ ı I */ + { /* LATIN CAPITAL LETTER I */ + static const char input[] = { 0x49 }; + static const char casefolded[] = { 0x69 }; + static const char casefolded_tr[] = { 0xC4, 0xB1 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF 
(casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN SMALL LETTER I */ + static const char input[] = { 0x69 }; + static const char casefolded[] = { 0x49 }; + static const char casefolded_tr[] = { 0xC4, 0xB0 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */ + static const char input[] = { 0xC4, 0xB0 }; + static const char casefolded[] = { 0x69, 0xCC, 0x87 }; + static const char casefolded_tr[] = { 0x69 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_tr, SIZEOF (casefolded_tr), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* LATIN SMALL LETTER DOTLESS I */ + static const char input[] = { 0xC4, 0xB1 }; + static const char casefolded[] = { 0x49 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 1); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + { /* "topkapı" */ + static const char input[] = + { 0x54, 0x4F, 0x50, 0x4B, 0x41, 0x50, 0x49 }; + static const char casefolded[] = + { 0x74, 0x6F, 0x70, 0x6B, 0x61, 0x70, 0xC4, 0xB1 }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == -1); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), "tr", NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Uppercasing can increase the 
number of Unicode characters. */ + { /* "heiß" */ + static const char input1[] = { 0x68, 0x65, 0x69, 0xC3, 0x9F }; + static const char input2[] = { 0x68, 0x65, 0x69, 0x73, 0x73 }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mappings for some characters can depend on the surrounding characters. */ + { /* "περισσότερες πληροφορίες" */ + static const char input1[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x82, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x82 + }; + static const char input2[] = + { + 0xCF, 0x80, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB9, 0xCF, 0x83, 0xCF, 0x83, + 0xCF, 0x8C, 0xCF, 0x84, 0xCE, 0xB5, 0xCF, 0x81, 0xCE, 0xB5, 0xCF, 0x83, + ' ', 0xCF, 0x80, 0xCE, 0xBB, 0xCE, 0xB7, 0xCF, 0x81, 0xCE, 0xBF, + 0xCF, 0x86, 0xCE, 0xBF, 0xCF, 0x81, 0xCE, 0xAF, 0xCE, 0xB5, 0xCF, 0x83 + }; + static const char input3[] = + { + 0xCE, 0xA0, 0xCE, 0x95, 0xCE, 0xA1, 0xCE, 0x99, 0xCE, 0xA3, 0xCE, 0xA3, + 0xCE, 0x8C, 0xCE, 0xA4, 0xCE, 0x95, 0xCE, 0xA1, 0xCE, 0x95, 0xCE, 0xA3, + ' ', 0xCE, 0xA0, 0xCE, 0x9B, 0xCE, 0x97, 0xCE, 0xA1, 0xCE, 0x9F, + 0xCE, 0xA6, 0xCE, 0x9F, 0xCE, 0xA1, 0xCE, 0x8A, 0xCE, 0x95, 0xCE, 0xA3 + }; + int cmp; + + ASSERT (my_casecmp (input1, SIZEOF (input1), input2, SIZEOF (input2), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input1, SIZEOF (input1), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input2, SIZEOF (input2), input3, SIZEOF (input3), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + } + + /* Case mapping can require subsequent normalization. 
*/ + { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */ + static const char input[] = { 0xC7, 0xB0, 0xCC, 0xA3 }; + static const char casefolded[] = { 0x6A, 0xCC, 0x8C, 0xCC, 0xA3 }; + static const char casefolded_decomposed[] = { 0x6A, 0xCC, 0xA3, 0xCC, 0x8C }; + int cmp; + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, NULL, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, NULL, &cmp) == 0); + ASSERT (cmp != 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded, SIZEOF (casefolded), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + + ASSERT (my_casecmp (input, SIZEOF (input), casefolded_decomposed, SIZEOF (casefolded_decomposed), NULL, UNINORM_NFD, &cmp) == 0); + ASSERT (cmp == 0); + } +} + +int +main (int argc, char *argv[]) +{ + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + test_ascii (ulc_casecmp, UNINORM_NFD); + + if (argc > 1) + switch (argv[1][0]) + { + case '1': + /* Locale encoding is ISO-8859-1 or ISO-8859-15. */ + test_iso_8859_1 (ulc_casecmp); + return 0; + + case '2': + /* Locale encoding is UTF-8. */ + test_utf_8 (ulc_casecmp); + return 0; + } + + return 1; +} diff --git a/tests/unicase/test-ulc-casecmp1.sh b/tests/unicase/test-ulc-casecmp1.sh new file mode 100755 index 00000000..7be2cca1 --- /dev/null +++ b/tests/unicase/test-ulc-casecmp1.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test in an ISO-8859-1 or ISO-8859-15 locale. 
+: ${LOCALE_FR=fr_FR} +if test "$LOCALE_FR" = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no traditional french locale is installed" + else + echo "Skipping test: no traditional french locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR \ +./test-ulc-casecmp${EXEEXT} 1 diff --git a/tests/unicase/test-ulc-casecmp2.sh b/tests/unicase/test-ulc-casecmp2.sh new file mode 100755 index 00000000..a7f06e88 --- /dev/null +++ b/tests/unicase/test-ulc-casecmp2.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test in a UTF-8 locale; skip (exit 77) when LOCALE_FR_UTF8 is "none". +: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test "$LOCALE_FR_UTF8" = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no french Unicode locale is installed" + else + echo "Skipping test: no french Unicode locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR_UTF8 \ +./test-ulc-casecmp${EXEEXT} 2 diff --git a/tests/unicase/test-ulc-casecoll.c b/tests/unicase/test-ulc-casecoll.c new file mode 100644 index 00000000..a0875f44 --- /dev/null +++ b/tests/unicase/test-ulc-casecoll.c @@ -0,0 +1,59 @@ +/* Test of locale dependent, case and normalization insensitive comparison of + strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. 
*/ + +#include <config.h> + +#include "unicase.h" + +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> + +#include "uninorm.h" + +#define SIZEOF(array) (sizeof (array) / sizeof (array[0])) +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define UNIT char +#include "test-casecmp.h" +#undef UNIT + +/* Entry point. Selects the locale from the environment and runs the ASCII + tests on ulc_casecoll with UNINORM_NFC. Returns 1 if setlocale fails, + 0 otherwise (a failed ASSERT aborts instead of returning). */ +int +main (void) +{ + /* configure should already have checked that the locale is supported. */ + if (setlocale (LC_ALL, "") == NULL) + return 1; + + /* In the "C" locale, strcoll is equivalent to strcmp, therefore ulc_casecoll + on ASCII strings should behave like strcasecmp. + NOTE(review): the wrapper scripts set LC_ALL to a French locale, not "C"; + confirm the ASCII expectations hold there too. */ + test_ascii (ulc_casecoll, UNINORM_NFC); + + return 0; +} diff --git a/tests/unicase/test-ulc-casecoll1.sh b/tests/unicase/test-ulc-casecoll1.sh new file mode 100755 index 00000000..3760ed82 --- /dev/null +++ b/tests/unicase/test-ulc-casecoll1.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test in an ISO-8859-1 or ISO-8859-15 locale. +: ${LOCALE_FR=fr_FR} +if test "$LOCALE_FR" = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no traditional french locale is installed" + else + echo "Skipping test: no traditional french locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR \ +./test-ulc-casecoll${EXEEXT} diff --git a/tests/unicase/test-ulc-casecoll2.sh b/tests/unicase/test-ulc-casecoll2.sh new file mode 100755 index 00000000..b179a4c6 --- /dev/null +++ b/tests/unicase/test-ulc-casecoll2.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Test in a UTF-8 locale; skip (exit 77) when LOCALE_FR_UTF8 is "none". +: ${LOCALE_FR_UTF8=fr_FR.UTF-8} +if test "$LOCALE_FR_UTF8" = none; then + if test -f /usr/bin/localedef; then + echo "Skipping test: no french Unicode locale is installed" + else + echo "Skipping test: no french Unicode locale is supported" + fi + exit 77 +fi + +LC_ALL=$LOCALE_FR_UTF8 \ +./test-ulc-casecoll${EXEEXT} -- cgit v1.2.3