diff options
Diffstat (limited to 'test/test_utf8.c')
-rw-r--r-- | test/test_utf8.c | 93 |
1 file changed, 92 insertions, 1 deletion
diff --git a/test/test_utf8.c b/test/test_utf8.c index bab6b0d..d6fc761 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -132,8 +132,9 @@ static void e(char* pattern, char* str, int error_no) extern int main(int argc, char* argv[]) { - static OnigEncoding use_encs[] = { ONIG_ENCODING_UTF8 }; + OnigEncoding use_encs[1]; + use_encs[0] = ONIG_ENCODING_UTF8; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); err_file = stdout; @@ -298,6 +299,8 @@ extern int main(int argc, char* argv[]) x2("(?i:xssy)", "xs\xc5\xbfy", 0, 5); x2("(?i:xssy)", "x\xc3\x9fy", 0, 4); x2("(?i:xssy)", "x\xe1\xba\x9ey", 0, 5); + x2("(?i:x\xc3\x9fy)", "xssy", 0, 4); + x2("(?i:x\xc3\x9fy)", "xSSy", 0, 4); x2("(?i:\xc3\x9f)", "ss", 0, 2); x2("(?i:\xc3\x9f)", "SS", 0, 2); x2("(?i:[\xc3\x9f])", "ss", 0, 2); @@ -1202,10 +1205,98 @@ extern int main(int argc, char* argv[]) x2("a{3,2}b", "aab", 0, 3); x2("a{3,2}?", "", 0, 0); /* == (?:a{3,2})?*/ x2("a{2,3}+a", "aaa", 0, 3); /* == (?:a{2,3})+*/ + x2("[\\x{0}-\\x{7fffffff}]", "a", 0, 1); + x2("[\\x{7f}-\\x{7fffffff}]", "\xe5\xae\xb6", 0, 3); + x2("[a[cdef]]", "a", 0, 1); + n("[a[xyz]-c]", "b"); + x2("[a[xyz]-c]", "a", 0, 1); + x2("[a[xyz]-c]", "-", 0, 1); + x2("[a[xyz]-c]", "c", 0, 1); + + x2("((?(a)\\g<1>|b))", "aab", 0, 3); + x2("((?(a)\\g<1>))", "aab", 0, 2); + x2("(b(?(a)|\\g<1>))", "bba", 0, 3); + e("(()(?(2)\\g<1>))", "", ONIGERR_NEVER_ENDING_RECURSION); + + x2("(?i)st", "st", 0, 2); + x2("(?i)st", "St", 0, 2); + x2("(?i)st", "sT", 0, 2); + x2("(?i)st", "\xC5\xBFt", 0, 3); // U+017F + x2("(?i)st", "\xEF\xAC\x85", 0, 3); // U+FB05 + x2("(?i)st", "\xEF\xAC\x86", 0, 3); // U+FB06 + x2("(?i)ast", "Ast", 0, 3); + x2("(?i)ast", "ASt", 0, 3); + x2("(?i)ast", "AsT", 0, 3); + x2("(?i)ast", "A\xC5\xBFt", 0, 4); // U+017F + x2("(?i)ast", "A\xEF\xAC\x85", 0, 4); // U+FB05 + x2("(?i)ast", "A\xEF\xAC\x86", 0, 4); // U+FB06 + x2("(?i)stZ", "stz", 0, 3); + x2("(?i)stZ", "Stz", 0, 3); + x2("(?i)stZ", "sTz", 0, 3); + x2("(?i)stZ", "\xC5\xBFtz", 0, 
4); // U+017F + x2("(?i)stZ", "\xEF\xAC\x85z", 0, 4); // U+FB05 + x2("(?i)stZ", "\xEF\xAC\x86z", 0, 4); // U+FB06 + x2("(?i)BstZ", "bstz", 0, 4); + x2("(?i)BstZ", "bStz", 0, 4); + x2("(?i)BstZ", "bsTz", 0, 4); + x2("(?i)BstZ", "b\xC5\xBFtz", 0, 5); // U+017F + x2("(?i)BstZ", "b\xEF\xAC\x85z", 0, 5); // U+FB05 + x2("(?i)BstZ", "b\xEF\xAC\x86z", 0, 5); // U+FB06 + x2("(?i).*st\\z", "tttssss\xC5\xBFt", 0, 10); // U+017F + x2("(?i).*st\\z", "tttssss\xEF\xAC\x85", 0, 10); // U+FB05 + x2("(?i).*st\\z", "tttssss\xEF\xAC\x86", 0, 10); // U+FB06 + x2("(?i).*あstい\\z", "tttssssあ\xC5\xBFtい", 0, 16); // U+017F + x2("(?i).*あstい\\z", "tttssssあ\xEF\xAC\x85い", 0, 16); // U+FB05 + x2("(?i).*あstい\\z", "tttssssあ\xEF\xAC\x86い", 0, 16); // U+FB06 + x2("(?i).*\xC5\xBFt\\z", "tttssssst", 0, 9); // U+017F + x2("(?i).*\xEF\xAC\x85\\z", "tttssssあst", 0, 12); // U+FB05 + x2("(?i).*\xEF\xAC\x86い\\z", "tttssssstい", 0, 12); // U+FB06 + x2("(?i).*\xEF\xAC\x85\\z", "tttssssあ\xEF\xAC\x85", 0, 13); + + x2("(?i).*ss", "abcdefghijklmnopqrstuvwxyz\xc3\x9f", 0, 28); // U+00DF + x2("(?i).*ss.*", "abcdefghijklmnopqrstuvwxyz\xc3\x9fxyz", 0, 31); // U+00DF + x2("(?i).*\xc3\x9f", "abcdefghijklmnopqrstuvwxyzss", 0, 28); // U+00DF + x2("(?i).*ss.*", "abcdefghijklmnopqrstuvwxyzSSxyz", 0, 31); + + x2("(?i)ssv", "\xc3\x9fv", 0, 3); // U+00DF + x2("(?i)(?<=ss)v", "SSv", 2, 3); + x2("(?i)(?<=\xc3\x9f)v", "\xc3\x9fv", 2, 3); + //x2("(?i)(?<=\xc3\x9f)v", "ssv", 2, 3); + //x2("(?i)(?<=ss)v", "\xc3\x9fv", 2, 3); + + /* #156 U+01F0 (UTF-8: C7 B0) */ + x2("(?i).+Isssǰ", ".+Isssǰ", 0, 8); + x2(".+Isssǰ", ".+Isssǰ", 0, 8); + x2("(?i)ǰ", "ǰ", 0, 2); + x2("(?i)ǰ", "j\xcc\x8c", 0, 3); + x2("(?i)j\xcc\x8c", "ǰ", 0, 2); + x2("(?i)5ǰ", "5ǰ", 0, 3); + x2("(?i)5ǰ", "5j\xcc\x8c", 0, 4); + x2("(?i)5j\xcc\x8c", "5ǰ", 0, 3); + x2("(?i)ǰv", "ǰV", 0, 3); + x2("(?i)ǰv", "j\xcc\x8cV", 0, 4); + x2("(?i)j\xcc\x8cv", "ǰV", 0, 3); + x2("(?i)[ǰ]", "ǰ", 0, 2); + x2("(?i)[ǰ]", "j\xcc\x8c", 0, 3); + //x2("(?i)[j]\xcc\x8c", "ǰ", 0, 2); + + n(" 
\xfd", ""); /* https://bugs.php.net/bug.php?id=77370 */ + /* can't use \xfc00.. because compiler error: hex escape sequence out of range */ + n("()0\\xfc00000\\xfc00000\\xfc00000\xfc", ""); /* https://bugs.php.net/bug.php?id=77371 */ + x2("000||0\xfa", "0", 0, 0); /* https://bugs.php.net/bug.php?id=77381 */ + e("(?i)000000000000000000000\xf0", "", ONIGERR_INVALID_CODE_POINT_VALUE); /* https://bugs.php.net/bug.php?id=77382 */ + n("0000\\\xf5", "0"); /* https://bugs.php.net/bug.php?id=77385 */ + n("(?i)FFF00000000000000000\xfd", ""); /* https://bugs.php.net/bug.php?id=77394 */ + e("x{55380}{77590}", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + e("(xyz){40000}{99999}(?<name>vv)", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + e("f{90000,90000}{80000,80000}", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + n("f{90000,90000}{80000,80001}", ""); x2("\\p{Common}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ x2("\\p{In_Enclosed_CJK_Letters_and_Months}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ + e("\\x{7fffffff}", "", ONIGERR_TOO_BIG_WIDE_CHAR_VALUE); + e("[\\x{7fffffff}]", "", ONIGERR_INVALID_CODE_POINT_VALUE); e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); e("(?<abc>\\g<abc>)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); e("(?<=(?>abc))", "abc", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); |