diff options
Diffstat (limited to 'test/test_utf8.c')
-rw-r--r-- | test/test_utf8.c | 184 |
1 files changed, 181 insertions, 3 deletions
diff --git a/test/test_utf8.c b/test/test_utf8.c index d6fc761..1bbc071 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -1,6 +1,6 @@ /* * test_utf8.c - * Copyright (c) 2019 K.Kosako + * Copyright (c) 2019-2020 K.Kosako */ #include "config.h" #ifdef ONIG_ESCAPE_UCHAR_COLLISION @@ -685,7 +685,7 @@ extern int main(int argc, char* argv[]) x2("aaaaa(?~)", "aaaaaaaaaa", 0, 5); x2("(?~(?:|aaa))", "aaa", 0, 0); x2("(?~aaa|)", "aaa", 0, 0); - x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // !!! + x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // nested absent functions cause strange result x2("/\\*(?~\\*/)\\*/", "/* */ */", 0, 5); x2("(?~\\w+)zzzzz", "zzzzz", 0, 5); x2("(?~\\w*)zzzzz", "zzzzz", 0, 5); @@ -1198,6 +1198,15 @@ extern int main(int argc, char* argv[]) x2("\\g'0'++{,0}?", "abcdefgh", 0, 0); x2("\\g'0'++{,0}b", "abcdefgh", 1, 2); x2("\\g'0'++{,0}?def", "abcdefgh", 3, 6); + x2("a{1,3}?", "aaa", 0, 1); + x2("a{3}", "aaa", 0, 3); + x2("a{3}?", "aaa", 0, 3); + x2("a{3}?", "aa", 0, 0); + x2("a{3,3}?", "aaa", 0, 3); + n("a{3,3}?", "aa"); + x2("a{1,3}+", "aaaaaa", 0, 6); + x2("a{3}+", "aaaaaa", 0, 6); + x2("a{3,3}+", "aaaaaa", 0, 6); n("a{2,3}?", "a"); n("a{3,2}a", "aaa"); x2("a{3,2}b", "aaab", 0, 4); @@ -1212,11 +1221,113 @@ extern int main(int argc, char* argv[]) x2("[a[xyz]-c]", "a", 0, 1); x2("[a[xyz]-c]", "-", 0, 1); x2("[a[xyz]-c]", "c", 0, 1); + x2("(a.c|def)(.{4})(?<=\\1)", "abcdabc", 0, 7); + x2("(a.c|de)(.{4})(?<=\\1)", "abcdabc", 0, 7); + x2("(a.c|def)(.{5})(?<=d\\1e)", "abcdabce", 0, 8); + x2("(a.c|.)d(?<=\\k<1>d)", "zzzzzabcdabc", 5, 9); + x2("(?<=az*)abc", "azzzzzzzzzzabcdabcabc", 11, 14); + x2("(?<=ab|abc|abcd)ef", "abcdef", 4, 6); + x2("(?<=ta+|tb+|tc+|td+)zz", "tcccccccccczz", 11, 13); + x2("(?<=t.{7}|t.{5}|t.{2}|t.)zz", "tczz", 2, 4); + x2("(?<=t.{7}|t.{5}|t.{2})zz", "tczzzz", 3, 5); + x2("(?<=t.{7}|t.{5}|t.{3})zz", "tczzazzbzz", 8, 10); + n("(?<=t.{7}|t.{5}|t.{3})zz", "tczzazzbczz"); + x2("(?<=(ab|abc|abcd))ef", "abcdef", 4, 6); + x2("(?<=(ta+|tb+|tc+|td+))zz", "tcccccccccczz", 11, 13); + x2("(?<=(t.{7}|t.{5}|t.{2}|t.))zz", "tczz", 2, 4); + x2("(?<=(t.{7}|t.{5}|t.{2}))zz", "tczzzz", 3, 5); + x2("(?<=(t.{7}|t.{5}|t.{3}))zz", "tczzazzbzz", 8, 10); + n("(?<=(t.{7}|t.{5}|t.{3}))zz", "tczzazzbczz"); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "abaaba", 0, 6); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "ababab", 0, 6); + n("(.{1,4})(.{1,4})(?<=\\2\\1)", "abcdabce"); + x2("(.{1,4})(.{1,4})(?<=\\2\\1)", "abcdabceabce", 4, 12); + x2("(?<=a)", "a", 1, 1); + x2("(?<=a.*\\w)z", "abbbz", 4, 5); + n("(?<=a.*\\w)z", "abb z"); + x2("(?<=a.*\\W)z", "abb z", 4, 5); + x2("(?<=a.*\\b)z", "abb z", 4, 5); + x2("(?<=(?>abc))", "abc", 3, 3); + x2("(?<=a\\Xz)", "abz", 3, 3); + n("(?<=^a*)bc", "zabc"); + n("(?<=a*\\b)b", "abc"); + x2("(?<=a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=a+.*[efg])z", "abcdfgz", 6, 7); + n("(?<=a+.*[efg])z", "bcdfz"); + x2("(?<=a*.*[efg])z", "bcdfz", 4, 5); + n("(?<=a+.*[efg])z", "abcdz"); + x2("(?<=v|t|a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=v|t|^a+.*[efg])z", "abcdfz", 5, 6); + x2("(?<=^(?:v|t|a+.*[efg]))z", "abcdfz", 5, 6); + x2("(?<=v|^t|a+.*[efg])z", "uabcdfz", 6, 7); + n("^..(?<=(a{,2}))\\1z", "aaaaz"); // !!! look-behind is shortest priority + x2("^..(?<=(a{,2}))\\1z", "aaz", 0, 3); // shortest priority + e("(?<=(?~|zoo)a.*z)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + e("(?<=(?~|)a.*z)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + e("(a(?~|boo)z){0}(?<=\\g<1>)", "abcdefz", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); + x2("(?<=(?<= )| )", "abcde fg", 6, 6); // #173 + x2("(?<=D|)(?<=@!nnnnnnnnnIIIIn;{1}D?()|<x@x*xxxD|)(?<=@xxx|xxxxx\\g<1>;{1}x)", "(?<=D|)(?<=@!nnnnnnnnnIIIIn;{1}D?()|<x@x*xxxD|)(?<=@xxx|xxxxx\\g<1>;{1}x)", 55, 55); // #173 + x2("(?<=;()|)\\g<1>", "", 0, 0); // reduced #173 + x2("(?<=;()|)\\k<1>", ";", 1, 1); + x2("(())\\g<3>{0}(?<=|())", "abc", 0, 0); // #175 + x2("(?<=()|)\\1{0}", "abc", 0, 0); + e("(?<!xxxxxxxxxxxxxxxxxxxxxxx{32774}{65521}xxxxxxxx{65521}xxxxxxxxxxxxxx{32774}xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx)", "", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); // #177 + x2("(?<=(?<=abc))def", "abcdef", 3, 6); + x2("(?<=ab(?<=.+b)c)def", "abcdef", 3, 6); + n("(?<=ab(?<=a+)c)def", "abcdef"); + n("(?<=abc)(?<!abc)def", "abcdef"); + n("(?<!ab.)(?<=.bc)def", "abcdef"); + x2("(?<!ab.)(?<=.bc)def", "abcdefcbcdef", 9, 12); + n("(?<!abc)def", "abcdef"); + n("(?<!xxx|abc)def", "abcdef"); + n("(?<!xxxxx|abc)def", "abcdef"); + n("(?<!xxxxx|abc)def", "xxxxxxdef"); + n("(?<!x+|abc)def", "abcdef"); + n("(?<!x+|abc)def", "xxxxxxxxxdef"); + x2("(?<!x+|abc)def", "xxxxxxxxzdef", 9, 12); + n("(?<!a.*z|a)def", "axxxxxxxzdef"); + n("(?<!a.*z|a)def", "bxxxxxxxadef"); + x2("(?<!a.*z|a)def", "axxxxxxxzdefxxdef", 14, 17); + x2("(?<!a.*z|a)def", "bxxxxxxxadefxxdef", 14, 17); + x2("(?<!a.*z|a)def", "bxxxxxxxzdef", 9, 12); + x2("(?<!x+|y+)\\d+", "xxx572", 4, 6); + x2("(?<!3+|4+)\\d+", "33334444", 0, 8); + n(".(?<!3+|4+)\\d+", "33334444"); + n("(.{,3})..(?<!\\1)", "aaaaa"); + x2("(.{,3})..(?<!\\1)", "abcde", 0, 5); + x2("(.{,3})...(?<!\\1)", "abcde", 0, 5); + x2("(a.c)(.{3,}?)(?<!\\1)", "abcabcd", 0, 7); + x2("(a*)(.{3,}?)(?<!\\1)", "abcabcd", 0, 5); + x2("(?:(a.*b)|c.*d)(?<!(?(1))azzzb)", "azzzzb", 0, 6); + n("(?:(a.*b)|c.*d)(?<!(?(1))azzzb)", "azzzb"); + x2("<(?<!NT{+}abcd)", "<(?<!NT{+}abcd)", 0, 1); + x2("(?<!a.*c)def", "abbbbdef", 5, 8); + n("(?<!a.*c)def", "abbbcdef"); + x2("(?<!a.*X\\b)def", "abbbbbXdef", 7, 10); + n("(?<!a.*X\\B)def", "abbbbbXdef"); + x2("(?<!a.*[uvw])def", "abbbbbXdef", 7, 10); + n("(?<!a.*[uvw])def", "abbbbbwdef"); + x2("(?<!ab*\\S+)def", "abbbbb def", 9, 12); + x2("(?<!a.*\\S)def", "abbbbb def", 7, 10); + n("(?<!ab*\\s+)def", "abbbbb def"); + x2("(?<!ab*\\s+\\B)def", "abbbbb def", 9, 12); + n("(?<!v|t|a+.*[efg])z", "abcdfz"); + x2("(?<!v|t|a+.*[efg])z", "abcdfzavzuz", 10, 11); + n("(?<!v|t|^a+.*[efg])z", "abcdfz"); + n("(?<!^(?:v|t|a+.*[efg]))z", "abcdfz"); + x2("(?<!v|^t|^a+.*[efg])z", "uabcdfz", 6, 7); x2("((?(a)\\g<1>|b))", "aab", 0, 3); x2("((?(a)\\g<1>))", "aab", 0, 2); x2("(b(?(a)|\\g<1>))", "bba", 0, 3); e("(()(?(2)\\g<1>))", "", ONIGERR_NEVER_ENDING_RECURSION); + x2("(?(a)(?:b|c))", "ac", 0, 2); + n("^(?(a)b|c)", "ac"); + x2("(?i)a|b", "B", 0, 1); + n("((?i)a|b.)|c", "C"); + n("c(?i)a.|b.", "Caz"); + x2("c(?i)a|b", "cB", 0, 2); /* == c(?i:a|b) */ + x2("c(?i)a.|b.", "cBb", 0, 3); x2("(?i)st", "st", 0, 2); x2("(?i)st", "St", 0, 2); @@ -1279,7 +1390,68 @@ extern int main(int argc, char* argv[]) x2("(?i)[ǰ]", "ǰ", 0, 2); x2("(?i)[ǰ]", "j\xcc\x8c", 0, 3); //x2("(?i)[j]\xcc\x8c", "ǰ", 0, 2); + x2("(?i)\ufb00a", "ffa", 0, 3); + x2("(?i)ffz", "\xef\xac\x80z", 0, 4); + x2("(?i)\u2126", "\xcf\x89", 0, 2); + x2("a(?i)\u2126", "a\xcf\x89", 0, 3); + x2("(?i)A\u2126", "a\xcf\x89", 0, 3); + x2("(?i)A\u2126=", "a\xcf\x89=", 0, 4); + x2("(?i:ss)=1234567890", "\xc5\xbf\xc5\xbf=1234567890", 0, 15); + x2("\\x{000A}", "\x0a", 0, 1); + x2("\\x{000A 002f}", "\x0a\x2f", 0, 2); + x2("\\x{000A 002f }", "\x0a\x2f", 0, 2); + x2("\\x{007C 001b}", "\x7c\x1b", 0, 2); + x2("\\x{1 2 3 4 5 6 7 8 9 a b c d e f}", "\x01\x02\x3\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", 0, 15); + x2("a\\x{000A 002f}@", "a\x0a\x2f@", 0, 4); + x2("a\\x{0060\n0063}@", "a\x60\x63@", 0, 4); + e("\\x{00000001 000000012}", "", ONIGERR_TOO_LONG_WIDE_CHAR_VALUE); + e("\\x{000A 00000002f}", "", ONIGERR_TOO_LONG_WIDE_CHAR_VALUE); + e("\\x{000A 002f/", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\x{000A 002f /", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\x{000A", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\x{000A ", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\x{000A 002f ", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("\\o{102}", "B", 0, 1); + x2("\\o{102 103}", "BC", 0, 2); + x2("\\o{0160 0000161}", "pq", 0, 2); + x2("\\o{1 2 3 4 5 6 7 10 11 12 13 14 15 16 17}", "\x01\x02\x3\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", 0, 15); + x2("\\o{0007 0010 }", "\x07\x08", 0, 2); + e("\\o{0000 0015/", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\o{0000 0015 /", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\o{0015", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\o{0015 ", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("\\o{0007 002f}", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[\\x{000A}]", "\x0a", 0, 1); + x2("[\\x{000A 002f}]+", "\x0a\x2f\x2e", 0, 2); + x2("[\\x{01 0F 1A 2c 4B}]+", "\x20\x01\x0f\x1a\x2c\x4b\x1b", 1, 6); + x2("[\\x{0020 0024}-\\x{0026}]+", "\x25\x24\x26\x23", 0, 3); + x2("[\\x{0030}-\\x{0033 005a}]+", "\x30\x31\x32\x33\x5a\34", 0, 5); + e("[\\x{000A]", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{000A ]", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{000A }]", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[\\o{102}]", "B", 0, 1); + x2("[\\o{102 103}]*", "BC", 0, 2); + e("[a\\o{002 003]bcde|zzz", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[\\x{0030-0039}]+", "abc0123456789def", 3, 13); + x2("[\\x{0030 - 0039 }]+", "abc0123456789def", 3, 13); + x2("[\\x{0030 - 0039 0063 0064}]+", "abc0123456789def", 2, 14); + x2("[\\x{0030 - 0039 0063-0065}]+", "acde019b", 1, 7); + e("[\\x{0030 - 0039-0063 0064}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030 - }]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030 -- 0040}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030--0040}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030 - - 0040}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[\\x{0030 0044 - }]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + e("[a-\\x{0070 - 0039}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[a-\\x{0063 0071}]+", "dabcqz", 1, 5); + x2("[-\\x{0063-0065}]+", "ace-df", 1, 5); + x2("[\\x61-\\x{0063 0065}]+", "abced", 0, 4); + e("[\\x61-\\x{0063-0065}]+", "", ONIGERR_INVALID_CODE_POINT_VALUE); + x2("[t\\x{0063 0071}]+", "tcqb", 0, 3); + x2("[\\W\\x{0063 0071}]+", "*cqa", 0, 3); + + n("a(b|)+d", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcd"); /* https://www.haijin-boys.com/discussions/5079 */ n(" \xfd", ""); /* https://bugs.php.net/bug.php?id=77370 */ /* can't use \xfc00.. because compiler error: hex escape sequence out of range */ n("()0\\xfc00000\\xfc00000\\xfc00000\xfc", ""); /* https://bugs.php.net/bug.php?id=77371 */ @@ -1299,8 +1471,14 @@ extern int main(int argc, char* argv[]) e("[\\x{7fffffff}]", "", ONIGERR_INVALID_CODE_POINT_VALUE); e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); e("(?<abc>\\g<abc>)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); - e("(?<=(?>abc))", "abc", ONIGERR_INVALID_LOOK_BEHIND_PATTERN); e("(*FOO)", "abcdefg", ONIGERR_UNDEFINED_CALLOUT_NAME); + e("*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("|*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?i)*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?:*)", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + e("(?m:*)", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); + x2("(?:)*", "abc", 0, 0); + e("^*", "abc", ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID); fprintf(stdout, "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", |