From 4216de6a3336cbc6dddb572cb7e6ab6193bf3729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Fri, 29 Nov 2019 11:26:35 +0100 Subject: New upstream version 6.9.4 --- test/Makefile.am | 11 +- test/test_regset.c | 465 +++++++++++++++++++++++++++++++++++++++++++++++++++++ test/test_utf8.c | 82 +++++++++- test/testc.c | 3 +- test/testu.c | 3 +- 5 files changed, 558 insertions(+), 6 deletions(-) create mode 100644 test/test_regset.c (limited to 'test') diff --git a/test/Makefile.am b/test/Makefile.am index 67b5d1e..4d62568 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -6,9 +6,9 @@ AM_CFLAGS = -Wall -Wno-invalid-source-encoding AM_CPPFLAGS = -I$(top_srcdir)/src if ENABLE_POSIX_API -TESTS = test_utf8 testc testp testcu +TESTS = test_utf8 testc testp testcu test_regset else -TESTS = test_utf8 testc testcu +TESTS = test_utf8 testc testcu test_regset endif check_PROGRAMS = $(TESTS) @@ -24,6 +24,9 @@ if ENABLE_POSIX_API endif @echo "[Oniguruma API, UTF-16 check]" @./testcu | grep RESULT + @echo "" + @echo "[Oniguruma API, regset check]" + @./test_regset test_uchar: @echo "[UChar in oniguruma.h check]" @@ -44,9 +47,13 @@ testp_CFLAGS = -DPOSIX_TEST -Wall -Wno-invalid-source-encoding testcu_SOURCES = testu.c testcu_LDADD = $(lib_onig) +test_regset_SOURCES = test_regset.c +test_regset_LDADD = $(lib_onig) + gcov: make CFLAGS="--coverage" test_utf8 make CFLAGS="--coverage" testc make CFLAGS="--coverage" testp make CFLAGS="--coverage" testcu + make CFLAGS="--coverage" test_regset diff --git a/test/test_regset.c b/test/test_regset.c new file mode 100644 index 0000000..497fbd6 --- /dev/null +++ b/test/test_regset.c @@ -0,0 +1,465 @@ +/* + * test_regset.c --- test for regset API + * Copyright (c) 2019 K.Kosako + */ +#include +#include +#include +#include + +#include "oniguruma.h" + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + + +static int +make_regset(int line_no, int n, char* pat[], OnigRegSet** rset, int error_no) +{ + int r; + int i; + OnigRegSet* set; + regex_t* reg; + OnigErrorInfo einfo; + + *rset = NULL; + r = onig_regset_new(&set, 0, NULL); + if (r != 0) return r; + + for (i = 0; i < n; i++) { + r = onig_new(®, (UChar* )pat[i], (UChar* )(pat[i] + strlen(pat[i])), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, + &einfo); + if (r != 0) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + if (error_no == 0) { + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stderr, "ERROR: %d: %s /%s/\n", line_no, s, pat[i]); + nerror++; + } + else { + if (r == error_no) { + fprintf(stdout, "OK(ERROR): %d: /%s/ %d\n", line_no, pat[i], r); + nsucc++; + } + else { + fprintf(stdout, "FAIL(ERROR): %d: /%s/ %d, %d\n", + line_no, pat[i], error_no, r); + nfail++; + } + } + return r; + } + + r = onig_regset_add(set, reg); + if (r != 0) { + onig_regset_free(set); + fprintf(stderr, "ERROR: %d: onig_regset_add(): /%s/\n", line_no, pat[i]); + nerror++; + return r; + } + } + + *rset = set; + return 0; +} + +#ifndef _WIN32 + +static double +get_sec(struct timespec* ts, struct timespec* te) +{ + double t; + + t = (te->tv_sec - ts->tv_sec) + + (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0; + return t; +} + +/* clock_gettime() doesn't exist in Windows */ + +static int +time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, double* rt_reg) +{ + int r; + int i; + int match_pos; + OnigRegSet* set; + struct timespec ts1, ts2; + double t_set, t_reg; + + r = make_regset(0, n, ps, &set, 0); + if (r != 0) return r; + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); + + for (i = 0; i < repeat; i++) { + r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end, + ONIG_REGSET_POSITION_LEAD, ONIG_OPTION_NONE, &match_pos); + if (r < 0) { + fprintf(stderr, "FAIL onig_regset_search(POSITION_LEAD): %d\n", r); + return r; + } + } + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); + t_set = get_sec(&ts1, &ts2); + + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); + + for (i = 0; i < repeat; i++) { + r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end, + ONIG_REGSET_REGEX_LEAD, ONIG_OPTION_NONE, &match_pos); + if (r < 0) { + fprintf(stderr, "FAIL onig_regset_search(REGEX_LEAD): %d\n", r); + return r; + } + } + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); + onig_regset_free(set); + + t_reg = get_sec(&ts1, &ts2); + + *rt_set = t_set; + *rt_reg = t_reg; + return 0; +} +#endif + +static void +fisher_yates_shuffle(int n, char* ps[], char* cps[]) +{ +#define GET_RAND(n) (rand()%(n+1)) +#define SWAP(a,b) { char* tmp = a; a = b; b = tmp; } + + int i; + + for (i = 0; i < n; i++) + cps[i] = ps[i]; + + for (i = n - 1; i > 0; i--) { + int x = GET_RAND(i); + SWAP(cps[i], cps[x]); + } +} + +#ifndef _WIN32 +static void +time_compare(int n, char* ps[], char* s, char* end) +{ + int r; + int i; + int repeat; + double t_set, t_reg; + double total_set, total_reg; + char** cps; + + cps = (char** )malloc(sizeof(char*) * n); + if (cps == 0) return ; + + repeat = 100 / n; + total_set = total_reg = 0.0; + for (i = 0; i < n; i++) { + fisher_yates_shuffle(n, ps, cps); + r = time_test(repeat, n, cps, s, end, &t_set, &t_reg); + if (r != 0) return ; + total_set += t_set; + total_reg += t_reg; + } + + free(cps); + + fprintf(stdout, "POS lead: %6.2lfmsec. REG lead: %6.2lfmsec.\n", + total_set * 1000.0, total_reg * 1000.0); +} +#endif + + +static OnigRegSetLead XX_LEAD = ONIG_REGSET_POSITION_LEAD; + +static void +xx(int line_no, int n, char* ps[], char* s, int from, int to, int mem, int not, int error_no) +{ + int r; + int match_pos; + int match_index; + OnigRegSet* set; + char *end; + + r = make_regset(line_no, n, ps, &set, error_no); + if (r != 0) return ; + + end = s + strlen(s); + + r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end, + XX_LEAD, ONIG_OPTION_NONE, &match_pos); + if (r < 0) { + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): %d\n", line_no); + nsucc++; + } + else { + fprintf(stdout, "FAIL: %d\n", line_no); + nfail++; + } + } + else { + if (error_no == 0) { + char buf[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )buf, r); + fprintf(stderr, "ERROR: %d: %s\n", line_no, buf); + nerror++; + } + else { + if (r == error_no) { + fprintf(stdout, "OK(ERROR): %d: %d\n", line_no, r); + nsucc++; + } + else { + fprintf(stdout, "FAIL ERROR NO: %d: %d, %d\n", line_no, error_no, r); + nfail++; + } + } + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): %d\n", line_no); + nfail++; + } + else { + OnigRegion* region; + + match_index = r; + region = onig_regset_get_region(set, match_index); + if (region == 0) { + fprintf(stderr, "ERROR: %d: can't get region.\n", line_no); + nerror++; + return ; + } + + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: %d\n", line_no); + nsucc++; + } + else { + char buf[1000]; + int len; + len = region->end[mem] - region->beg[mem]; + strncpy(buf, s + region->beg[mem], len); + buf[len] = '\0'; + fprintf(stdout, "FAIL: %d: %d-%d : %d-%d (%s)\n", line_no, + from, to, region->beg[mem], region->end[mem], buf); + nfail++; + } + } + } + + onig_regset_free(set); +} + +static void +x2(int line_no, int n, char* ps[], char* s, int from, int to) +{ + xx(line_no, n, ps, s, from, to, 0, 0, 0); +} + +static void +x3(int line_no, int n, char* ps[], char* s, int from, int to, int mem) +{ + xx(line_no, n, ps, s, from, to, mem, 0, 0); +} + +static void +n(int line_no, int n, char* ps[], char* s) +{ + xx(line_no, n, ps, s, 0, 0, 0, 1, 0); +} + +#define ASIZE(a) sizeof(a)/sizeof(a[0]) +#define X2(ps,s,from,to) x2(__LINE__,ASIZE(ps),ps,s,from,to) +#define X3(ps,s,from,to,mem) x3(__LINE__,ASIZE(ps),ps,s,from,to,mem) +#define N(ps,s) n(__LINE__,ASIZE(ps),ps,s) +#define NZERO(s) n(__LINE__,0,(char** )0,s) + +#ifndef _WIN32 + +/* getdelim() doesn't exist in Windows */ + +static int +get_all_content_of_file(char* path, char** rs, char** rend) +{ + size_t len; + size_t n; + char* line; + FILE* fp; + + fp = fopen(path, "r"); + if (fp == 0) return -1; + + n = 0; + line = NULL; + len = getdelim(&line, &n, EOF, fp); + fclose(fp); + if (len < 0) return -2; + + *rs = line; + *rend = line + len; + return 0; +} +#endif + + +#define TEXT_PATH "kofu-utf8.txt" + +/* --- To get kofu.txt --- + $ wget https://www.aozora.gr.jp/cards/000148/files/774_ruby_1640.zip + $ unzip 774_ruby_1640.zip + $ nkf -Lu -w8 kofu.txt > kofu-utf8.txt + (convert encoding to utf-8 with BOM and line terminator to be Unix-form) +*/ + +static char* p1[] = { + "abc", + "(bca)", + "(cab)" +}; + +static char* p2[] = { + "小説", + "9", + "夏目漱石", +}; + +static char* p3[] = { + "^いる。", + "^校正", + "^底本", + "^ 翌日", +}; + +static char* p4[] = { + "《[^》]{5}》", + "《[^》]{6}》", + "《[^》]{7}》", + "《[^》]{8}》", + "《[^》]{9}》", + "《[^》]{10}》", + "《[^》]{11}》", + "《[^》]{12}》", + "《[^》]{13}》", + "《[^》]{14}》", + "《[^》]{15}》", + "《[^》]{16}》", + "《[^》]{17}》", + "《[^》]{18}》", + "《[^》]{19}》", + "《[^》]{20}》", +}; + +static char* p5[] = { + "小室圭", + "bbbbbb", + "ドナルド・トランプ", + "筑摩書房", + "松原", + "aaaaaaaaa", + "bbbbbbbbb", + "ccccc", + "ddddddddddd", + "eee", + "ffffffffffff", + "gggggggggg", + "hhhhhhhhhhhhhh", + "iiiiiii", +}; + +static char* p6[] = { + "^.{1000,}", + "松原", + "小室圭", + "ドナルド・トランプ", + "筑摩書房", +}; + +static char* p7[] = { + "0+", "1+", "2+", "3+", "4+", "5+", "6+", "7+", "8+", "9+", +}; + +extern int +main(int argc, char* argv[]) +{ + int r; + int file_exist; + char *s, *end; + OnigEncoding use_encs[1]; + + use_encs[0] = ONIG_ENCODING_UTF8; + onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); + + srand(12345); + + XX_LEAD = ONIG_REGSET_POSITION_LEAD; + + NZERO(" abab bccab ca"); + X2(p1, " abab bccab ca", 8, 11); + X3(p1, " abab bccab ca", 8, 11, 1); + N(p2, " XXXX AAA 1223 012345678bbb"); + X2(p2, "0123456789", 9, 10); + X2(p7, "abcde 555 qwert", 6, 9); + + XX_LEAD = ONIG_REGSET_REGEX_LEAD; + + NZERO(" abab bccab ca"); + X2(p1, " abab bccab ca", 8, 11); + X3(p1, " abab bccab ca", 8, 11, 1); + N(p2, " XXXX AAA 1223 012345678bbb"); + X2(p2, "0123456789", 9, 10); + X2(p7, "abcde 555 qwert", 6, 9); + +#ifndef _WIN32 + r = get_all_content_of_file(TEXT_PATH, &s, &end); + if (r == 0) { + fprintf(stdout, "FILE: %s, size: %d\n", TEXT_PATH, (int )(end - s)); + file_exist = 1; + } + else { + fprintf(stdout, "Ignore %s\n", TEXT_PATH); + file_exist = 0; + } +#else + file_exist = 0; +#endif + + if (file_exist != 0) { + X2(p2, s, 10, 22); + X2(p3, s, 496079, 496088); + X2(p4, s, 1294, 1315); + } + + fprintf(stdout, + "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + + if (file_exist != 0) { +#ifndef _WIN32 + fprintf(stdout, "\n"); + time_compare(ASIZE(p2), p2, s, end); + time_compare(ASIZE(p3), p3, s, end); + time_compare(ASIZE(p4), p4, s, end); + time_compare(ASIZE(p5), p5, s, end); + time_compare(ASIZE(p6), p6, s, end); + fprintf(stdout, "\n"); +#endif + free(s); + } + + onig_end(); + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} diff --git a/test/test_utf8.c b/test/test_utf8.c index 2338526..d6fc761 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -132,8 +132,9 @@ static void e(char* pattern, char* str, int error_no) extern int main(int argc, char* argv[]) { - static OnigEncoding use_encs[] = { ONIG_ENCODING_UTF8 }; + OnigEncoding use_encs[1]; + use_encs[0] = ONIG_ENCODING_UTF8; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); err_file = stdout; @@ -298,6 +299,8 @@ extern int main(int argc, char* argv[]) x2("(?i:xssy)", "xs\xc5\xbfy", 0, 5); x2("(?i:xssy)", "x\xc3\x9fy", 0, 4); x2("(?i:xssy)", "x\xe1\xba\x9ey", 0, 5); + x2("(?i:x\xc3\x9fy)", "xssy", 0, 4); + x2("(?i:x\xc3\x9fy)", "xSSy", 0, 4); x2("(?i:\xc3\x9f)", "ss", 0, 2); x2("(?i:\xc3\x9f)", "SS", 0, 2); x2("(?i:[\xc3\x9f])", "ss", 0, 2); @@ -1204,6 +1207,78 @@ extern int main(int argc, char* argv[]) x2("a{2,3}+a", "aaa", 0, 3); /* == (?:a{2,3})+*/ x2("[\\x{0}-\\x{7fffffff}]", "a", 0, 1); x2("[\\x{7f}-\\x{7fffffff}]", "\xe5\xae\xb6", 0, 3); + x2("[a[cdef]]", "a", 0, 1); + n("[a[xyz]-c]", "b"); + x2("[a[xyz]-c]", "a", 0, 1); + x2("[a[xyz]-c]", "-", 0, 1); + x2("[a[xyz]-c]", "c", 0, 1); + + x2("((?(a)\\g<1>|b))", "aab", 0, 3); + x2("((?(a)\\g<1>))", "aab", 0, 2); + x2("(b(?(a)|\\g<1>))", "bba", 0, 3); + e("(()(?(2)\\g<1>))", "", ONIGERR_NEVER_ENDING_RECURSION); + + x2("(?i)st", "st", 0, 2); + x2("(?i)st", "St", 0, 2); + x2("(?i)st", "sT", 0, 2); + x2("(?i)st", "\xC5\xBFt", 0, 3); // U+017F + x2("(?i)st", "\xEF\xAC\x85", 0, 3); // U+FB05 + x2("(?i)st", "\xEF\xAC\x86", 0, 3); // U+FB06 + x2("(?i)ast", "Ast", 0, 3); + x2("(?i)ast", "ASt", 0, 3); + x2("(?i)ast", "AsT", 0, 3); + x2("(?i)ast", "A\xC5\xBFt", 0, 4); // U+017F + x2("(?i)ast", "A\xEF\xAC\x85", 0, 4); // U+FB05 + x2("(?i)ast", "A\xEF\xAC\x86", 0, 4); // U+FB06 + x2("(?i)stZ", "stz", 0, 3); + x2("(?i)stZ", "Stz", 0, 3); + x2("(?i)stZ", "sTz", 0, 3); + x2("(?i)stZ", "\xC5\xBFtz", 0, 4); // U+017F + x2("(?i)stZ", "\xEF\xAC\x85z", 0, 4); // U+FB05 + x2("(?i)stZ", "\xEF\xAC\x86z", 0, 4); // U+FB06 + x2("(?i)BstZ", "bstz", 0, 4); + x2("(?i)BstZ", "bStz", 0, 4); + x2("(?i)BstZ", "bsTz", 0, 4); + x2("(?i)BstZ", "b\xC5\xBFtz", 0, 5); // U+017F + x2("(?i)BstZ", "b\xEF\xAC\x85z", 0, 5); // U+FB05 + x2("(?i)BstZ", "b\xEF\xAC\x86z", 0, 5); // U+FB06 + x2("(?i).*st\\z", "tttssss\xC5\xBFt", 0, 10); // U+017F + x2("(?i).*st\\z", "tttssss\xEF\xAC\x85", 0, 10); // U+FB05 + x2("(?i).*st\\z", "tttssss\xEF\xAC\x86", 0, 10); // U+FB06 + x2("(?i).*あstい\\z", "tttssssあ\xC5\xBFtい", 0, 16); // U+017F + x2("(?i).*あstい\\z", "tttssssあ\xEF\xAC\x85い", 0, 16); // U+FB05 + x2("(?i).*あstい\\z", "tttssssあ\xEF\xAC\x86い", 0, 16); // U+FB06 + x2("(?i).*\xC5\xBFt\\z", "tttssssst", 0, 9); // U+017F + x2("(?i).*\xEF\xAC\x85\\z", "tttssssあst", 0, 12); // U+FB05 + x2("(?i).*\xEF\xAC\x86い\\z", "tttssssstい", 0, 12); // U+FB06 + x2("(?i).*\xEF\xAC\x85\\z", "tttssssあ\xEF\xAC\x85", 0, 13); + + x2("(?i).*ss", "abcdefghijklmnopqrstuvwxyz\xc3\x9f", 0, 28); // U+00DF + x2("(?i).*ss.*", "abcdefghijklmnopqrstuvwxyz\xc3\x9fxyz", 0, 31); // U+00DF + x2("(?i).*\xc3\x9f", "abcdefghijklmnopqrstuvwxyzss", 0, 28); // U+00DF + x2("(?i).*ss.*", "abcdefghijklmnopqrstuvwxyzSSxyz", 0, 31); + + x2("(?i)ssv", "\xc3\x9fv", 0, 3); // U+00DF + x2("(?i)(?<=ss)v", "SSv", 2, 3); + x2("(?i)(?<=\xc3\x9f)v", "\xc3\x9fv", 2, 3); + //x2("(?i)(?<=\xc3\x9f)v", "ssv", 2, 3); + //x2("(?i)(?<=ss)v", "\xc3\x9fv", 2, 3); + + /* #156 U+01F0 (UTF-8: C7 B0) */ + x2("(?i).+Isssǰ", ".+Isssǰ", 0, 8); + x2(".+Isssǰ", ".+Isssǰ", 0, 8); + x2("(?i)ǰ", "ǰ", 0, 2); + x2("(?i)ǰ", "j\xcc\x8c", 0, 3); + x2("(?i)j\xcc\x8c", "ǰ", 0, 2); + x2("(?i)5ǰ", "5ǰ", 0, 3); + x2("(?i)5ǰ", "5j\xcc\x8c", 0, 4); + x2("(?i)5j\xcc\x8c", "5ǰ", 0, 3); + x2("(?i)ǰv", "ǰV", 0, 3); + x2("(?i)ǰv", "j\xcc\x8cV", 0, 4); + x2("(?i)j\xcc\x8cv", "ǰV", 0, 3); + x2("(?i)[ǰ]", "ǰ", 0, 2); + x2("(?i)[ǰ]", "j\xcc\x8c", 0, 3); + //x2("(?i)[j]\xcc\x8c", "ǰ", 0, 2); n(" \xfd", ""); /* https://bugs.php.net/bug.php?id=77370 */ /* can't use \xfc00.. because compiler error: hex escape sequence out of range */ @@ -1212,7 +1287,10 @@ extern int main(int argc, char* argv[]) e("(?i)000000000000000000000\xf0", "", ONIGERR_INVALID_CODE_POINT_VALUE); /* https://bugs.php.net/bug.php?id=77382 */ n("0000\\\xf5", "0"); /* https://bugs.php.net/bug.php?id=77385 */ n("(?i)FFF00000000000000000\xfd", ""); /* https://bugs.php.net/bug.php?id=77394 */ - + e("x{55380}{77590}", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + e("(xyz){40000}{99999}(?vv)", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + e("f{90000,90000}{80000,80000}", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); + n("f{90000,90000}{80000,80001}", ""); x2("\\p{Common}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ x2("\\p{In_Enclosed_CJK_Letters_and_Months}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ diff --git a/test/testc.c b/test/testc.c index c3174cd..5c60764 100644 --- a/test/testc.c +++ b/test/testc.c @@ -153,8 +153,9 @@ static void n(char* pattern, char* str) extern int main(int argc, char* argv[]) { #ifndef POSIX_TEST - static OnigEncoding use_encs[] = { ONIG_ENCODING_EUC_JP }; + OnigEncoding use_encs[1]; + use_encs[0] = ONIG_ENCODING_EUC_JP; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); #endif diff --git a/test/testu.c b/test/testu.c index 397da95..24397ab 100644 --- a/test/testu.c +++ b/test/testu.c @@ -190,8 +190,9 @@ static void n(char* pattern, char* str) extern int main(int argc, char* argv[]) { - static OnigEncoding use_encs[] = { ONIG_ENCODING_UTF16_BE }; + OnigEncoding use_encs[1]; + use_encs[0] = ONIG_ENCODING_UTF16_BE; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); err_file = stdout; -- cgit v1.2.3