From fc9ba4264eafbb5a6ec0f3cc4cd2e1964c9b8fcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 8 Nov 2020 10:58:30 +0100 Subject: New upstream version 6.9.6 --- .gitignore | 1 + CMakeLists.txt | 25 +- HISTORY | 58 +- Makefile.am | 5 + README | 12 +- README.md | 73 +- configure.ac | 37 +- doc/API | 56 +- doc/API.ja | 50 +- doc/CALLOUTS.API | 2 +- doc/RE | 4 +- doc/RE.ja | 4 +- doc/UNICODE_PROPERTIES | 1614 ++++++++++++++++++------------------- harnesses/base.c | 166 ++-- harnesses/makefile | 9 +- sample/Makefile.am | 6 +- sample/scan.c | 24 +- src/Makefile.am | 25 +- src/Makefile.windows | 2 +- src/big5.c | 13 +- src/config.h.cmake.in | 6 + src/euc_jp.c | 29 +- src/euc_kr.c | 13 +- src/euc_tw.c | 19 +- src/gb18030.c | 24 +- src/make_property.sh | 2 +- src/make_unicode_property.sh | 2 +- src/make_unicode_property_data.py | 4 +- src/onigposix.h | 41 +- src/oniguruma.h | 15 +- src/regcomp.c | 310 +++++-- src/regenc.c | 6 +- src/regerror.c | 2 + src/regexec.c | 592 +++++++------- src/regint.h | 42 +- src/regparse.c | 315 +++++--- src/regparse.h | 4 +- src/regposerr.c | 28 +- src/regposix.c | 94 ++- src/sjis.c | 14 +- src/st.c | 8 +- src/unicode.c | 13 +- src/utf16_be.c | 2 +- src/utf16_le.c | 2 +- src/utf32_be.c | 7 +- src/utf32_le.c | 7 +- test/test_back.c | 10 +- test/test_regset.c | 11 +- test/test_syntax.c | 8 + test/test_utf8.c | 35 +- test/testc.c | 8 + test/testu.c | 14 +- tis-ci/config.h | 109 +++ tis-ci/stub.c | 3 + tis-ci/test_back.config | 26 + tis-ci/test_regset.config | 20 + tis-ci/test_syntax.config | 26 + tis-ci/test_utf8.config | 25 + tis-ci/testc.config | 26 + tis-ci/testu.config | 24 + tis.config | 1336 ++++++++++++++++++++++++++++++ 61 files changed, 3975 insertions(+), 1493 deletions(-) create mode 100644 tis-ci/config.h create mode 100644 tis-ci/stub.c create mode 100644 tis-ci/test_back.config create mode 100644 tis-ci/test_regset.config create mode 100644 tis-ci/test_syntax.config create mode 100644 tis-ci/test_utf8.config create mode 100644 tis-ci/testc.config create mode 100644 tis-ci/testu.config create mode 100644 tis.config diff --git a/.gitignore b/.gitignore index 40c84fb..11974d8 100644 --- a/.gitignore +++ b/.gitignore @@ -67,6 +67,7 @@ m4/*.m4 /sample/count /sample/bug_fix /sample/regset +/sample/scan /sample/log* /harnesses/utf16*.dict diff --git a/CMakeLists.txt b/CMakeLists.txt index 29a1417..06af497 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.1) project(oniguruma - VERSION 6.9.5 + VERSION 6.9.6 LANGUAGES C) set(PACKAGE onig) @@ -8,6 +8,7 @@ set(PACKAGE_VERSION ${PROJECT_VERSION}) option(BUILD_SHARED_LIBS "Build shared libraries" ON) option(ENABLE_POSIX_API "Include POSIX API" OFF) +option(ENABLE_BINARY_COMPATIBLE_POSIX_API "Include Binary compatible POSIX API" OFF) if(MSVC) option(MSVC_STATIC_RUNTIME "Build with static runtime" OFF) endif() @@ -32,6 +33,8 @@ check_include_files(unistd.h HAVE_UNISTD_H) check_include_files(inttypes.h HAVE_INTTYPES_H) check_type_size(int SIZEOF_INT) check_type_size(long SIZEOF_LONG) +check_type_size("long long" SIZEOF_LONG_LONG) +check_type_size("void*" SIZEOF_VOIDP) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) @@ -54,9 +57,14 @@ set(_SRCS src/regint.h src/regparse.h src/regenc.h src/st.h set(_INST_HEADERS src/oniguruma.h src/oniggnu.h) -if(ENABLE_POSIX_API) +if(ENABLE_POSIX_API OR ENABLE_BINARY_COMPATIBLE_POSIX_API) set(_SRCS ${_SRCS} src/regposix.c src/regposerr.c) set(_INST_HEADERS ${_INST_HEADERS} src/onigposix.h) + add_definitions("-DUSE_POSIX_API") +endif() + +if(ENABLE_BINARY_COMPATIBLE_POSIX_API) + add_definitions("-DUSE_BINARY_COMPATIBLE_POSIX_API") endif() add_library(onig ${_SRCS}) @@ -67,6 +75,19 @@ target_include_directories(onig PUBLIC target_compile_definitions(onig PUBLIC $<$>:ONIG_STATIC>) +if(BUILD_SHARED_LIBS) + # Parse SOVERSION information from LTVERSION in configure.ac + file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/configure.ac" LTVERSION REGEX "^LTVERSION *= *\"?[0-9]+:[0-9]+:[0-9]+\"?") + string(REGEX REPLACE "^LTVERSION *= *\"?([0-9]+:[0-9]+:[0-9]+)\"?.*$" "\\1" LTVERSION "${LTVERSION}") + string(REGEX REPLACE "^([0-9]+):([0-9]+):([0-9]+)" "\\1" LTCURRENT ${LTVERSION}) + string(REGEX REPLACE "^([0-9]+):([0-9]+):([0-9]+)" "\\2" LTREVISION ${LTVERSION}) + string(REGEX REPLACE "^([0-9]+):([0-9]+):([0-9]+)" "\\3" LTAGE ${LTVERSION}) + math(EXPR ONIG_SOVERSION "${LTCURRENT} - ${LTAGE}") + set_target_properties(onig PROPERTIES + SOVERSION "${ONIG_SOVERSION}" + VERSION "${ONIG_SOVERSION}.${LTAGE}.${LTREVISION}") +endif() + if(MSVC) target_compile_options(onig PRIVATE #/W4 diff --git a/HISTORY b/HISTORY index 8af2805..625aa24 100644 --- a/HISTORY +++ b/HISTORY @@ -1,6 +1,62 @@ History -2020/04/DD: Version 6.9.5 +2020/11/05: Version 6.9.6 + +2020/11/01: fix Issue 26798 in oss-fuzz: Timeout +2020/10/27: fix Issue 26675 in oss-fuzz: Timeout + +2020/10/21: Release Candidate 4 for Version 6.9.6 + +2020/10/20: #221: revert cbe9f8b and 8155473: Out-of-bounds write in #207 (Issues found with Coverity) is fake + +2020/10/16: Release Candidate 3 for Version 6.9.6 + +2020/10/15: fix #220: autotools not building DLL using msys2 and mingw64 on windows 10 +2020/10/12: fix #219: Binary incompatibilty between 6.9.5_rev1 -> 6.9.2_rc2: reg_number_of_names + +2020/10/09: Release Candidate 2 for Version 6.9.6 + +2020/10/09: fix #216: build fails on Windows + +2020/10/07: Release Candidate 1 for Version 6.9.6 + +2020/09/30: add configure option --enable-binary-compatible-posix-api +2020/09/24: fix: Issue 25893 in oss-fuzz: Stack-buffer-overflow +2020/09/22: fix Issues found with Coverity (Issue #207) +2020/08/27: fix Issue #204: define uint32_t and uint64_t for Visual Studio older than 2010 +2020/08/04: fix Issue 24544 in oss-fuzz: Timeout +2020/07/21: add USE_CHECK_VALIDITY_OF_STRING_IN_TREE (fix Issue 24276 in oss-fuzz: Undefined-shift) +2020/07/20: fix: Issue 24268 in oss-fuzz: Timeout +2020/07/17: fix: Issue 24112 in oss-fuzz: Undefined-shift +2020/07/14: fix: Issue 24066 in oss-fuzz: Timeout +2020/07/05: fix: Incomplete application of ONIG_OPTION_NOTBOL to \A +2020/07/05: fix: Incomplete application of ONIG_OPTION_NOT_END_STRING to \Z (Issue #192) +2020/07/05: fix: Incomplete application of ONIG_OPTION_NOTEOL to \z +2020/07/05: fix: Incomplete application of ONIG_OPTION_NOTEOL to \Z +2020/07/01: add ONIG_OPTION_NOT_END_STRING (Issue #198) +2020/06/28: add ONIG_OPTION_NOT_BEGIN_POSITION (Issue #198) +2020/06/28: add ONIG_OPTION_NOT_BEGIN_STRING +2020/06/28: fix: Issue 23754 in oss-fuzz: Timeout +2020/06/21: fix: Issue 23525 in oss-fuzz: Timeout +2020/06/15: fix: Issue 23311 in oss-fuzz: Timeout +2020/06/03: fix: Issue 22925 in oss-fuzz: Index-out-of-bounds +2020/06/03: fix: Issue 22917 in oss-fuzz: Out-of-memory +2020/06/02: fix: Issue 22916 in oss-fuzz: Timeout +2020/05/29: fix: Issue 22744 in oss-fuzz: Integer-overflow +2020/05/28: fix: Issue 22658 in oss-fuzz: check backref with level +2020/05/28: fix: Issue 22533 in oss-fuzz: memory leak +2020/05/23: fix: Issue 22393 in oss-fuzz: Integer-overflow +2020/05/13: fix: Issue 22154 in oss-fuzz: When the option FIND_LONGEST is specified, match_at() returns ONIG_MISMATCH unless there is no need to search any more. +2020/05/06: Add SOVERSION info to library when using cmake +2020/05/04: fix: 22008 in oss-fuzz +2020/05/04: fix: 21998 in oss-fuzz +2020/05/03: fix: 21944, 21977 in oss-fuzz + +2020/04/26: Version 6.9.5 revised 1 + +2020/04/24: fix #192: Unexpected regex match + +2020/04/20: Version 6.9.5 2020/04/12: Release Candidate 2 for Version 6.9.5 2020/04/09: fix a problem (found by oss-fuzz test on my PC) diff --git a/Makefile.am b/Makefile.am index fc3885b..0880b2d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -61,6 +61,11 @@ sanitize: make make all-test +debug_out_sanitize: + make clean + ./configure CFLAGS="-O0 -g -fsanitize=address -DONIG_DEBUG_PARSE -DONIG_DEBUG_COMPILE" LDFLAGS="-fsanitize=address" + make + cov: make lcov-clear cd test; make CFLAGS="--coverage" test diff --git a/README b/README index 90b420d..c72da97 100644 --- a/README +++ b/README @@ -46,7 +46,15 @@ License Install - Case 1: Unix and Cygwin platform + Case 1: Linux distribution packages + + * Fedora: dnf install oniguruma + * RHEL/CentOS: yum install oniguruma + * Debian/Ubuntu: apt install libonig5 + * Arch: pacman -S oniguruma + * openSUSE: zypper install oniguruma + + Case 2: Manual compilation on Linux, Unix, and Cygwin platform 1. autoreconf -vfi (* case: configure script is not found.) @@ -67,7 +75,7 @@ Install - Case 2: Windows 64/32bit platform (Visual Studio) + Case 3: Windows 64/32bit platform (Visual Studio) execute make_win64 or make_win32 diff --git a/README.md b/README.md index 63d835a..c546942 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ [![Build Status](https://travis-ci.org/kkos/oniguruma.svg?branch=master)](https://travis-ci.org/kkos/oniguruma) [![Code Quality: Cpp](https://img.shields.io/lgtm/grade/cpp/g/kkos/oniguruma.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/kkos/oniguruma/context:cpp) [![Total Alerts](https://img.shields.io/lgtm/alerts/g/kkos/oniguruma.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/kkos/oniguruma/alerts) +[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/oniguruma.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#oniguruma) +[![TrustInSoft CI](https://ci.trust-in-soft.com/projects/kkos/oniguruma.svg?branch=master)](https://ci.trust-in-soft.com/projects/kkos/oniguruma) Oniguruma ========= @@ -27,6 +29,26 @@ Supported character encodings: * doc/SYNTAX.md: contributed by seanofw +Version 6.9.6 +------------- +* When using configure script, if you have the POSIX API enabled in an earlier version (disabled by default in 6.9.5) and you need application binary compatibility with the POSIX API, specify "--enable-binary-compatible-posix-api=yes" instead of "--enable-posix-api=yes". Starting in 6.9.6, "--enable-posix-api=yes" only supports source-level compatibility for 6.9.5 and earlier about POSIX API. (Issue #210) + +* NEW: configure option --enable-binary-compatible-posix-api=[yes/no] +* NEW API: Limiting the maximum number of calls of subexp-call +* NEW API: ONIG_OPTION_NOT_BEGIN_STRING / NOT_END_STRING / NOT_BEGIN_POSITION +* Fixed behavior of ONIG_OPTION_NOTBOL / NOTEOL +* Fixed many problems found by OSS-Fuzz +* Fixed many problems found by Coverity +* Fixed CVE-2020-26159 (This turned out not to be a problem later. #221) +* Under cygwin and mingw, generate and install the libonig.def file (Issue #220) + + +Version 6.9.5 revised 1 +----------------------- + +* Fixed Issue #192 + + Version 6.9.5 ------------- @@ -143,45 +165,6 @@ Version 6.5.0 * NEW: Absent stopper (?~|absent) (*original) -Version 6.4.0 -------------- - -* Fix fatal problem of endless repeat on Windows -* NEW: call zero (call the total regexp) \g<0> -* NEW: relative backref/call by positive number \k<+n>, \g<+n> - - -Version 6.3.0 -------------- - -* NEW: octal codepoint \o{.....} -* Fixed CVE-2017-9224 -* Fixed CVE-2017-9225 -* Fixed CVE-2017-9226 -* Fixed CVE-2017-9227 -* Fixed CVE-2017-9228 -* Fixed CVE-2017-9229 - - -Version 6.1.2 -------------- - -* allow word bound, word begin and word end in look-behind. -* NEW option: ONIG_OPTION_CHECK_VALIDITY_OF_STRING - -Version 6.1 ------------ - -* improved doc/RE -* NEW API: onig_scan() - -Version 6.0 ------------ - -* Update Unicode 8.0 Property/Case-folding -* NEW API: onig_unicode_define_user_property() - - License ------- @@ -191,7 +174,15 @@ License Install ------- -### Case 1: Unix and Cygwin platform +### Case 1: Linux distribution packages + + * Fedora: `dnf install oniguruma` + * RHEL/CentOS: `yum install oniguruma` + * Debian/Ubuntu: `apt install libonig5` + * Arch: `pacman -S oniguruma` + * openSUSE: `zypper install oniguruma` + +### Case 2: Manual compilation on Linux, Unix, and Cygwin platform 1. autoreconf -vfi (* case: configure script is not found.) @@ -212,7 +203,7 @@ Install -### Case 2: Windows 64/32bit platform (Visual Studio) +### Case 3: Windows 64/32bit platform (Visual Studio) Execute make_win.bat diff --git a/configure.ac b/configure.ac index 74c20e3..2efaf19 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT(onig, 6.9.5) +AC_INIT(onig, 6.9.6) AC_CONFIG_MACRO_DIR([m4]) @@ -27,6 +27,20 @@ case "${enableval}" in *) AC_MSG_ERROR(bad value for --enable-posix-api) ;; esac], enable_posix_api=no) + +dnl check for Binary compatible POSIX API +AC_ARG_ENABLE([binary-compatible-posix-api], + [AS_HELP_STRING([--enable-binary-compatible-posix-api], + [turn on to Binary compatible POSIX API [default=no]])], +[\ +case "${enableval}" in + yes) enable_binary_compatible_posix_api=yes; enable_posix_api=yes ;; + no) enable_binary_compatible_posix_api=no ;; + *) AC_MSG_ERROR(bad value for --enable-binary-compatible-posix-api) ;; +esac], +enable_binary_compatible_posix_api=no) + +AM_CONDITIONAL(ENABLE_BINARY_COMPATIBLE_POSIX_API, test x"${enable_binary_compatible_posix_api}" = xyes) AM_CONDITIONAL(ENABLE_POSIX_API, test x"${enable_posix_api}" = xyes) @@ -43,7 +57,7 @@ fi dnl Checks for programs. AC_PROG_CC LT_INIT -LTVERSION="5:0:0" +LTVERSION="6:0:1" AC_SUBST(LTVERSION) AC_PROG_INSTALL @@ -65,4 +79,23 @@ AC_FUNC_ALLOCA AC_CONFIG_FILES([Makefile src/Makefile test/Makefile sample/Makefile onig-config]) AC_CONFIG_COMMANDS([default],[chmod +x onig-config],[]) + +# for Issue #220 +LIBONIG_DEF_FILE= +FIX_TO_LDFLAGS= +case $host_os in + cygwin* | mingw* ) + if test X"$enable_shared" = Xyes; then + LIBONIG_DEF_FILE=libonig.def + FIX_TO_LDFLAGS="-no-undefined" + fi + ;; +esac + +EXTRA_LIBONIG_LDFLAGS="$EXTRA_LIBONIG_LDFLAGS $FIX_TO_LDFLAGS" +AC_SUBST(EXTRA_LIBONIG_LDFLAGS) + +AM_CONDITIONAL(USE_LIBONIG_DEF_FILE, test -n "${LIBONIG_DEF_FILE}") +AC_SUBST(LIBONIG_DEF_FILE) + AC_OUTPUT diff --git a/doc/API b/doc/API index bb7b010..b13a503 100644 --- a/doc/API +++ b/doc/API @@ -1,4 +1,4 @@ -Oniguruma API Version 6.9.5 2020/03/25 +Oniguruma API Version 6.9.6 2020/07/12 #include @@ -95,6 +95,10 @@ Oniguruma API Version 6.9.5 2020/03/25 ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER Extended Grapheme Cluster mode ONIG_OPTION_TEXT_SEGMENT_WORD Word mode + + * The ONIG_OPTION_FIND_LONGEST option doesn't work properly during backward search of onig_search(). + + 5 enc: character encoding. ONIG_ENCODING_ASCII ASCII @@ -141,7 +145,6 @@ Oniguruma API Version 6.9.5 2020/03/25 ONIG_SYNTAX_JAVA Java (Sun java.util.regex) ONIG_SYNTAX_PERL Perl ONIG_SYNTAX_PERL_NG Perl + named group - ONIG_SYNTAX_RUBY Ruby ONIG_SYNTAX_ONIGURUMA Oniguruma ONIG_SYNTAX_DEFAULT default (== ONIG_SYNTAX_ONIGURUMA) onig_set_default_syntax() @@ -331,9 +334,11 @@ Oniguruma API Version 6.9.5 2020/03/25 6 region: address for return group match range info (NULL is allowed) 7 option: search time option - ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line - ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line - ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API. + ONIG_OPTION_NOTBOL (str) isn't considered as begin of line and begin of string (* ONIG_OPTION_NOT_BEGIN_STRING) + ONIG_OPTION_NOTEOL (end) isn't considered as end of line and end of string (* ONIG_OPTION_NOT_END_STRING) + ONIG_OPTION_NOT_BEGIN_STRING (str) isn't considered as begin of string (* fail \A) + ONIG_OPTION_NOT_END_STRING (end) isn't considered as end of string (* fail \z, \Z) + ONIG_OPTION_NOT_BEGIN_POSITION (start) isn't considered as start position of search (* fail \G) # int onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, @@ -366,9 +371,11 @@ Oniguruma API Version 6.9.5 2020/03/25 5 region: address for return group match range info (NULL is allowed) 6 option: search time option - ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line - ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line - ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API. + ONIG_OPTION_NOTBOL (str) isn't considered as begin of line and begin of string (* ONIG_OPTION_NOT_BEGIN_STRING) + ONIG_OPTION_NOTEOL (end) isn't considered as end of line and end of string (* ONIG_OPTION_NOT_END_STRING) + ONIG_OPTION_NOT_BEGIN_STRING (str) isn't considered as begin of string (* fail \A) + ONIG_OPTION_NOT_END_STRING (end) isn't considered as end of string (* fail \z, \Z) + ONIG_OPTION_NOT_BEGIN_POSITION (at) isn't considered as start position of search (* fail \G) # int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, @@ -500,8 +507,12 @@ Oniguruma API Version 6.9.5 2020/03/25 ONIG_REGSET_REGEX_LEAD (returns most left position) ONIG_REGSET_PRIORITY_TO_REGEX_ORDER (returns first match regex) 7 option: search time option - ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line - ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + ONIG_OPTION_NOTBOL (str) isn't considered as begin of line and begin of string (* ONIG_OPTION_NOT_BEGIN_STRING) + ONIG_OPTION_NOTEOL end (end) isn't considered as end of line and end of string (* ONIG_OPTION_NOT_END_STRING) + ONIG_OPTION_NOT_BEGIN_STRING (str) isn't considered as begin of string (* fail \A) + ONIG_OPTION_NOT_END_STRING (end) isn't considered as end of string (* fail \z, \Z) + ONIG_OPTION_NOT_BEGIN_POSITION (start) isn't considered as start position of search (* fail \G) + 8 rmatch_pos: return address of match position (match_address - str) * ONIG_REGSET_POSITION_LEAD and ONIG_REGSET_REGEX_LEAD return the same result. @@ -529,8 +540,12 @@ Oniguruma API Version 6.9.5 2020/03/25 ONIG_REGSET_REGEX_LEAD (returns most left position) ONIG_REGSET_PRIORITY_TO_REGEX_ORDER (returns first match regex) 7 option: search time option - ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line - ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + ONIG_OPTION_NOTBOL (str) isn't considered as begin of line and begin of string (* ONIG_OPTION_NOT_BEGIN_STRING) + ONIG_OPTION_NOTEOL (end) isn't considered as end of line and end of string (* ONIG_OPTION_NOT_END_STRING) + ONIG_OPTION_NOT_BEGIN_STRING (str) isn't considered as begin of string (* fail \A) + ONIG_OPTION_NOT_END_STRING (end) isn't considered as end of string (* fail \z, \Z) + ONIG_OPTION_NOT_BEGIN_POSITION (start) isn't considered as start position of search (* fail \G) + 8 mps: array of match-params 9 rmatch_pos: return address of match position (match_address - str) @@ -672,7 +687,7 @@ Oniguruma API Version 6.9.5 2020/03/25 Return the root node of capture history data tree. - This value is undefined if matching has faild. + This value is undefined if matching has failed. arguments 1 region: matching result. @@ -908,6 +923,21 @@ Oniguruma API Version 6.9.5 2020/03/25 normal return: ONIG_NORMAL +# unsigned long onig_get_subexp_call_limit_in_search(void) + + Return the limit of subexp call count. + (default: 0:unlimited) + + normal return: current limit value + + +# int onig_set_subexp_call_limit_in_search(unsigned long n) + + Set a limit count of subexp call. + + normal return: ONIG_NORMAL + + # int onig_get_subexp_call_max_nest_level(void) Return the limit of subexp call nest level. diff --git a/doc/API.ja b/doc/API.ja index 38ce8d6..283bea3 100644 --- a/doc/API.ja +++ b/doc/API.ja @@ -1,4 +1,4 @@ -鬼車インターフェース Version 6.9.5 2020/03/25 +鬼車インターフェース Version 6.9.6 2020/07/12 #include @@ -94,6 +94,8 @@ ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER 拡張書記素房モード ONIG_OPTION_TEXT_SEGMENT_WORD 単語モード + * ONIG_OPTION_FIND_LONGEST はonig_search()の後方探索では正しく動作しない + 5 enc: 文字エンコーディング ONIG_ENCODING_ASCII ASCII @@ -140,7 +142,6 @@ ONIG_SYNTAX_JAVA Java (Sun java.util.regex) ONIG_SYNTAX_PERL Perl ONIG_SYNTAX_PERL_NG Perl + 名前付き捕獲式集合 - ONIG_SYNTAX_RUBY Ruby ONIG_SYNTAX_ONIGURUMA Oniguruma ONIG_SYNTAX_DEFAULT default (== ONIG_SYNTAX_ONIGURUMA) onig_set_default_syntax() @@ -329,9 +330,11 @@ 6 region: マッチ領域情報(region) (NULLも許される) 7 option: 検索時オプション - ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない - ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない - ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする + ONIG_OPTION_NOTBOL strを行頭および文字列先頭と看做さない + ONIG_OPTION_NOTEOL endを行末および文字列終端と看做さない + ONIG_OPTION_NOT_BEGIN_STRING strを文字列の先頭と看做さない (\A 失敗) + ONIG_OPTION_NOT_END_STRING end文字列終端と看做さない (\z, \Z 失敗) + ONIG_OPTION_NOT_BEGIN_POSITION startを検索開始位置と看做さない (\G 失敗) # int onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, @@ -363,9 +366,11 @@ 5 region: マッチ領域情報(region) (NULLも許される) 6 option: 検索時オプション - ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない - ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない - ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする + ONIG_OPTION_NOTBOL strを行頭および文字列先頭と看做さない + ONIG_OPTION_NOTEOL endを行末および文字列終端と看做さない + ONIG_OPTION_NOT_BEGIN_STRING strを文字列の先頭と看做さない (\A 失敗) + ONIG_OPTION_NOT_END_STRING end文字列終端と看做さない (\z, \Z 失敗) + ONIG_OPTION_NOT_BEGIN_POSITION atを検索開始位置と看做さない (\G 失敗) # int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, @@ -498,8 +503,11 @@ ONIG_REGSET_REGEX_LEAD (最左位置でマッチした結果を返す) ONIG_REGSET_PRIORITY_TO_REGEX_ORDER (最初にマッチした正規表現の結果を返す) 7 option: 検索時オプション - ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない - ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない + ONIG_OPTION_NOTBOL strを行頭および文字列先頭と看做さない + ONIG_OPTION_NOTEOL endを行末および文字列終端と看做さない + ONIG_OPTION_NOT_BEGIN_STRING strを文字列の先頭と看做さない (\A 失敗) + ONIG_OPTION_NOT_END_STRING end文字列終端と看做さない (\z, \Z 失敗) + ONIG_OPTION_NOT_BEGIN_POSITION startを検索開始位置と看做さない (\G 失敗) 8 rmatch_pos: マッチした位置を返すためのアドレス (match_address - str) * ONIG_REGSET_POSITION_LEADとONIG_REGSET_REGEX_LEADは同じ結果を返す。 @@ -528,8 +536,11 @@ ONIG_REGSET_REGEX_LEAD (最左位置でマッチした結果を返す) ONIG_REGSET_PRIORITY_TO_REGEX_ORDER (最初にマッチした正規表現の結果を返す) 7 option: 検索時オプション - ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない - ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない + ONIG_OPTION_NOTBOL strを行頭および文字列先頭と看做さない + ONIG_OPTION_NOTEOL endを行末および文字列終端と看做さない + ONIG_OPTION_NOT_BEGIN_STRING strを文字列の先頭と看做さない (\A 失敗) + ONIG_OPTION_NOT_END_STRING end文字列終端と看做さない (\z, \Z 失敗) + ONIG_OPTION_NOT_BEGIN_POSITION startを検索開始位置と看做さない (\G 失敗) 8 mps: OnigMatchParamオブジェクトの配列 9 rmatch_pos: マッチした位置を返すためのアドレス (match_address - str) @@ -915,6 +926,21 @@ 正常終了戻り値: ONIG_NORMAL +# unsigned long onig_get_subexp_call_limit_in_search(void) + + 部分式呼出しの呼び出し回数の制限値を返す。 + (デフォルト: 0:無制限) + + 正常終了戻り値: 制限値 + + +# int onig_set_subexp_call_limit_in_search(unsigned long n) + + 部分式呼出しの呼び出し回数の制限値を指定する。 + + 正常終了戻り値: ONIG_NORMAL + + # int onig_get_subexp_call_max_nest_level(void) 部分式呼出しのネストレベルの最大値を返す。 diff --git a/doc/CALLOUTS.API b/doc/CALLOUTS.API index 057a054..c4a13c8 100644 --- a/doc/CALLOUTS.API +++ b/doc/CALLOUTS.API @@ -316,7 +316,7 @@ Callouts API Version 6.8.2 2018/06/08 because it doesn't use this function. -(8) Callout data (used in apllications) +(8) Callout data (used in applications) # int onig_get_callout_data(OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val) diff --git a/doc/RE b/doc/RE index f96efe7..c8d9795 100644 --- a/doc/RE +++ b/doc/RE @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.9.5 2020/04/09 +Oniguruma Regular Expressions Version 6.9.6 2020/07/31 syntax: ONIG_SYNTAX_ONIGURUMA (default syntax) @@ -366,7 +366,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default syntax) (?~|absent) Absent stopper (* original) After passed this operator, string right range is limited - at the point that does not include the string match whth + at the point that does not include the string match with . (?~|) Range clear diff --git a/doc/RE.ja b/doc/RE.ja index 6eacc8d..c8321d5 100644 --- a/doc/RE.ja +++ b/doc/RE.ja @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.9.5 2020/04/09 +鬼車 正規表現 Version 6.9.6 2020/07/31 使用文法: ONIG_SYNTAX_ONIGURUMA (既定値) @@ -367,7 +367,7 @@ <不在>に適合する文字列を含まない範囲に制限される。 (?~|) 範囲消去 - 不在停止の効果を消して、それ以前の状態にする。 + 不在停止の効果を消して、初期の状態にする。 * 不在機能の入れ子には対応しておらず、その場合の挙動は不定とする。 diff --git a/doc/UNICODE_PROPERTIES b/doc/UNICODE_PROPERTIES index 2227ada..2dc96da 100644 --- a/doc/UNICODE_PROPERTIES +++ b/doc/UNICODE_PROPERTIES @@ -1,809 +1,809 @@ Unicode Properties (Unicode Version: 13.0.0, Emoji: 13.0) - 15: ASCII_Hex_Digit - 16: Adlam - 17: Ahom - 18: Alphabetic - 19: Anatolian_Hieroglyphs - 20: Any - 21: Arabic - 22: Armenian - 23: Assigned - 24: Avestan - 25: Balinese - 26: Bamum - 27: Bassa_Vah - 28: Batak - 29: Bengali - 30: Bhaiksuki - 31: Bidi_Control - 32: Bopomofo - 33: Brahmi - 34: Braille - 35: Buginese - 36: Buhid - 37: C - 38: Canadian_Aboriginal - 39: Carian - 40: Case_Ignorable - 41: Cased - 42: Caucasian_Albanian - 43: Cc - 44: Cf - 45: Chakma - 46: Cham - 47: Changes_When_Casefolded - 48: Changes_When_Casemapped - 49: Changes_When_Lowercased - 50: Changes_When_Titlecased - 51: Changes_When_Uppercased - 52: Cherokee - 53: Chorasmian - 54: Cn - 55: Co - 56: Common - 57: Coptic - 58: Cs - 59: Cuneiform - 60: Cypriot - 61: Cyrillic - 62: Dash - 63: Default_Ignorable_Code_Point - 64: Deprecated - 65: Deseret - 66: Devanagari - 67: Diacritic - 68: Dives_Akuru - 69: Dogra - 70: Duployan - 71: Egyptian_Hieroglyphs - 72: Elbasan - 73: Elymaic - 74: Emoji - 75: Emoji_Component - 76: Emoji_Modifier - 77: Emoji_Modifier_Base - 78: Emoji_Presentation - 79: Ethiopic - 80: Extended_Pictographic - 81: Extender - 82: Georgian - 83: Glagolitic - 84: Gothic - 85: Grantha - 86: Grapheme_Base - 87: Grapheme_Extend - 88: Grapheme_Link - 89: Greek - 90: Gujarati - 91: Gunjala_Gondi - 92: Gurmukhi - 93: Han - 94: Hangul - 95: Hanifi_Rohingya - 96: Hanunoo - 97: Hatran - 98: Hebrew - 99: Hex_Digit -100: Hiragana -101: Hyphen -102: IDS_Binary_Operator -103: IDS_Trinary_Operator -104: ID_Continue -105: ID_Start -106: Ideographic -107: Imperial_Aramaic -108: Inherited -109: Inscriptional_Pahlavi -110: Inscriptional_Parthian -111: Javanese -112: Join_Control -113: Kaithi -114: Kannada -115: Katakana -116: Kayah_Li -117: Kharoshthi -118: Khitan_Small_Script -119: Khmer -120: Khojki -121: Khudawadi -122: L -123: LC -124: Lao -125: Latin -126: Lepcha -127: Limbu -128: Linear_A -129: Linear_B -130: Lisu -131: Ll -132: Lm -133: Lo -134: Logical_Order_Exception -135: Lowercase -136: Lt -137: Lu -138: Lycian -139: Lydian -140: M -141: Mahajani -142: Makasar -143: Malayalam -144: Mandaic -145: Manichaean -146: Marchen -147: Masaram_Gondi -148: Math -149: Mc -150: Me -151: Medefaidrin -152: Meetei_Mayek -153: Mende_Kikakui -154: Meroitic_Cursive -155: Meroitic_Hieroglyphs -156: Miao -157: Mn -158: Modi -159: Mongolian -160: Mro -161: Multani -162: Myanmar -163: N -164: Nabataean -165: Nandinagari -166: Nd -167: New_Tai_Lue -168: Newa -169: Nko -170: Nl -171: No -172: Noncharacter_Code_Point -173: Nushu -174: Nyiakeng_Puachue_Hmong -175: Ogham -176: Ol_Chiki -177: Old_Hungarian -178: Old_Italic -179: Old_North_Arabian -180: Old_Permic -181: Old_Persian -182: Old_Sogdian -183: Old_South_Arabian -184: Old_Turkic -185: Oriya -186: Osage -187: Osmanya -188: Other_Alphabetic -189: Other_Default_Ignorable_Code_Point -190: Other_Grapheme_Extend -191: Other_ID_Continue -192: Other_ID_Start -193: Other_Lowercase -194: Other_Math -195: Other_Uppercase -196: P -197: Pahawh_Hmong -198: Palmyrene -199: Pattern_Syntax -200: Pattern_White_Space -201: Pau_Cin_Hau -202: Pc -203: Pd -204: Pe -205: Pf -206: Phags_Pa -207: Phoenician -208: Pi -209: Po -210: Prepended_Concatenation_Mark -211: Ps -212: Psalter_Pahlavi -213: Quotation_Mark -214: Radical -215: Regional_Indicator -216: Rejang -217: Runic -218: S -219: Samaritan -220: Saurashtra -221: Sc -222: Sentence_Terminal -223: Sharada -224: Shavian -225: Siddham -226: SignWriting -227: Sinhala -228: Sk -229: Sm -230: So -231: Soft_Dotted -232: Sogdian -233: Sora_Sompeng -234: Soyombo -235: Sundanese -236: Syloti_Nagri -237: Syriac -238: Tagalog -239: Tagbanwa -240: Tai_Le -241: Tai_Tham -242: Tai_Viet -243: Takri -244: Tamil -245: Tangut -246: Telugu -247: Terminal_Punctuation -248: Thaana -249: Thai -250: Tibetan -251: Tifinagh -252: Tirhuta -253: Ugaritic -254: Unified_Ideograph -255: Unknown -256: Uppercase -257: Vai -258: Variation_Selector -259: Wancho -260: Warang_Citi -261: White_Space -262: XID_Continue -263: XID_Start -264: Yezidi -265: Yi -266: Z -267: Zanabazar_Square -268: Zl -269: Zp -270: Zs - 16: Adlm - 42: Aghb - 15: AHex - 21: Arab -107: Armi - 22: Armn - 24: Avst - 25: Bali - 26: Bamu - 27: Bass - 28: Batk - 29: Beng - 30: Bhks - 31: Bidi_C - 32: Bopo - 33: Brah - 34: Brai - 35: Bugi - 36: Buhd - 45: Cakm - 38: Cans - 39: Cari -123: Cased_Letter - 52: Cher - 53: Chrs - 40: CI -204: Close_Punctuation -140: Combining_Mark -202: Connector_Punctuation - 43: Control - 57: Copt - 60: Cprt -221: Currency_Symbol - 47: CWCF - 48: CWCM - 49: CWL - 50: CWT - 51: CWU - 61: Cyrl -203: Dash_Punctuation -166: Decimal_Number - 64: Dep - 66: Deva - 63: DI - 67: Dia - 68: Diak - 69: Dogr - 65: Dsrt - 70: Dupl - 77: EBase - 75: EComp - 71: Egyp - 72: Elba - 73: Elym - 76: EMod -150: Enclosing_Mark - 78: EPres - 79: Ethi - 81: Ext - 80: ExtPict -205: Final_Punctuation - 44: Format - 82: Geor - 83: Glag - 91: Gong -147: Gonm - 84: Goth - 85: Gran - 86: Gr_Base - 89: Grek - 87: Gr_Ext - 88: Gr_Link - 90: Gujr - 92: Guru - 94: Hang - 93: Hani - 96: Hano - 97: Hatr - 98: Hebr - 99: Hex -100: Hira - 19: Hluw -197: Hmng -174: Hmnp -177: Hung -104: IDC -106: Ideo -105: IDS -102: IDSB -103: IDST -208: Initial_Punctuation -178: Ital -111: Java -112: Join_C -116: Kali -115: Kana -117: Khar -119: Khmr -120: Khoj -118: Kits -114: Knda -113: Kthi -241: Lana -124: Laoo -125: Latn -126: Lepc -122: Letter -170: Letter_Number -127: Limb -128: Lina -129: Linb -268: Line_Separator -134: LOE -131: Lowercase_Letter -138: Lyci -139: Lydi -141: Mahj -142: Maka -144: Mand -145: Mani -146: Marc -140: Mark -229: Math_Symbol -151: Medf -153: Mend -154: Merc -155: Mero -143: Mlym -132: Modifier_Letter -228: Modifier_Symbol -159: Mong -160: Mroo -152: Mtei -161: Mult -162: Mymr -165: Nand -179: Narb -164: Nbat -172: NChar -169: Nkoo -157: Nonspacing_Mark -173: Nshu -163: Number -188: OAlpha -189: ODI -175: Ogam -190: OGr_Ext -191: OIDC -192: OIDS -176: Olck -193: OLower -194: OMath -211: Open_Punctuation -184: Orkh -185: Orya -186: Osge -187: Osma - 37: Other -133: Other_Letter -171: Other_Number -209: Other_Punctuation -230: Other_Symbol -195: OUpper -198: Palm -269: Paragraph_Separator -199: Pat_Syn -200: Pat_WS -201: Pauc -210: PCM -180: Perm -206: Phag -109: Phli -212: Phlp -207: Phnx -156: Plrd - 55: Private_Use -110: Prti -196: Punctuation - 57: Qaac -108: Qaai -213: QMark -215: RI -216: Rjng - 95: Rohg -217: Runr -219: Samr -183: Sarb -220: Saur -231: SD -266: Separator -226: Sgnw -224: Shaw -223: Shrd -225: Sidd -121: Sind -227: Sinh -232: Sogd -182: Sogo -233: Sora -234: Soyo -270: Space_Separator -149: Spacing_Mark -222: STerm -235: Sund - 58: Surrogate -236: Sylo -218: Symbol -237: Syrc -239: Tagb -243: Takr -240: Tale -167: Talu -244: Taml -245: Tang -242: Tavt -246: Telu -247: Term -251: Tfng -238: Tglg -248: Thaa -250: Tibt -252: Tirh -136: Titlecase_Letter -253: Ugar -254: UIdeo - 54: Unassigned -137: Uppercase_Letter -257: Vaii -258: VS -260: Wara -259: Wcho -261: WSpace -262: XIDC -263: XIDS -181: Xpeo - 59: Xsux -264: Yezi -265: Yiii -267: Zanb -108: Zinh - 56: Zyyy -255: Zzzz -271: In_Basic_Latin -272: In_Latin_1_Supplement -273: In_Latin_Extended_A -274: In_Latin_Extended_B -275: In_IPA_Extensions -276: In_Spacing_Modifier_Letters -277: In_Combining_Diacritical_Marks -278: In_Greek_and_Coptic -279: In_Cyrillic -280: In_Cyrillic_Supplement -281: In_Armenian -282: In_Hebrew -283: In_Arabic -284: In_Syriac -285: In_Arabic_Supplement -286: In_Thaana -287: In_NKo -288: In_Samaritan -289: In_Mandaic -290: In_Syriac_Supplement -291: In_Arabic_Extended_A -292: In_Devanagari -293: In_Bengali -294: In_Gurmukhi -295: In_Gujarati -296: In_Oriya -297: In_Tamil -298: In_Telugu -299: In_Kannada -300: In_Malayalam -301: In_Sinhala -302: In_Thai -303: In_Lao -304: In_Tibetan -305: In_Myanmar -306: In_Georgian -307: In_Hangul_Jamo -308: In_Ethiopic -309: In_Ethiopic_Supplement -310: In_Cherokee -311: In_Unified_Canadian_Aboriginal_Syllabics -312: In_Ogham -313: In_Runic -314: In_Tagalog -315: In_Hanunoo -316: In_Buhid -317: In_Tagbanwa -318: In_Khmer -319: In_Mongolian -320: In_Unified_Canadian_Aboriginal_Syllabics_Extended -321: In_Limbu -322: In_Tai_Le -323: In_New_Tai_Lue -324: In_Khmer_Symbols -325: In_Buginese -326: In_Tai_Tham -327: In_Combining_Diacritical_Marks_Extended -328: In_Balinese -329: In_Sundanese -330: In_Batak -331: In_Lepcha -332: In_Ol_Chiki -333: In_Cyrillic_Extended_C -334: In_Georgian_Extended -335: In_Sundanese_Supplement -336: In_Vedic_Extensions -337: In_Phonetic_Extensions -338: In_Phonetic_Extensions_Supplement -339: In_Combining_Diacritical_Marks_Supplement -340: In_Latin_Extended_Additional -341: In_Greek_Extended -342: In_General_Punctuation -343: In_Superscripts_and_Subscripts -344: In_Currency_Symbols -345: In_Combining_Diacritical_Marks_for_Symbols -346: In_Letterlike_Symbols -347: In_Number_Forms -348: In_Arrows -349: In_Mathematical_Operators -350: In_Miscellaneous_Technical -351: In_Control_Pictures -352: In_Optical_Character_Recognition -353: In_Enclosed_Alphanumerics -354: In_Box_Drawing -355: In_Block_Elements -356: In_Geometric_Shapes -357: In_Miscellaneous_Symbols -358: In_Dingbats -359: In_Miscellaneous_Mathematical_Symbols_A -360: In_Supplemental_Arrows_A -361: In_Braille_Patterns -362: In_Supplemental_Arrows_B -363: In_Miscellaneous_Mathematical_Symbols_B -364: In_Supplemental_Mathematical_Operators -365: In_Miscellaneous_Symbols_and_Arrows -366: In_Glagolitic -367: In_Latin_Extended_C -368: In_Coptic -369: In_Georgian_Supplement -370: In_Tifinagh -371: In_Ethiopic_Extended -372: In_Cyrillic_Extended_A -373: In_Supplemental_Punctuation -374: In_CJK_Radicals_Supplement -375: In_Kangxi_Radicals -376: In_Ideographic_Description_Characters -377: In_CJK_Symbols_and_Punctuation -378: In_Hiragana -379: In_Katakana -380: In_Bopomofo -381: In_Hangul_Compatibility_Jamo -382: In_Kanbun -383: In_Bopomofo_Extended -384: In_CJK_Strokes -385: In_Katakana_Phonetic_Extensions -386: In_Enclosed_CJK_Letters_and_Months -387: In_CJK_Compatibility -388: In_CJK_Unified_Ideographs_Extension_A -389: In_Yijing_Hexagram_Symbols -390: In_CJK_Unified_Ideographs -391: In_Yi_Syllables -392: In_Yi_Radicals -393: In_Lisu -394: In_Vai -395: In_Cyrillic_Extended_B -396: In_Bamum -397: In_Modifier_Tone_Letters -398: In_Latin_Extended_D -399: In_Syloti_Nagri -400: In_Common_Indic_Number_Forms -401: In_Phags_pa -402: In_Saurashtra -403: In_Devanagari_Extended -404: In_Kayah_Li -405: In_Rejang -406: In_Hangul_Jamo_Extended_A -407: In_Javanese -408: In_Myanmar_Extended_B -409: In_Cham -410: In_Myanmar_Extended_A -411: In_Tai_Viet -412: In_Meetei_Mayek_Extensions -413: In_Ethiopic_Extended_A -414: In_Latin_Extended_E -415: In_Cherokee_Supplement -416: In_Meetei_Mayek -417: In_Hangul_Syllables -418: In_Hangul_Jamo_Extended_B -419: In_High_Surrogates -420: In_High_Private_Use_Surrogates -421: In_Low_Surrogates -422: In_Private_Use_Area -423: In_CJK_Compatibility_Ideographs -424: In_Alphabetic_Presentation_Forms -425: In_Arabic_Presentation_Forms_A -426: In_Variation_Selectors -427: In_Vertical_Forms -428: In_Combining_Half_Marks -429: In_CJK_Compatibility_Forms -430: In_Small_Form_Variants -431: In_Arabic_Presentation_Forms_B -432: In_Halfwidth_and_Fullwidth_Forms -433: In_Specials -434: In_Linear_B_Syllabary -435: In_Linear_B_Ideograms -436: In_Aegean_Numbers -437: In_Ancient_Greek_Numbers -438: In_Ancient_Symbols -439: In_Phaistos_Disc -440: In_Lycian -441: In_Carian -442: In_Coptic_Epact_Numbers -443: In_Old_Italic -444: In_Gothic -445: In_Old_Permic -446: In_Ugaritic -447: In_Old_Persian -448: In_Deseret -449: In_Shavian -450: In_Osmanya -451: In_Osage -452: In_Elbasan -453: In_Caucasian_Albanian -454: In_Linear_A -455: In_Cypriot_Syllabary -456: In_Imperial_Aramaic -457: In_Palmyrene -458: In_Nabataean -459: In_Hatran -460: In_Phoenician -461: In_Lydian -462: In_Meroitic_Hieroglyphs -463: In_Meroitic_Cursive -464: In_Kharoshthi -465: In_Old_South_Arabian -466: In_Old_North_Arabian -467: In_Manichaean -468: In_Avestan -469: In_Inscriptional_Parthian -470: In_Inscriptional_Pahlavi -471: In_Psalter_Pahlavi -472: In_Old_Turkic -473: In_Old_Hungarian -474: In_Hanifi_Rohingya -475: In_Rumi_Numeral_Symbols -476: In_Yezidi -477: In_Old_Sogdian -478: In_Sogdian -479: In_Chorasmian -480: In_Elymaic -481: In_Brahmi -482: In_Kaithi -483: In_Sora_Sompeng -484: In_Chakma -485: In_Mahajani -486: In_Sharada -487: In_Sinhala_Archaic_Numbers -488: In_Khojki -489: In_Multani -490: In_Khudawadi -491: In_Grantha -492: In_Newa -493: In_Tirhuta -494: In_Siddham -495: In_Modi -496: In_Mongolian_Supplement -497: In_Takri -498: In_Ahom -499: In_Dogra -500: In_Warang_Citi -501: In_Dives_Akuru -502: In_Nandinagari -503: In_Zanabazar_Square -504: In_Soyombo -505: In_Pau_Cin_Hau -506: In_Bhaiksuki -507: In_Marchen -508: In_Masaram_Gondi -509: In_Gunjala_Gondi -510: In_Makasar -511: In_Lisu_Supplement -512: In_Tamil_Supplement -513: In_Cuneiform -514: In_Cuneiform_Numbers_and_Punctuation -515: In_Early_Dynastic_Cuneiform -516: In_Egyptian_Hieroglyphs -517: In_Egyptian_Hieroglyph_Format_Controls -518: In_Anatolian_Hieroglyphs -519: In_Bamum_Supplement -520: In_Mro -521: In_Bassa_Vah -522: In_Pahawh_Hmong -523: In_Medefaidrin -524: In_Miao -525: In_Ideographic_Symbols_and_Punctuation -526: In_Tangut -527: In_Tangut_Components -528: In_Khitan_Small_Script -529: In_Tangut_Supplement -530: In_Kana_Supplement -531: In_Kana_Extended_A -532: In_Small_Kana_Extension -533: In_Nushu -534: In_Duployan -535: In_Shorthand_Format_Controls -536: In_Byzantine_Musical_Symbols -537: In_Musical_Symbols -538: In_Ancient_Greek_Musical_Notation -539: In_Mayan_Numerals -540: In_Tai_Xuan_Jing_Symbols -541: In_Counting_Rod_Numerals -542: In_Mathematical_Alphanumeric_Symbols -543: In_Sutton_SignWriting -544: In_Glagolitic_Supplement -545: In_Nyiakeng_Puachue_Hmong -546: In_Wancho -547: In_Mende_Kikakui -548: In_Adlam -549: In_Indic_Siyaq_Numbers -550: In_Ottoman_Siyaq_Numbers -551: In_Arabic_Mathematical_Alphabetic_Symbols -552: In_Mahjong_Tiles -553: In_Domino_Tiles -554: In_Playing_Cards -555: In_Enclosed_Alphanumeric_Supplement -556: In_Enclosed_Ideographic_Supplement -557: In_Miscellaneous_Symbols_and_Pictographs -558: In_Emoticons -559: In_Ornamental_Dingbats -560: In_Transport_and_Map_Symbols -561: In_Alchemical_Symbols -562: In_Geometric_Shapes_Extended -563: In_Supplemental_Arrows_C -564: In_Supplemental_Symbols_and_Pictographs -565: In_Chess_Symbols -566: In_Symbols_and_Pictographs_Extended_A -567: In_Symbols_for_Legacy_Computing -568: In_CJK_Unified_Ideographs_Extension_B -569: In_CJK_Unified_Ideographs_Extension_C -570: In_CJK_Unified_Ideographs_Extension_D -571: In_CJK_Unified_Ideographs_Extension_E -572: In_CJK_Unified_Ideographs_Extension_F -573: In_CJK_Compatibility_Ideographs_Supplement -574: In_CJK_Unified_Ideographs_Extension_G -575: In_Tags -576: In_Variation_Selectors_Supplement -577: In_Supplementary_Private_Use_Area_A -578: In_Supplementary_Private_Use_Area_B -579: In_No_Block +ASCII_Hex_Digit +Adlam +Ahom +Alphabetic +Anatolian_Hieroglyphs +Any +Arabic +Armenian +Assigned +Avestan +Balinese +Bamum +Bassa_Vah +Batak +Bengali +Bhaiksuki +Bidi_Control +Bopomofo +Brahmi +Braille +Buginese +Buhid +C +Canadian_Aboriginal +Carian +Case_Ignorable +Cased +Caucasian_Albanian +Cc +Cf +Chakma +Cham +Changes_When_Casefolded +Changes_When_Casemapped +Changes_When_Lowercased +Changes_When_Titlecased +Changes_When_Uppercased +Cherokee +Chorasmian +Cn +Co +Common +Coptic +Cs +Cuneiform +Cypriot +Cyrillic +Dash +Default_Ignorable_Code_Point +Deprecated +Deseret +Devanagari +Diacritic +Dives_Akuru +Dogra +Duployan +Egyptian_Hieroglyphs +Elbasan +Elymaic +Emoji +Emoji_Component +Emoji_Modifier +Emoji_Modifier_Base +Emoji_Presentation +Ethiopic +Extended_Pictographic +Extender +Georgian +Glagolitic +Gothic +Grantha +Grapheme_Base +Grapheme_Extend +Grapheme_Link +Greek +Gujarati +Gunjala_Gondi +Gurmukhi +Han +Hangul +Hanifi_Rohingya +Hanunoo +Hatran +Hebrew +Hex_Digit +Hiragana +Hyphen +IDS_Binary_Operator +IDS_Trinary_Operator +ID_Continue +ID_Start +Ideographic +Imperial_Aramaic +Inherited +Inscriptional_Pahlavi +Inscriptional_Parthian +Javanese +Join_Control +Kaithi +Kannada +Katakana +Kayah_Li +Kharoshthi +Khitan_Small_Script +Khmer +Khojki +Khudawadi +L +LC +Lao +Latin +Lepcha +Limbu +Linear_A +Linear_B +Lisu +Ll +Lm +Lo +Logical_Order_Exception +Lowercase +Lt +Lu +Lycian +Lydian +M +Mahajani +Makasar +Malayalam +Mandaic +Manichaean +Marchen +Masaram_Gondi +Math +Mc +Me +Medefaidrin +Meetei_Mayek +Mende_Kikakui +Meroitic_Cursive +Meroitic_Hieroglyphs +Miao +Mn +Modi +Mongolian +Mro +Multani +Myanmar +N +Nabataean +Nandinagari +Nd +New_Tai_Lue +Newa +Nko +Nl +No +Noncharacter_Code_Point +Nushu +Nyiakeng_Puachue_Hmong +Ogham +Ol_Chiki +Old_Hungarian +Old_Italic +Old_North_Arabian +Old_Permic +Old_Persian +Old_Sogdian +Old_South_Arabian +Old_Turkic +Oriya +Osage +Osmanya +Other_Alphabetic +Other_Default_Ignorable_Code_Point +Other_Grapheme_Extend +Other_ID_Continue +Other_ID_Start +Other_Lowercase +Other_Math +Other_Uppercase +P +Pahawh_Hmong +Palmyrene +Pattern_Syntax +Pattern_White_Space +Pau_Cin_Hau +Pc +Pd +Pe +Pf +Phags_Pa +Phoenician +Pi +Po +Prepended_Concatenation_Mark +Ps +Psalter_Pahlavi +Quotation_Mark +Radical +Regional_Indicator +Rejang +Runic +S +Samaritan +Saurashtra +Sc +Sentence_Terminal +Sharada +Shavian +Siddham +SignWriting +Sinhala +Sk +Sm +So +Soft_Dotted +Sogdian +Sora_Sompeng +Soyombo +Sundanese +Syloti_Nagri +Syriac +Tagalog +Tagbanwa +Tai_Le +Tai_Tham +Tai_Viet +Takri +Tamil +Tangut +Telugu +Terminal_Punctuation +Thaana +Thai +Tibetan +Tifinagh +Tirhuta +Ugaritic +Unified_Ideograph +Unknown +Uppercase +Vai +Variation_Selector +Wancho +Warang_Citi +White_Space +XID_Continue +XID_Start +Yezidi +Yi +Z +Zanabazar_Square +Zl +Zp +Zs +Adlm +Aghb +AHex +Arab +Armi +Armn +Avst +Bali +Bamu +Bass +Batk +Beng +Bhks +Bidi_C +Bopo +Brah +Brai +Bugi +Buhd +Cakm +Cans +Cari +Cased_Letter +Cher +Chrs +CI +Close_Punctuation +Combining_Mark +Connector_Punctuation +Control +Copt +Cprt +Currency_Symbol +CWCF +CWCM +CWL +CWT +CWU +Cyrl +Dash_Punctuation +Decimal_Number +Dep +Deva +DI +Dia +Diak +Dogr +Dsrt +Dupl +EBase +EComp +Egyp +Elba +Elym +EMod +Enclosing_Mark +EPres +Ethi +Ext +ExtPict +Final_Punctuation +Format +Geor +Glag +Gong +Gonm +Goth +Gran +Gr_Base +Grek +Gr_Ext +Gr_Link +Gujr +Guru +Hang +Hani +Hano +Hatr +Hebr +Hex +Hira +Hluw +Hmng +Hmnp +Hung +IDC +Ideo +IDS +IDSB +IDST +Initial_Punctuation +Ital +Java +Join_C +Kali +Kana +Khar +Khmr +Khoj +Kits +Knda +Kthi +Lana +Laoo +Latn +Lepc +Letter +Letter_Number +Limb +Lina +Linb +Line_Separator +LOE +Lowercase_Letter +Lyci +Lydi +Mahj +Maka +Mand +Mani +Marc +Mark +Math_Symbol +Medf +Mend +Merc +Mero +Mlym +Modifier_Letter +Modifier_Symbol +Mong +Mroo +Mtei +Mult +Mymr +Nand +Narb +Nbat +NChar +Nkoo +Nonspacing_Mark +Nshu +Number +OAlpha +ODI +Ogam +OGr_Ext +OIDC +OIDS +Olck +OLower +OMath +Open_Punctuation +Orkh +Orya +Osge +Osma +Other +Other_Letter +Other_Number +Other_Punctuation +Other_Symbol +OUpper +Palm +Paragraph_Separator +Pat_Syn +Pat_WS +Pauc +PCM +Perm +Phag +Phli +Phlp +Phnx +Plrd +Private_Use +Prti +Punctuation +Qaac +Qaai +QMark +RI +Rjng +Rohg +Runr +Samr +Sarb +Saur +SD +Separator +Sgnw +Shaw +Shrd +Sidd +Sind +Sinh +Sogd +Sogo +Sora +Soyo +Space_Separator +Spacing_Mark +STerm +Sund +Surrogate +Sylo +Symbol +Syrc +Tagb +Takr +Tale +Talu +Taml +Tang +Tavt +Telu +Term +Tfng +Tglg +Thaa +Tibt +Tirh +Titlecase_Letter +Ugar +UIdeo +Unassigned +Uppercase_Letter +Vaii +VS +Wara +Wcho +WSpace +XIDC +XIDS +Xpeo +Xsux +Yezi +Yiii +Zanb +Zinh +Zyyy +Zzzz +In_Basic_Latin +In_Latin_1_Supplement +In_Latin_Extended_A +In_Latin_Extended_B +In_IPA_Extensions +In_Spacing_Modifier_Letters +In_Combining_Diacritical_Marks +In_Greek_and_Coptic +In_Cyrillic +In_Cyrillic_Supplement +In_Armenian +In_Hebrew +In_Arabic +In_Syriac +In_Arabic_Supplement +In_Thaana +In_NKo +In_Samaritan +In_Mandaic +In_Syriac_Supplement +In_Arabic_Extended_A +In_Devanagari +In_Bengali +In_Gurmukhi +In_Gujarati +In_Oriya +In_Tamil +In_Telugu +In_Kannada +In_Malayalam +In_Sinhala +In_Thai +In_Lao +In_Tibetan +In_Myanmar +In_Georgian +In_Hangul_Jamo +In_Ethiopic +In_Ethiopic_Supplement +In_Cherokee +In_Unified_Canadian_Aboriginal_Syllabics +In_Ogham +In_Runic +In_Tagalog +In_Hanunoo +In_Buhid +In_Tagbanwa +In_Khmer +In_Mongolian +In_Unified_Canadian_Aboriginal_Syllabics_Extended +In_Limbu +In_Tai_Le +In_New_Tai_Lue +In_Khmer_Symbols +In_Buginese +In_Tai_Tham +In_Combining_Diacritical_Marks_Extended +In_Balinese +In_Sundanese +In_Batak +In_Lepcha +In_Ol_Chiki +In_Cyrillic_Extended_C +In_Georgian_Extended +In_Sundanese_Supplement +In_Vedic_Extensions +In_Phonetic_Extensions +In_Phonetic_Extensions_Supplement +In_Combining_Diacritical_Marks_Supplement +In_Latin_Extended_Additional +In_Greek_Extended +In_General_Punctuation +In_Superscripts_and_Subscripts +In_Currency_Symbols +In_Combining_Diacritical_Marks_for_Symbols +In_Letterlike_Symbols +In_Number_Forms +In_Arrows +In_Mathematical_Operators +In_Miscellaneous_Technical +In_Control_Pictures +In_Optical_Character_Recognition +In_Enclosed_Alphanumerics +In_Box_Drawing +In_Block_Elements +In_Geometric_Shapes +In_Miscellaneous_Symbols +In_Dingbats +In_Miscellaneous_Mathematical_Symbols_A +In_Supplemental_Arrows_A +In_Braille_Patterns +In_Supplemental_Arrows_B +In_Miscellaneous_Mathematical_Symbols_B +In_Supplemental_Mathematical_Operators +In_Miscellaneous_Symbols_and_Arrows +In_Glagolitic +In_Latin_Extended_C +In_Coptic +In_Georgian_Supplement +In_Tifinagh +In_Ethiopic_Extended +In_Cyrillic_Extended_A +In_Supplemental_Punctuation +In_CJK_Radicals_Supplement +In_Kangxi_Radicals +In_Ideographic_Description_Characters +In_CJK_Symbols_and_Punctuation +In_Hiragana +In_Katakana +In_Bopomofo +In_Hangul_Compatibility_Jamo +In_Kanbun +In_Bopomofo_Extended +In_CJK_Strokes +In_Katakana_Phonetic_Extensions +In_Enclosed_CJK_Letters_and_Months +In_CJK_Compatibility +In_CJK_Unified_Ideographs_Extension_A +In_Yijing_Hexagram_Symbols +In_CJK_Unified_Ideographs +In_Yi_Syllables +In_Yi_Radicals +In_Lisu +In_Vai +In_Cyrillic_Extended_B +In_Bamum +In_Modifier_Tone_Letters +In_Latin_Extended_D +In_Syloti_Nagri +In_Common_Indic_Number_Forms +In_Phags_pa +In_Saurashtra +In_Devanagari_Extended +In_Kayah_Li +In_Rejang +In_Hangul_Jamo_Extended_A +In_Javanese +In_Myanmar_Extended_B +In_Cham +In_Myanmar_Extended_A +In_Tai_Viet +In_Meetei_Mayek_Extensions +In_Ethiopic_Extended_A +In_Latin_Extended_E +In_Cherokee_Supplement +In_Meetei_Mayek +In_Hangul_Syllables +In_Hangul_Jamo_Extended_B +In_High_Surrogates +In_High_Private_Use_Surrogates +In_Low_Surrogates +In_Private_Use_Area +In_CJK_Compatibility_Ideographs +In_Alphabetic_Presentation_Forms +In_Arabic_Presentation_Forms_A +In_Variation_Selectors +In_Vertical_Forms +In_Combining_Half_Marks +In_CJK_Compatibility_Forms +In_Small_Form_Variants +In_Arabic_Presentation_Forms_B +In_Halfwidth_and_Fullwidth_Forms +In_Specials +In_Linear_B_Syllabary +In_Linear_B_Ideograms +In_Aegean_Numbers +In_Ancient_Greek_Numbers +In_Ancient_Symbols +In_Phaistos_Disc +In_Lycian +In_Carian +In_Coptic_Epact_Numbers +In_Old_Italic +In_Gothic +In_Old_Permic +In_Ugaritic +In_Old_Persian +In_Deseret +In_Shavian +In_Osmanya +In_Osage +In_Elbasan +In_Caucasian_Albanian +In_Linear_A +In_Cypriot_Syllabary +In_Imperial_Aramaic +In_Palmyrene +In_Nabataean +In_Hatran +In_Phoenician +In_Lydian +In_Meroitic_Hieroglyphs +In_Meroitic_Cursive +In_Kharoshthi +In_Old_South_Arabian +In_Old_North_Arabian +In_Manichaean +In_Avestan +In_Inscriptional_Parthian +In_Inscriptional_Pahlavi +In_Psalter_Pahlavi +In_Old_Turkic +In_Old_Hungarian +In_Hanifi_Rohingya +In_Rumi_Numeral_Symbols +In_Yezidi +In_Old_Sogdian +In_Sogdian +In_Chorasmian +In_Elymaic +In_Brahmi +In_Kaithi +In_Sora_Sompeng +In_Chakma +In_Mahajani +In_Sharada +In_Sinhala_Archaic_Numbers +In_Khojki +In_Multani +In_Khudawadi +In_Grantha +In_Newa +In_Tirhuta +In_Siddham +In_Modi +In_Mongolian_Supplement +In_Takri +In_Ahom +In_Dogra +In_Warang_Citi +In_Dives_Akuru +In_Nandinagari +In_Zanabazar_Square +In_Soyombo +In_Pau_Cin_Hau +In_Bhaiksuki +In_Marchen +In_Masaram_Gondi +In_Gunjala_Gondi +In_Makasar +In_Lisu_Supplement +In_Tamil_Supplement +In_Cuneiform +In_Cuneiform_Numbers_and_Punctuation +In_Early_Dynastic_Cuneiform +In_Egyptian_Hieroglyphs +In_Egyptian_Hieroglyph_Format_Controls +In_Anatolian_Hieroglyphs +In_Bamum_Supplement +In_Mro +In_Bassa_Vah +In_Pahawh_Hmong +In_Medefaidrin +In_Miao +In_Ideographic_Symbols_and_Punctuation +In_Tangut +In_Tangut_Components +In_Khitan_Small_Script +In_Tangut_Supplement +In_Kana_Supplement +In_Kana_Extended_A +In_Small_Kana_Extension +In_Nushu +In_Duployan +In_Shorthand_Format_Controls +In_Byzantine_Musical_Symbols +In_Musical_Symbols +In_Ancient_Greek_Musical_Notation +In_Mayan_Numerals +In_Tai_Xuan_Jing_Symbols +In_Counting_Rod_Numerals +In_Mathematical_Alphanumeric_Symbols +In_Sutton_SignWriting +In_Glagolitic_Supplement +In_Nyiakeng_Puachue_Hmong +In_Wancho +In_Mende_Kikakui +In_Adlam +In_Indic_Siyaq_Numbers +In_Ottoman_Siyaq_Numbers +In_Arabic_Mathematical_Alphabetic_Symbols +In_Mahjong_Tiles +In_Domino_Tiles +In_Playing_Cards +In_Enclosed_Alphanumeric_Supplement +In_Enclosed_Ideographic_Supplement +In_Miscellaneous_Symbols_and_Pictographs +In_Emoticons +In_Ornamental_Dingbats +In_Transport_and_Map_Symbols +In_Alchemical_Symbols +In_Geometric_Shapes_Extended +In_Supplemental_Arrows_C +In_Supplemental_Symbols_and_Pictographs +In_Chess_Symbols +In_Symbols_and_Pictographs_Extended_A +In_Symbols_for_Legacy_Computing +In_CJK_Unified_Ideographs_Extension_B +In_CJK_Unified_Ideographs_Extension_C +In_CJK_Unified_Ideographs_Extension_D +In_CJK_Unified_Ideographs_Extension_E +In_CJK_Unified_Ideographs_Extension_F +In_CJK_Compatibility_Ideographs_Supplement +In_CJK_Unified_Ideographs_Extension_G +In_Tags +In_Variation_Selectors_Supplement +In_Supplementary_Private_Use_Area_A +In_Supplementary_Private_Use_Area_B +In_No_Block diff --git a/harnesses/base.c b/harnesses/base.c index a88e6f2..1206217 100644 --- a/harnesses/base.c +++ b/harnesses/base.c @@ -10,16 +10,32 @@ #include #include #include - #include "oniguruma.h" #define PARSE_DEPTH_LIMIT 8 -#define RETRY_LIMIT 5000 #define CALL_MAX_NEST_LEVEL 8 +#define SUBEXP_CALL_LIMIT 500 +#define BASE_RETRY_LIMIT 20000 +#define BASE_LENGTH 2048 +#define MATCH_STACK_LIMIT 10000000 +#define MAX_REM_SIZE 1048576 +#define MAX_SLOW_REM_SIZE 1024 +#define SLOW_RETRY_LIMIT 2000 + //#define EXEC_PRINT_INTERVAL 500000 //#define DUMP_DATA_INTERVAL 100000 //#define STAT_PATH "fuzzer.stat_log" +#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD ) + +#define OPTIONS_AT_RUNTIME (ONIG_OPTION_NOTBOL | ONIG_OPTION_NOTEOL | ONIG_OPTION_CHECK_VALIDITY_OF_STRING | ONIG_OPTION_NOT_BEGIN_STRING | ONIG_OPTION_NOT_END_STRING | ONIG_OPTION_NOT_BEGIN_POSITION) + + +#define ADJUST_LEN(enc, len) do {\ + int mlen = ONIGENC_MBC_MINLEN(enc);\ + if (mlen != 1) { len -= len % mlen; }\ +} while (0) + typedef unsigned char uint8_t; #ifdef DUMP_INPUT @@ -103,14 +119,34 @@ output_current_time(FILE* fp) #endif static int -search(regex_t* reg, unsigned char* str, unsigned char* end, int backward) +search(regex_t* reg, unsigned char* str, unsigned char* end, OnigOptionType options, int backward, int sl) { int r; unsigned char *start, *range; OnigRegion *region; + unsigned int retry_limit; + size_t len; region = onig_region_new(); + len = (size_t )(end - str); + if (len < BASE_LENGTH) { + if (sl >= 2) + retry_limit = (unsigned int )SLOW_RETRY_LIMIT; + else + retry_limit = (unsigned int )BASE_RETRY_LIMIT; + } + else + retry_limit = (unsigned int )(BASE_RETRY_LIMIT * BASE_LENGTH / len); + +#ifdef STANDALONE + fprintf(stdout, "retry limit: %u\n", retry_limit); +#endif + + onig_set_retry_limit_in_search(retry_limit); + onig_set_match_stack_limit_size(MATCH_STACK_LIMIT); + onig_set_subexp_call_limit_in_search(SUBEXP_CALL_LIMIT); + if (backward != 0) { start = end; range = str; @@ -120,7 +156,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end, int backward) range = end; } - r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + r = onig_search(reg, str, end, start, range, region, (options & OPTIONS_AT_RUNTIME)); if (r >= 0) { #ifdef STANDALONE int i; @@ -168,7 +204,8 @@ static long VALID_STRING_COUNT; static int exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, - char* apattern, char* apattern_end, char* astr, UChar* end, int backward) + char* apattern, char* apattern_end, char* astr, UChar* end, int backward, + int sl) { int r; regex_t* reg; @@ -181,14 +218,13 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, EXEC_COUNT_INTERVAL++; onig_initialize(&enc, 1); - onig_set_retry_limit_in_search(RETRY_LIMIT); #ifdef PARSE_DEPTH_LIMIT onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); #endif onig_set_subexp_call_max_nest_level(CALL_MAX_NEST_LEVEL); r = onig_new(®, pattern, pattern_end, - options, enc, syntax, &einfo); + (options & OPTIONS_AT_COMPILE), enc, syntax, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r, &einfo); @@ -208,12 +244,12 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, } REGEX_SUCCESS_COUNT++; - r = search(reg, pattern, pattern_end, backward); + r = search(reg, pattern, pattern_end, options, backward, sl); if (r == -2) return -2; if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { VALID_STRING_COUNT++; - r = search(reg, str, end, backward); + r = search(reg, str, end, options, backward, sl); if (r == -2) return -2; } @@ -224,43 +260,52 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, static int alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, - int backward, int pattern_size, size_t remaining_size, unsigned char *data) + int backward, int pattern_size, size_t rem_size, unsigned char *data) { + extern int onig_detect_can_be_slow_pattern(const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax); + int r; + int sl; + unsigned char *pattern; unsigned char *pattern_end; unsigned char *str_null_end; - // copy first PATTERN_SIZE bytes off to be the pattern - unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); + pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); memcpy(pattern, data, pattern_size); pattern_end = pattern + pattern_size; data += pattern_size; - remaining_size -= pattern_size; + rem_size -= pattern_size; + + if (rem_size > MAX_REM_SIZE) rem_size = MAX_REM_SIZE; + + sl = onig_detect_can_be_slow_pattern(pattern, pattern_end, options, enc, syntax); + if (sl > 0) { + if (rem_size > MAX_SLOW_REM_SIZE) + rem_size = MAX_SLOW_REM_SIZE; + } -#if defined(UTF16_BE) || defined(UTF16_LE) - if (remaining_size % 2 == 1) remaining_size--; + ADJUST_LEN(enc, rem_size); +#ifdef STANDALONE + fprintf(stdout, "rem_size: %ld\n", rem_size); #endif - unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); - memcpy(str, data, remaining_size); - str_null_end = str + remaining_size; + unsigned char *str = (unsigned char*)malloc(rem_size != 0 ? rem_size : 1); + memcpy(str, data, rem_size); + str_null_end = str + rem_size; r = exec(enc, options, syntax, (char *)pattern, (char *)pattern_end, - (char *)str, str_null_end, backward); + (char *)str, str_null_end, backward, sl); free(pattern); free(str); return r; } -#define OPTIONS_MASK (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP) - - #ifdef SYNTAX_TEST -#define NUM_CONTROL_BYTES 6 +#define NUM_CONTROL_BYTES 7 #else -#define NUM_CONTROL_BYTES 5 +#define NUM_CONTROL_BYTES 6 #endif int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) @@ -285,14 +330,14 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) ONIG_ENCODING_CP1251, ONIG_ENCODING_BIG5, ONIG_ENCODING_GB18030, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF16_BE, + ONIG_ENCODING_UTF16_LE, + ONIG_ENCODING_UTF16_BE, + ONIG_ENCODING_UTF16_LE, + ONIG_ENCODING_UTF32_BE, + ONIG_ENCODING_UTF32_LE, + ONIG_ENCODING_UTF32_BE, + ONIG_ENCODING_UTF32_LE, ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_ISO_8859_2, ONIG_ENCODING_ISO_8859_3, @@ -341,7 +386,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) int r; int backward; int pattern_size; - size_t remaining_size; + size_t rem_size; unsigned char *data; unsigned char pattern_size_choice; OnigOptionType options; @@ -364,7 +409,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) if (Size < NUM_CONTROL_BYTES) return 0; - remaining_size = Size; + rem_size = Size; data = (unsigned char* )(Data); #ifdef UTF16_BE @@ -375,7 +420,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #else encoding_choice = data[0]; data++; - remaining_size--; + rem_size--; int num_encodings = sizeof(encodings)/sizeof(encodings[0]); enc = encodings[encoding_choice % num_encodings]; @@ -385,7 +430,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #ifdef SYNTAX_TEST syntax_choice = data[0]; data++; - remaining_size--; + rem_size--; int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); syntax = syntaxes[syntax_choice % num_syntaxes]; @@ -393,31 +438,30 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) syntax = ONIG_SYNTAX_DEFAULT; #endif - if ((data[1] & 0xc0) == 0) - options = (data[0] | (data[1] << 8)) & OPTIONS_MASK; + if ((data[2] & 0xc0) == 0) + options = data[0] | (data[1] << 8) | (data[2] << 16); else options = data[0] & ONIG_OPTION_IGNORECASE; - data++; - remaining_size--; - data++; - remaining_size--; + data++; rem_size--; + data++; rem_size--; + data++; rem_size--; pattern_size_choice = data[0]; - data++; - remaining_size--; + data++; rem_size--; backward = (data[0] == 0xbb); - data++; - remaining_size--; + data++; rem_size--; - if (remaining_size == 0) + if (backward != 0) { + options = options & ~ONIG_OPTION_FIND_LONGEST; + } + + if (rem_size == 0) pattern_size = 0; else { - pattern_size = (int )pattern_size_choice % remaining_size; -#if defined(UTF16_BE) || defined(UTF16_LE) - if (pattern_size % 2 == 1) pattern_size--; -#endif + pattern_size = (int )pattern_size_choice % rem_size; + ADJUST_LEN(enc, pattern_size); } #ifdef STANDALONE @@ -440,7 +484,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #endif r = alloc_exec(enc, options, syntax, backward, pattern_size, - remaining_size, data); + rem_size, data); if (r == -2) exit(-2); #ifndef STANDALONE @@ -485,15 +529,25 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #ifdef STANDALONE +#define MAX_INPUT_DATA_SIZE 4194304 + extern int main(int argc, char* argv[]) { + size_t max_size; size_t n; - uint8_t Data[10000]; + uint8_t Data[MAX_INPUT_DATA_SIZE]; - n = read(0, Data, sizeof(Data)); - fprintf(stdout, "n: %ld\n", n); - LLVMFuzzerTestOneInput(Data, n); + if (argc > 1) { + max_size = (size_t )atoi(argv[1]); + } + else { + max_size = sizeof(Data); + } + n = read(0, Data, max_size); + fprintf(stdout, "read size: %ld, max_size: %ld\n", n, max_size); + + LLVMFuzzerTestOneInput(Data, n); return 0; } #endif /* STANDALONE */ diff --git a/harnesses/makefile b/harnesses/makefile index b324295..d4fcfb6 100644 --- a/harnesses/makefile +++ b/harnesses/makefile @@ -1,4 +1,8 @@ # makefile for harness +DEBUG_OUT = +#DEBUG_OUT = -DONIG_DEBUG_PARSE -DONIG_DEBUG_COMPILE +#DEBUG_OUT = -DONIG_DEBUG_PARSE -DONIG_DEBUG_COMPILE -DONIG_DEBUG_MATCH_COUNTER + SRC = ../src CFLAGS = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DSTANDALONE @@ -12,7 +16,8 @@ TARGETS = fuzzer-encode fuzzer-syntax fuzzer-utf16-be fuzzer-utf16-le \ OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full fuzzer-deluxe read-deluxe -default: $(TARGETS) +#default: $(TARGETS) +default: read-syntax fuzzer-encode: base.c $(ONIG_LIB) clang $(CFLAGS) $< $(LIBS) -o $@ @@ -60,7 +65,7 @@ libfuzzer-onig-full: libfuzzer-onig.cpp $(ONIG_LIB) $(ONIG_LIB): cd ..; make clean #cd ..; autoreconf -vfi - cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" + cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer $(DEBUG_OUT)" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" cd ..; make -j4 diff --git a/sample/Makefile.am b/sample/Makefile.am index 2bf4697..c2c4596 100644 --- a/sample/Makefile.am +++ b/sample/Makefile.am @@ -8,9 +8,9 @@ AM_LDFLAGS = -L$(prefix)/lib AM_CPPFLAGS = -I$(top_srcdir)/src if ENABLE_POSIX_API -TESTS = encode listcap names posix simple sql syntax user_property callout echo count bug_fix regset +TESTS = encode listcap names posix simple sql syntax user_property callout echo count bug_fix regset scan else -TESTS = encode listcap names simple sql syntax user_property callout echo count bug_fix regset +TESTS = encode listcap names simple sql syntax user_property callout echo count bug_fix regset scan endif check_PROGRAMS = $(TESTS) @@ -28,6 +28,7 @@ echo_SOURCES = echo.c count_SOURCES = count.c bug_fix = bug_fix.c regset_SOURCES = regset.c +scan_SOURCES = scan.c sampledir = . @@ -47,3 +48,4 @@ endif $(sampledir)/count $(sampledir)/bug_fix $(sampledir)/regset + $(sampledir)/scan diff --git a/sample/scan.c b/sample/scan.c index 4039e46..fe1bac1 100644 --- a/sample/scan.c +++ b/sample/scan.c @@ -21,14 +21,14 @@ scan_callback(int n, int r, OnigRegion* region, void* arg) } static int -scan(regex_t* reg, unsigned char* str, unsigned char* end) +scan(regex_t* reg, OnigOptionType options, unsigned char* str, unsigned char* end) { int r; OnigRegion *region; region = onig_region_new(); - r = onig_scan(reg, str, end, region, ONIG_OPTION_NONE, scan_callback, NULL); + r = onig_scan(reg, str, end, region, options, scan_callback, NULL); if (r >= 0) { fprintf(stdout, "total: %d match\n", r); } @@ -45,7 +45,7 @@ scan(regex_t* reg, unsigned char* str, unsigned char* end) } static int -exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr) +exec(OnigEncoding enc, OnigOptionType options, OnigOptionType runtime_options, char* apattern, char* astr) { int r; unsigned char *end; @@ -69,7 +69,7 @@ exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr) } end = str + onigenc_str_bytelen_null(enc, str); - r = scan(reg, str, end); + r = scan(reg, runtime_options, str, end); onig_free(reg); onig_end(); @@ -79,11 +79,23 @@ exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr) extern int main(int argc, char* argv[]) { - exec(ONIG_ENCODING_UTF8, ONIG_OPTION_NONE, + exec(ONIG_ENCODING_UTF8, ONIG_OPTION_NONE, ONIG_OPTION_NONE, "\\Ga+\\s*", "a aa aaa baaa"); + fprintf(stdout, "\n"); + + exec(ONIG_ENCODING_UTF8, ONIG_OPTION_NONE, ONIG_OPTION_NOT_BEGIN_POSITION, + "\\Ga+\\s*", "a aa aaa baaa"); + fprintf(stdout, "\n"); + exec(ONIG_ENCODING_UTF8, ONIG_OPTION_NONE, ONIG_OPTION_NONE, + "(?!\\G)a+\\s*", "a aa aaa baaa"); fprintf(stdout, "\n"); - exec(ONIG_ENCODING_UTF8, ONIG_OPTION_NONE, + + exec(ONIG_ENCODING_UTF8, ONIG_OPTION_NONE, ONIG_OPTION_NOT_BEGIN_POSITION, + "(?!\\G)a+\\s*", "a aa aaa baaa"); + fprintf(stdout, "\n"); + + exec(ONIG_ENCODING_UTF8, ONIG_OPTION_NONE, ONIG_OPTION_NONE, "a+\\s*", "a aa aaa baaa"); return 0; diff --git a/src/Makefile.am b/src/Makefile.am index 36c2222..44a4167 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -11,10 +11,15 @@ posix_headers = onigposix.h if ENABLE_POSIX_API posix_sources = regposix.c regposerr.c include_HEADERS += $(posix_headers) +AM_CFLAGS += -DUSE_POSIX_API else posix_sources = endif +if ENABLE_BINARY_COMPATIBLE_POSIX_API +AM_CFLAGS += -DUSE_BINARY_COMPATIBLE_POSIX_API +endif + lib_LTLIBRARIES = $(libname) @@ -45,13 +50,29 @@ libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ gb18030.c koi8_r.c cp1251.c \ onig_init.c -libonig_la_LDFLAGS = -version-info $(LTVERSION) - EXTRA_DIST = koi8.c mktable.c \ unicode_fold_data.c unicode_property_data.c \ unicode_property_data_posix.c \ unicode_egcb_data.c unicode_wb_data.c + +libonig_la_LDFLAGS = $(EXTRA_LIBONIG_LDFLAGS) -version-info $(LTVERSION) + +if USE_LIBONIG_DEF_FILE + +libonig_la_LDFLAGS += -Wl,--output-def,$(LIBONIG_DEF_FILE) + +install-data-hook: + echo "$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_DATA) $(LIBONIG_DEF_FILE) $(DESTDIR)$(libdir)"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_DATA) $(LIBONIG_DEF_FILE) $(DESTDIR)$(libdir) || exit 1 + +uninstall-hook: + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$(LIBONIG_DEF_FILE)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$(LIBONIG_DEF_FILE)" + +endif + + dll: $(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \ $(LIBS) diff --git a/src/Makefile.windows b/src/Makefile.windows index 90ebf28..11d6fd8 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -18,7 +18,7 @@ LINKFLAGS = -link -incremental:no -pdb:none INSTALL = install -c CP = copy CC = cl -DEFS = -DHAVE_CONFIG_H +DEFS = -DHAVE_CONFIG_H -DUSE_POSIX_API -DUSE_BINARY_COMPATIBLE_POSIX_API subdirs = diff --git a/src/big5.c b/src/big5.c index 79ae1e3..faff845 100644 --- a/src/big5.c +++ b/src/big5.c @@ -2,7 +2,7 @@ big5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,15 @@ static int big5_code_to_mbclen(OnigCodePoint code) { if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - if ((code & 0xff00) != 0) return 2; - if (EncLen_BIG5[(int )(code & 0xff)] == 1) return 1; + + if ((code & 0xff00) != 0) { + if (EncLen_BIG5[(int )(code >> 8) & 0xff] == 2) + return 2; + } + else { + if (EncLen_BIG5[(int )(code & 0xff)] == 1) + return 1; + } return ONIGERR_INVALID_CODE_POINT_VALUE; } diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index 60db86c..c213a09 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -43,6 +43,12 @@ /* The size of `long', as computed by sizeof. */ #cmakedefine SIZEOF_LONG ${SIZEOF_LONG} +/* The size of `long long', as computed by sizeof. */ +#cmakedefine SIZEOF_LONG_LONG ${SIZEOF_LONG_LONG} + +/* The size of `void*', as computed by sizeof. */ +#cmakedefine SIZEOF_VOIDP ${SIZEOF_VOIDP} + /* Define if enable CR+NL as line terminator */ #cmakedefine USE_CRNL_AS_LINE_TERMINATOR ${USE_CRNL_AS_LINE_TERMINATOR} diff --git a/src/euc_jp.c b/src/euc_jp.c index 640b3e3..bfe91bf 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -2,7 +2,7 @@ euc_jp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -114,10 +114,20 @@ static int code_to_mbclen(OnigCodePoint code) { if (ONIGENC_IS_CODE_ASCII(code)) return 1; - else if ((code & 0xff0000) != 0) return 3; - else if ((code & 0xff00) != 0) return 2; - else - return ONIGERR_INVALID_CODE_POINT_VALUE; + else if ((code & 0xff0000) != 0) { + if (EncLen_EUCJP[(int )(code >> 16) & 0xff] == 3) + return 3; + } + else if ((code & 0xff00) != 0) { + if (EncLen_EUCJP[(int )(code >> 8) & 0xff] == 2) + return 2; + } + else if (code < 256) { + if (EncLen_EUCJP[(int )(code & 0xff)] == 1) + return 1; + } + + return ONIGERR_INVALID_CODE_POINT_VALUE; } static int @@ -125,8 +135,13 @@ code_to_mbc(OnigCodePoint code, UChar *buf) { UChar *p = buf; - if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); - if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + if ((code & 0xff0000) != 0) { + *p++ = (UChar )(((code >> 16) & 0xff)); + *p++ = (UChar )(((code >> 8) & 0xff)); + } + else if ((code & 0xff00) != 0) + *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); #if 1 diff --git a/src/euc_kr.c b/src/euc_kr.c index 7fa50af..b0e9fbf 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -2,7 +2,7 @@ euc_kr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,15 @@ static int euckr_code_to_mbclen(OnigCodePoint code) { if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - if ((code & 0xff00) != 0) return 2; - if (EncLen_EUCKR[(int )(code & 0xff)] == 1) return 1; + + if ((code & 0xff00) != 0) { + if (EncLen_EUCKR[(int )(code >> 8) & 0xff] == 2) + return 2; + } + else { + if (EncLen_EUCKR[(int )(code & 0xff)] == 1) + return 1; + } return ONIGERR_INVALID_CODE_POINT_VALUE; } diff --git a/src/euc_tw.c b/src/euc_tw.c index 8e72b97..99dc5ec 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -2,7 +2,7 @@ euc_tw.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,15 +57,22 @@ euctw_mbc_enc_len(const UChar* p) static int euctw_code_to_mbclen(OnigCodePoint code) { - if ((code & 0xff000000) != 0) return 4; - else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - else if ((code & 0xff00) != 0) return 2; + if ((code & 0xff000000) != 0) { + if (EncLen_EUCTW[(int )(code >> 24) & 0xff] == 4) + return 4; + } + else if ((code & 0xff0000) != 0) + return ONIGERR_INVALID_CODE_POINT_VALUE; + else if ((code & 0xff00) != 0) { + if (EncLen_EUCTW[(int )(code >> 8) & 0xff] == 2) + return 2; + } else { if (EncLen_EUCTW[(int )(code & 0xff)] == 1) return 1; - - return ONIGERR_INVALID_CODE_POINT_VALUE; } + + return ONIGERR_INVALID_CODE_POINT_VALUE; } static int diff --git a/src/gb18030.c b/src/gb18030.c index 1385a7f..7409d3e 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -89,15 +89,25 @@ gb18030_mbc_enc_len(const UChar* p) static int gb18030_code_to_mbclen(OnigCodePoint code) { - if ((code & 0xff000000) != 0) return 4; - else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - else if ((code & 0xff00) != 0) return 2; + if ((code & 0xff000000) != 0) { + if (GB18030_MAP[(int )(code >> 24) & 0xff] == CM) + if (GB18030_MAP[(int )(code >> 16) & 0xff] == C4) + return 4; + } + else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; + else if ((code & 0xff00) != 0) { + if (GB18030_MAP[(int )(code >> 8) & 0xff] == CM) { + char c = GB18030_MAP[(int )code & 0xff]; + if (c == CM || c == C2) + return 2; + } + } else { - if (GB18030_MAP[(int )(code & 0xff)] == CM) - return ONIGERR_INVALID_CODE_POINT_VALUE; - - return 1; + if (GB18030_MAP[(int )(code & 0xff)] != CM) + return 1; } + + return ONIGERR_INVALID_CODE_POINT_VALUE; } static int diff --git a/src/make_property.sh b/src/make_property.sh index e5f1244..1c5e0f5 100755 --- a/src/make_property.sh +++ b/src/make_property.sh @@ -1,7 +1,7 @@ #!/bin/sh GPERF=gperf -SED=gsed +SED=sed TMP1=gperf1.tmp TMP2=gperf2.tmp diff --git a/src/make_unicode_property.sh b/src/make_unicode_property.sh index 5129376..ff7dc62 100755 --- a/src/make_unicode_property.sh +++ b/src/make_unicode_property.sh @@ -1,7 +1,7 @@ #!/bin/sh GPERF=gperf -SED=gsed +SED=sed NAME=unicode_property_data TMP1=gperf1.tmp diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py index 285c462..d1b3377 100755 --- a/src/make_unicode_property_data.py +++ b/src/make_unicode_property_data.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # make_unicode_property_data.py -# Copyright (c) 2016-2019 K.Kosako +# Copyright (c) 2016-2020 K.Kosako import sys import re @@ -405,7 +405,7 @@ def set_max_prop_name(name): def entry_prop_name(name, index): set_max_prop_name(name) if OUTPUT_LIST_MODE and index >= len(POSIX_LIST): - print >> UPF, "%3d: %s" % (index, name) + print >> UPF, "%s" % (name) def entry_and_print_prop_and_index(name, index): entry_prop_name(name, index) diff --git a/src/onigposix.h b/src/onigposix.h index 37e09ea..3514f80 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -74,19 +74,19 @@ extern "C" { #define REG_POSIX_ENCODING_UTF16_LE 5 -typedef int regoff_t; +typedef int onig_posix_regoff_t; typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} regmatch_t; + onig_posix_regoff_t rm_so; + onig_posix_regoff_t rm_eo; +} onig_posix_regmatch_t; /* POSIX regex_t */ typedef struct { void* onig; /* Oniguruma regex_t* */ size_t re_nsub; int comp_options; -} regex_t; +} onig_posix_regex_t; #ifndef P_ @@ -160,16 +160,31 @@ ONIG_EXTERN int onig_end P_((void)); #endif /* ONIGURUMA_H */ -ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); -ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); -ONIG_EXTERN void regfree P_((regex_t* reg)); -ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size)); +ONIG_EXTERN int onig_posix_regcomp P_((onig_posix_regex_t* reg, const char* pat, int options)); +ONIG_EXTERN int onig_posix_regexec P_((onig_posix_regex_t* reg, const char* str, size_t nmatch, onig_posix_regmatch_t* matches, int options)); +ONIG_EXTERN void onig_posix_regfree P_((onig_posix_regex_t* reg)); +ONIG_EXTERN size_t onig_posix_regerror P_((int code, const onig_posix_regex_t* reg, char* buf, size_t size)); /* extended API */ -ONIG_EXTERN void reg_set_encoding P_((int enc)); -ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); -ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg)); -ONIG_EXTERN int reg_number_of_names P_((regex_t* reg)); +ONIG_EXTERN void onig_posix_reg_set_encoding P_((int enc)); +ONIG_EXTERN int onig_posix_reg_name_to_group_numbers P_((onig_posix_regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); +ONIG_EXTERN int onig_posix_reg_foreach_name P_((onig_posix_regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*), void* arg)); +ONIG_EXTERN int onig_posix_reg_number_of_names P_((onig_posix_regex_t* reg)); + + +/* aliases */ +#define regex_t onig_posix_regex_t +#define regmatch_t onig_posix_regmatch_t +#define regoff_t onig_posix_regoff_t + +#define regcomp onig_posix_regcomp +#define regexec onig_posix_regexec +#define regfree onig_posix_regfree +#define regerror onig_posix_regerror +#define reg_set_encoding onig_posix_reg_set_encoding +#define reg_name_to_group_numbers onig_posix_reg_name_to_group_numbers +#define reg_foreach_name onig_posix_reg_foreach_name +#define reg_number_of_names onig_posix_reg_number_of_names #ifdef __cplusplus } diff --git a/src/oniguruma.h b/src/oniguruma.h index 15f6ef0..d983fc9 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -36,9 +36,9 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 #define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 5 +#define ONIGURUMA_VERSION_TEENY 6 -#define ONIGURUMA_VERSION_INT 60905 +#define ONIGURUMA_VERSION_INT 60906 #ifndef P_ #if defined(__STDC__) || defined(_WIN32) @@ -395,8 +395,12 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_POSIX_IS_ASCII (ONIG_OPTION_SPACE_IS_ASCII << 1) #define ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER (ONIG_OPTION_POSIX_IS_ASCII << 1) #define ONIG_OPTION_TEXT_SEGMENT_WORD (ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER << 1) +/* options (search time) */ +#define ONIG_OPTION_NOT_BEGIN_STRING (ONIG_OPTION_TEXT_SEGMENT_WORD << 1) +#define ONIG_OPTION_NOT_END_STRING (ONIG_OPTION_NOT_BEGIN_STRING << 1) +#define ONIG_OPTION_NOT_BEGIN_POSITION (ONIG_OPTION_NOT_END_STRING << 1) -#define ONIG_OPTION_MAXBIT ONIG_OPTION_TEXT_SEGMENT_WORD /* limit */ +#define ONIG_OPTION_MAXBIT ONIG_OPTION_NOT_BEGIN_POSITION #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) @@ -561,6 +565,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16 #define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER -17 #define ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER -18 +#define ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER -19 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 #define ONIGERR_FAIL_TO_INITIALIZE -23 @@ -919,6 +924,10 @@ int onig_set_capture_num_limit P_((int num)); ONIG_EXTERN int onig_set_parse_depth_limit P_((unsigned int depth)); ONIG_EXTERN +unsigned long onig_get_subexp_call_limit_in_search P_((void)); +ONIG_EXTERN +int onig_set_subexp_call_limit_in_search P_((unsigned long n)); +ONIG_EXTERN int onig_get_subexp_call_max_nest_level P_((void)); ONIG_EXTERN int onig_set_subexp_call_max_nest_level P_((int level)); diff --git a/src/regcomp.c b/src/regcomp.c index 4d5b78f..dd2b328 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -133,6 +133,7 @@ ops_init(regex_t* reg, int init_alloc_size) size = sizeof(Operation) * init_alloc_size; p = (Operation* )xrealloc(reg->ops, size); CHECK_NULL_RETURN_MEMERR(p); + reg->ops = p; #ifdef USE_DIRECT_THREADED_CODE { enum OpCode* cp; @@ -144,13 +145,12 @@ ops_init(regex_t* reg, int init_alloc_size) #endif } else { - p = (Operation* )0; + reg->ops = (Operation* )0; #ifdef USE_DIRECT_THREADED_CODE reg->ocs = (enum OpCode* )0; #endif } - reg->ops = p; reg->ops_curr = 0; /* !!! not yet done ops_new() */ reg->ops_alloc = init_alloc_size; reg->ops_used = 0; @@ -176,6 +176,7 @@ ops_expand(regex_t* reg, int n) size = sizeof(Operation) * n; p = (Operation* )xrealloc(reg->ops, size); CHECK_NULL_RETURN_MEMERR(p); + reg->ops = p; #ifdef USE_DIRECT_THREADED_CODE size = sizeof(enum OpCode) * n; @@ -184,7 +185,6 @@ ops_expand(regex_t* reg, int n) reg->ocs = cp; #endif - reg->ops = p; reg->ops_alloc = n; if (reg->ops_used == 0) reg->ops_curr = 0; @@ -265,10 +265,12 @@ ops_free(regex_t* reg) case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC: break; case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC: + case OP_BACKREF_CHECK: +#ifdef USE_BACKREF_WITH_LEVEL case OP_BACKREF_WITH_LEVEL: case OP_BACKREF_WITH_LEVEL_IC: - case OP_BACKREF_CHECK: case OP_BACKREF_CHECK_WITH_LEVEL: +#endif if (op->backref_general.num != 1) xfree(op->backref_general.ns); break; @@ -631,7 +633,7 @@ mmcl_add(MinMaxCharLen* to, MinMaxCharLen* add) to->min = distance_add(to->min, add->min); to->max = distance_add(to->max, add->max); - to->min_is_sure = add->min_is_sure != 0 && to->min_is_sure != 0; + to->min_is_sure = add->min_is_sure != FALSE && to->min_is_sure != FALSE; } static void @@ -656,8 +658,11 @@ static void mmcl_alt_merge(MinMaxCharLen* to, MinMaxCharLen* alt) { if (to->min > alt->min) { - to->min = alt->min; - if (alt->min_is_sure != 0) + to->min = alt->min; + to->min_is_sure = alt->min_is_sure; + } + else if (to->min == alt->min) { + if (alt->min_is_sure != FALSE) to->min_is_sure = TRUE; } @@ -840,7 +845,7 @@ node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env, en->min_char_len = ci->min; en->max_char_len = ci->max; NODE_STATUS_ADD(node, FIXED_CLEN); - if (ci->min_is_sure != 0) + if (ci->min_is_sure != FALSE) NODE_STATUS_ADD(node, FIXED_CLEN_MIN_SURE); } } @@ -882,15 +887,15 @@ node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env, } break; - case NODE_ANCHOR: + case NODE_GIMMICK: mmcl_set(ci, 0); - /* can't optimize look-behind if anchor exists. */ - ci->min_is_sure = FALSE; break; - case NODE_GIMMICK: + case NODE_ANCHOR: zero: mmcl_set(ci, 0); + /* can't optimize look-behind if anchor exists. */ + ci->min_is_sure = FALSE; break; case NODE_BACKREF: @@ -1082,6 +1087,9 @@ compile_call(CallNode* node, regex_t* reg, ScanEnv* env) if (r != 0) return r; COP(reg)->call.addr = 0; /* dummy addr. */ +#ifdef ONIG_DEBUG_MATCH_COUNTER + COP(reg)->call.called_mem = node->called_gnum; +#endif offset = COP_CURR_OFFSET_BYTES(reg, call.addr); r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node)); @@ -1822,7 +1830,6 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) COP(reg)->memory_end.num = node->m.regnum; if (NODE_IS_CALLED(node)) { - if (r != 0) return r; r = add_op(reg, OP_RETURN); } #else @@ -2764,7 +2771,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) static int make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter) { - int r = 0; + int r; Node* node = *plink; switch (NODE_TYPE(node)) { @@ -2772,17 +2779,17 @@ make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter) case NODE_ALT: do { r = make_named_capture_number_map(&(NODE_CAR(node)), map, counter); - } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); + if (r < 0) return r; break; case NODE_QUANT: { Node** ptarget = &(NODE_BODY(node)); - Node* old = *ptarget; r = make_named_capture_number_map(ptarget, map, counter); - if (r != 0) return r; - if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) { - r = onig_reduce_nested_quantifier(node); + if (r < 0) return r; + if (r == 1 && NODE_TYPE(*ptarget) == NODE_QUANT) { + return onig_reduce_nested_quantifier(node); } } break; @@ -2796,41 +2803,48 @@ make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter) map[en->m.regnum].new_val = *counter; en->m.regnum = *counter; r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); + if (r < 0) return r; } else { *plink = NODE_BODY(node); NODE_BODY(node) = NULL_NODE; onig_node_free(node); r = make_named_capture_number_map(plink, map, counter); + if (r < 0) return r; + return 1; } } else if (en->type == BAG_IF_ELSE) { r = make_named_capture_number_map(&(NODE_BAG_BODY(en)), map, counter); - if (r != 0) return r; + if (r < 0) return r; if (IS_NOT_NULL(en->te.Then)) { r = make_named_capture_number_map(&(en->te.Then), map, counter); - if (r != 0) return r; + if (r < 0) return r; } if (IS_NOT_NULL(en->te.Else)) { r = make_named_capture_number_map(&(en->te.Else), map, counter); - if (r != 0) return r; + if (r < 0) return r; } } - else + else { r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); + if (r < 0) return r; + } } break; case NODE_ANCHOR: - if (IS_NOT_NULL(NODE_BODY(node))) + if (IS_NOT_NULL(NODE_BODY(node))) { r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter); + if (r < 0) return r; + } break; default: break; } - return r; + return 0; } static int @@ -2982,7 +2996,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) } counter = 0; r = make_named_capture_number_map(root, map, &counter); - if (r != 0) return r; + if (r < 0) return r; r = renumber_backref_traverse(*root, map); if (r != 0) return r; @@ -3546,7 +3560,9 @@ check_node_in_look_behind(Node* node, int not, int* used) if (r != 0) break; if (en->type == BAG_MEMORY) { - if (NODE_IS_BACKREF(node) || NODE_IS_CALLED(node)) *used = TRUE; + if (NODE_IS_BACKREF(node) || NODE_IS_CALLED(node) + || NODE_IS_REFERENCED(node)) + *used = TRUE; } else if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { @@ -3978,6 +3994,7 @@ set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env) { BagNode* en = BAG_(node); + r = 0; if (en->type == BAG_MEMORY) { if (NODE_IS_BACKREF(node)) { if (IS_NOT_NULL(empty)) @@ -4484,7 +4501,7 @@ remove_from_list(Node* prev, Node* a) } static int -reduce_string_list(Node* node) +reduce_string_list(Node* node, OnigEncoding enc) { int r = 0; @@ -4515,43 +4532,70 @@ reduce_string_list(Node* node) } } else { - prev = NULL_NODE; + if (IS_NOT_NULL(prev)) { +#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE + StrNode* sn = STR_(prev); + if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end)) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; +#endif + prev = NULL_NODE; + } + r = reduce_string_list(curr, enc); + if (r != 0) return r; prev_node = node; } node = next_node; } while (r == 0 && IS_NOT_NULL(node)); + +#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE + if (IS_NOT_NULL(prev)) { + StrNode* sn = STR_(prev); + if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end)) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + } +#endif } break; case NODE_ALT: do { - r = reduce_string_list(NODE_CAR(node)); + r = reduce_string_list(NODE_CAR(node), enc); } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; +#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE + case NODE_STRING: + { + StrNode* sn = STR_(node); + if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end)) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + } + break; +#endif + case NODE_ANCHOR: if (IS_NULL(NODE_BODY(node))) break; /* fall */ case NODE_QUANT: - r = reduce_string_list(NODE_BODY(node)); + r = reduce_string_list(NODE_BODY(node), enc); break; case NODE_BAG: { BagNode* en = BAG_(node); - r = reduce_string_list(NODE_BODY(node)); + r = reduce_string_list(NODE_BODY(node), enc); if (r != 0) return r; if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { - r = reduce_string_list(en->te.Then); + r = reduce_string_list(en->te.Then, enc); if (r != 0) return r; } if (IS_NOT_NULL(en->te.Else)) { - r = reduce_string_list(en->te.Else); + r = reduce_string_list(en->te.Else, enc); if (r != 0) return r; } } @@ -4723,7 +4767,7 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } - if (ci.min == 0 && ci.min_is_sure != 0 && used == FALSE) { + if (ci.min == 0 && ci.min_is_sure != FALSE && used == FALSE) { if (an->type == ANCR_LOOK_BEHIND_NOT) r = onig_node_reset_fail(node); else @@ -4779,18 +4823,23 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env) static int tune_next(Node* node, Node* next_node, regex_t* reg) { + int called; NodeType type; + called = FALSE; + retry: type = NODE_TYPE(node); if (type == NODE_QUANT) { QuantNode* qn = QUANT_(node); if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) { #ifdef USE_QUANT_PEEK_NEXT - Node* n = get_tree_head_literal(next_node, 1, reg); - /* '\0': for UTF-16BE etc... */ - if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') { - qn->next_head_exact = n; + if (called == FALSE) { + Node* n = get_tree_head_literal(next_node, 1, reg); + /* '\0': for UTF-16BE etc... */ + if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') { + qn->next_head_exact = n; + } } #endif /* automatic posseivation a*b ==> (?>a*)b */ @@ -4815,6 +4864,8 @@ tune_next(Node* node, Node* next_node, regex_t* reg) else if (type == NODE_BAG) { BagNode* en = BAG_(node); if (en->type == BAG_MEMORY) { + if (NODE_IS_CALLED(node)) + called = TRUE; node = NODE_BODY(node); goto retry; } @@ -4999,17 +5050,18 @@ unravel_cf_look_behind_add(Node** rlist, Node** rsn, { int r, i, found; - found = 0; + found = FALSE; for (i = 0; i < n; i++) { OnigCaseFoldCodeItem* item = items + i; if (item->byte_len == one_len) { if (item->code_len == 1) { - found = 1; + found = TRUE; + break; } } } - if (found == 0) { + if (found == FALSE) { r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */); } else { @@ -5073,6 +5125,7 @@ unravel_case_fold_string(Node* node, regex_t* reg, int state) one_len = (OnigLen )enclen(enc, p); if (n == 0) { q = p + one_len; + if (q > end) q = end; r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */); if (r != 0) goto err; } @@ -5221,12 +5274,12 @@ quantifiers_memory_node_info(Node* node) __inline #endif static int -tune_call_node_call(CallNode* cn, ScanEnv* env, int state) +check_call_reference(CallNode* cn, ScanEnv* env, int state) { MemEnv* mem_env = SCANENV_MEMENV(env); if (cn->by_number != 0) { - int gnum = cn->group_num; + int gnum = cn->called_gnum; if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && @@ -5241,12 +5294,14 @@ tune_call_node_call(CallNode* cn, ScanEnv* env, int state) } set_call_attr: - NODE_CALL_BODY(cn) = mem_env[cn->group_num].mem_node; + NODE_CALL_BODY(cn) = mem_env[cn->called_gnum].mem_node; if (IS_NULL(NODE_CALL_BODY(cn))) { onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); return ONIGERR_UNDEFINED_NAME_REFERENCE; } + + NODE_STATUS_ADD(NODE_CALL_BODY(cn), REFERENCED); } else { int *refs; @@ -5263,7 +5318,7 @@ tune_call_node_call(CallNode* cn, ScanEnv* env, int state) return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; } else { - cn->group_num = refs[0]; + cn->called_gnum = refs[0]; goto set_call_attr; } } @@ -5396,7 +5451,7 @@ tune_call(Node* node, ScanEnv* env, int state) CALL_(node)->entry_count--; } - r = tune_call_node_call(CALL_(node), env, state); + r = check_call_reference(CALL_(node), env, state); break; default: @@ -6187,8 +6242,10 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc) r = 1; /* 1:full */ break; } - for (j = 0; j < len && p < end; j++) + for (j = 0; j < len && p < end; j++) { + /* coverity[overrun-local] */ to->s[i++] = *p++; + } } to->len = i; @@ -6210,8 +6267,10 @@ concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc) for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { len = enclen(enc, p); if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len && p < end; j++) + for (j = 0; j < len && p < end; j++) { + /* coverity[overrun-local] */ to->s[i++] = *p++; + } } to->len = i; @@ -7229,19 +7288,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, else reg->ops_used = 0; - reg->string_pool = 0; - reg->string_pool_end = 0; - reg->num_mem = 0; - reg->num_repeat = 0; - reg->num_empty_check = 0; - reg->repeat_range_alloc = 0; - reg->repeat_range = (RepeatRange* )NULL; - reg->empty_status_mem = 0; - r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env); if (r != 0) goto err; - r = reduce_string_list(root); + r = reduce_string_list(root, reg->enc); if (r != 0) goto err; /* mixed use named group and no-named group */ @@ -7653,6 +7703,134 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) return onig_is_code_in_cc_len(len, code, cc); } +typedef struct { + int prec_read; + int look_behind; + int backref_with_level; + int call; +} SlowElementCount; + +static int +node_detect_can_be_slow(Node* node, SlowElementCount* ct) +{ + int r; + + r = 0; + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + do { + r = node_detect_can_be_slow(NODE_CAR(node), ct); + if (r != 0) return r; + } while (IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_QUANT: + r = node_detect_can_be_slow(NODE_BODY(node), ct); + break; + + case NODE_ANCHOR: + switch (ANCHOR_(node)->type) { + case ANCR_PREC_READ: + case ANCR_PREC_READ_NOT: + ct->prec_read++; + break; + case ANCR_LOOK_BEHIND: + case ANCR_LOOK_BEHIND_NOT: + ct->look_behind++; + break; + default: + break; + } + + if (ANCHOR_HAS_BODY(ANCHOR_(node))) + r = node_detect_can_be_slow(NODE_BODY(node), ct); + break; + + case NODE_BAG: + { + BagNode* en = BAG_(node); + + r = node_detect_can_be_slow(NODE_BODY(node), ct); + if (r != 0) return r; + + if (en->type == BAG_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = node_detect_can_be_slow(en->te.Then, ct); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = node_detect_can_be_slow(en->te.Else, ct); + if (r != 0) return r; + } + } + } + break; + +#ifdef USE_BACKREF_WITH_LEVEL + case NODE_BACKREF: + if (NODE_IS_NEST_LEVEL(node)) + ct->backref_with_level++; + break; +#endif + +#ifdef USE_CALL + case NODE_CALL: + ct->call++; + break; +#endif + + default: + break; + } + + return r; +} + +extern int +onig_detect_can_be_slow_pattern(const UChar* pattern, + const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, + OnigSyntaxType* syntax) +{ + int r; + regex_t* reg; + Node* root; + ScanEnv scan_env; + SlowElementCount count; + + reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(reg)) return ONIGERR_MEMORY; + + r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r != 0) { + xfree(reg); + return r; + } + + root = 0; + r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env); + if (r == 0) { + count.prec_read = 0; + count.look_behind = 0; + count.backref_with_level = 0; + count.call = 0; + + r = node_detect_can_be_slow(root, &count); + if (r == 0) { + int n = count.prec_read + count.look_behind + + count.backref_with_level + count.call; + r = n; + } + } + + if (IS_NOT_NULL(scan_env.mem_env_dynamic)) + xfree(scan_env.mem_env_dynamic); + + onig_node_free(root); + onig_free(reg); + return r; +} + #ifdef ONIG_DEBUG_PARSE @@ -7734,14 +7912,18 @@ print_indent_tree(FILE* f, Node* node, int indent) break; case NODE_CCLASS: +#define CCLASS_MBUF_MAX_OUTPUT_NUM 10 + fprintf(f, "", node); if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f); if (CCLASS_(node)->mbuf) { BBuf* bbuf = CCLASS_(node)->mbuf; - for (i = 0; i < bbuf->used; i++) { + fprintf(f, " mbuf(%u) ", bbuf->used); + for (i = 0; i < bbuf->used && i < CCLASS_MBUF_MAX_OUTPUT_NUM; i++) { if (i > 0) fprintf(f, ","); fprintf(f, "%0x", bbuf->p[i]); } + if (i < bbuf->used) fprintf(f, "..."); } break; @@ -7822,6 +8004,11 @@ print_indent_tree(FILE* f, Node* node, int indent) if (i > 0) fputs(", ", f); fprintf(f, "%d", p[i]); } +#ifdef USE_BACKREF_WITH_LEVEL + if (NODE_IS_NEST_LEVEL(node)) { + fprintf(f, ", level: %d", br->nest_level); + } +#endif } break; @@ -7830,6 +8017,7 @@ print_indent_tree(FILE* f, Node* node, int indent) { CallNode* cn = CALL_(node); fprintf(f, "", node); + fprintf(f, " num: %d, name", cn->called_gnum); p_string(f, cn->name_end - cn->name, cn->name); } break; @@ -7881,6 +8069,8 @@ print_indent_tree(FILE* f, Node* node, int indent) fprintf(f, "memory:%d", BAG_(node)->m.regnum); if (NODE_IS_CALLED(node)) fprintf(f, ", called"); + else if (NODE_IS_REFERENCED(node)) + fprintf(f, ", referenced"); if (NODE_IS_FIXED_ADDR(node)) fprintf(f, ", fixed-addr"); break; diff --git a/src/regenc.c b/src/regenc.c index dbfbc89..27e4549 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -263,12 +263,12 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s) extern int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) { - UChar* start = (UChar* )s; - UChar* p = (UChar* )s; + const UChar* start = s; + const UChar* p = s; while (1) { if (*p == '\0') { - UChar* q; + const UChar* q; int len = ONIGENC_MBC_MINLEN(enc); if (len == 1) return (int )(p - start); diff --git a/src/regerror.c b/src/regerror.c index 58bc7fd..dc1c8b6 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -56,6 +56,8 @@ onig_error_code_to_format(int code) p = "retry-limit-in-match over"; break; case ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER: p = "retry-limit-in-search over"; break; + case ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER: + p = "subexp-call-limit-in-search over"; break; case ONIGERR_TYPE_BUG: p = "undefined type (bug)"; break; case ONIGERR_PARSER_BUG: diff --git a/src/regexec.c b/src/regexec.c index 1b6895d..bb6b474 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -46,15 +46,15 @@ #define CHECK_INTERRUPT_IN_MATCH -#define STACK_MEM_START(reg, i) \ - (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \ - STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i]))) +#define STACK_MEM_START(reg, idx) \ + (MEM_STATUS_AT((reg)->push_mem_start, (idx)) != 0 ? \ + STACK_AT(mem_start_stk[idx].i)->u.mem.pstr : mem_start_stk[idx].s) -#define STACK_MEM_END(reg, i) \ - (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \ - STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i]))) +#define STACK_MEM_END(reg, idx) \ + (MEM_STATUS_AT((reg)->push_mem_end, (idx)) != 0 ? \ + STACK_AT(mem_end_stk[idx].i)->u.mem.pstr : mem_end_stk[idx].s) -static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev); +static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high); static int search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp); @@ -170,6 +170,9 @@ typedef struct { int best_len; /* for ONIG_OPTION_FIND_LONGEST */ UChar* best_s; #endif +#ifdef USE_CALL + unsigned long subexp_call_in_search_counter; +#endif } MatchArg; @@ -1057,8 +1060,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) /** stack **/ -#define INVALID_STACK_INDEX -1 - #define STK_ALT_FLAG 0x0001 /* stack type */ @@ -1099,7 +1100,15 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_MASK_TO_VOID_TARGET 0x100e #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ -typedef intptr_t StackIndex; +typedef ptrdiff_t StackIndex; + +#define INVALID_STACK_INDEX ((StackIndex )-1) + +typedef union { + StackIndex i; + UChar* s; +} StkPtrType; + typedef struct _StackType { unsigned int type; @@ -1108,7 +1117,6 @@ typedef struct _StackType { struct { Operation* pcode; /* byte code position */ UChar* pstr; /* string position */ - UChar* pstr_prev; /* previous char position of pstr */ } state; struct { int count; @@ -1119,8 +1127,8 @@ typedef struct _StackType { struct { UChar *pstr; /* start/end position */ /* Following information is set, if this stack type is MEM-START */ - StackIndex prev_start; /* prev. info (for backtrack "(...)*" ) */ - StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */ + StkPtrType prev_start; /* prev. info (for backtrack "(...)*" ) */ + StkPtrType prev_end; /* prev. info (for backtrack "(...)*" ) */ } mem; struct { UChar *pstr; /* start position */ @@ -1166,8 +1174,8 @@ struct OnigCalloutArgsStruct { MatchArg* msa; StackType* stk_base; StackType* stk; - StackIndex* mem_start_stk; - StackIndex* mem_end_stk; + StkPtrType* mem_start_stk; + StkPtrType* mem_end_stk; }; #endif @@ -1178,7 +1186,7 @@ struct OnigCalloutArgsStruct { #define UPDATE_FOR_STACK_REALLOC do{\ repeat_stk = (StackIndex* )alloc_base;\ empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ - mem_start_stk = (StackIndex* )(empty_check_stk + reg->num_empty_check);\ + mem_start_stk = (StkPtrType* )(empty_check_stk + reg->num_empty_check);\ mem_end_stk = mem_start_stk + num_mem + 1;\ } while(0) @@ -1194,7 +1202,7 @@ struct OnigCalloutArgsStruct { #define PTR_NUM_SIZE(reg) (((reg)->num_mem + 1) * 2) #define UPDATE_FOR_STACK_REALLOC do{\ - mem_start_stk = (StackIndex* )alloc_base;\ + mem_start_stk = (StkPtrType* )alloc_base;\ mem_end_stk = mem_start_stk + num_mem + 1;\ } while(0) @@ -1218,8 +1226,12 @@ struct OnigCalloutArgsStruct { #endif #if defined(USE_CALL) +#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) \ + (msa).subexp_call_in_search_counter = 0; + #define POP_CALL else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;} #else +#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) #define POP_CALL #endif @@ -1231,6 +1243,7 @@ struct OnigCalloutArgsStruct { (msa).start = (arg_start);\ (msa).match_stack_limit = (mpv)->match_stack_limit;\ RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ + SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ (msa).mp = mpv;\ (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ @@ -1243,6 +1256,7 @@ struct OnigCalloutArgsStruct { (msa).start = (arg_start);\ (msa).match_stack_limit = (mpv)->match_stack_limit;\ RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ + SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ (msa).mp = mpv;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ } while(0) @@ -1258,27 +1272,27 @@ struct OnigCalloutArgsStruct { is_alloca = 0;\ alloc_base = msa->stack_p;\ stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ + + (sizeof(StkPtrType) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + msa->stack_n;\ }\ else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\ is_alloca = 0;\ - alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ + alloc_base = (char* )xmalloc(sizeof(StkPtrType) * msa->ptr_num\ + sizeof(StackType) * (stack_num));\ CHECK_NULL_RETURN_MEMERR(alloc_base);\ stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ + + (sizeof(StkPtrType) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ }\ else {\ is_alloca = 1;\ - alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\ + alloc_base = (char* )xalloca(sizeof(StkPtrType) * msa->ptr_num\ + sizeof(StackType) * (stack_num));\ CHECK_NULL_RETURN_MEMERR(alloc_base);\ stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ + + (sizeof(StkPtrType) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ }\ @@ -1288,7 +1302,7 @@ struct OnigCalloutArgsStruct { #define STACK_SAVE(msa,is_alloca,alloc_base) do{\ (msa)->stack_n = (int )(stk_end - stk_base);\ if ((is_alloca) != 0) {\ - size_t size = sizeof(StackIndex) * (msa)->ptr_num\ + size_t size = sizeof(StkPtrType) * (msa)->ptr_num\ + sizeof(StackType) * (msa)->stack_n;\ (msa)->stack_p = xmalloc(size);\ CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\ @@ -1373,6 +1387,24 @@ onig_set_retry_limit_in_search(unsigned long n) #endif } +#ifdef USE_CALL +static unsigned long SubexpCallLimitInSearch = DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH; + +extern unsigned long +onig_get_subexp_call_limit_in_search(void) +{ + return SubexpCallLimitInSearch; +} + +extern int +onig_set_subexp_call_limit_in_search(unsigned long n) +{ + SubexpCallLimitInSearch = n; + return 0; +} + +#endif + #ifdef USE_CALLOUT static OnigCalloutFunc DefaultProgressCallout; static OnigCalloutFunc DefaultRetractionCallout; @@ -1637,9 +1669,9 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk = *arg_stk; n = (unsigned int )(stk_end - stk_base); - size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; + size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n; n *= 2; - new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; + new_size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n; if (*is_alloca != 0) { new_alloc_base = (char* )xmalloc(new_size); if (IS_NULL(new_alloc_base)) { @@ -1669,7 +1701,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, used = (int )(stk - stk_base); *arg_alloc_base = alloc_base; *arg_stk_base = (StackType* )(alloc_base - + (sizeof(StackIndex) * msa->ptr_num)); + + (sizeof(StkPtrType) * msa->ptr_num)); *arg_stk = *arg_stk_base + used; *arg_stk_end = *arg_stk_base + n; return 0; @@ -1694,22 +1726,20 @@ stack_double(int* is_alloca, char** arg_alloc_base, #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ +#define STACK_PUSH(stack_type,pat,s) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ STACK_INC;\ } while(0) -#define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\ +#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->zid = (int )(id);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ STACK_INC;\ } while(0) @@ -1724,7 +1754,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = s;\ - stk->u.state.pstr_prev = sprev;\ STACK_INC;\ } while (0) #else @@ -1735,10 +1764,9 @@ stack_double(int* is_alloca, char** arg_alloc_base, } while (0) #endif -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) -#define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \ - STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id) +#define STACK_PUSH_ALT(pat,s) STACK_PUSH(STK_ALT,pat,s) +#define STACK_PUSH_SUPER_ALT(pat,s) STACK_PUSH(STK_SUPER_ALT,pat,s) +#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id) #if 0 #define STACK_PUSH_REPEAT(sid, pat) do {\ @@ -1767,8 +1795,8 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->u.mem.pstr = (s);\ stk->u.mem.prev_start = mem_start_stk[mnum];\ stk->u.mem.prev_end = mem_end_stk[mnum];\ - mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ - mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + mem_start_stk[mnum].i = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum].i = INVALID_STACK_INDEX;\ STACK_INC;\ } while(0) @@ -1779,7 +1807,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->u.mem.pstr = (s);\ stk->u.mem.prev_start = mem_start_stk[mnum];\ stk->u.mem.prev_end = mem_end_stk[mnum];\ - mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum].i = GET_STACK_INDEX(stk);\ STACK_INC;\ } while(0) @@ -1861,12 +1889,11 @@ stack_double(int* is_alloca, char** arg_alloc_base, STACK_INC;\ } while(0) -#define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\ +#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MARK;\ stk->zid = (sid);\ stk->u.val.v = (UChar* )(s);\ - stk->u.val.v2 = (sprev);\ STACK_INC;\ } while(0) @@ -1885,7 +1912,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->zid = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ - stk->u.val.v2 = sprev;\ STACK_INC;\ } while(0) @@ -1932,7 +1958,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ - sprev = k->u.val.v2;\ break;\ }\ }\ @@ -2135,14 +2160,14 @@ stack_double(int* is_alloca, char** arg_alloc_base, } while(0) #define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\ - if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\ + if (k->u.mem.prev_end.i == INVALID_STACK_INDEX) {\ (addr) = 0;\ }\ else {\ if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\ - (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\ + (addr) = STACK_AT(k->u.mem.prev_end.i)->u.mem.pstr;\ else\ - (addr) = (UChar* )k->u.mem.prev_end;\ + (addr) = k->u.mem.prev_end.s;\ }\ } while (0) @@ -2163,7 +2188,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, if (endp == 0) {\ (isnull) = 0; break;\ }\ - else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\ + else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) {\ (isnull) = 0; break;\ }\ else if (endp != s) {\ @@ -2199,7 +2224,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, if (endp == 0) {\ (isnull) = 0; break;\ }\ - else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \ + else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) { \ (isnull) = 0; break;\ }\ else if (endp != s) {\ @@ -2362,6 +2387,10 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, p1++; p2++; } + if (s2 >= end2) { + if (s1 < end1) return 0; + else break; + } } *ps2 = s2; @@ -2390,7 +2419,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define ON_STR_END(s) ((s) == end) #define DATA_ENSURE_CHECK1 (s < right_range) #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) -#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +#define DATA_ENSURE(n) if (right_range - s < (n)) goto fail #define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range @@ -2632,9 +2661,9 @@ typedef struct { #define BYTECODE_INTERPRETER_START GOTO_OP; #define BYTECODE_INTERPRETER_END -#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0) +#define CASE_OP(x) L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0) #define DEFAULT_OP /* L_DEFAULT: */ -#define NEXT_OP sprev = sbegin; JUMP_OP +#define NEXT_OP JUMP_OP #define JUMP_OP GOTO_OP #ifdef USE_DIRECT_THREADED_CODE #define GOTO_OP goto *(p->opaddr) @@ -2648,9 +2677,8 @@ typedef struct { #define BYTECODE_INTERPRETER_START \ while (1) {\ MATCH_DEBUG_OUT(0)\ - sbegin = s;\ switch (p->opcode) { -#define BYTECODE_INTERPRETER_END } sprev = sbegin; } +#define BYTECODE_INTERPRETER_END } } #define CASE_OP(x) case OP_##x: SOP_IN(OP_##x); #define DEFAULT_OP default: #define NEXT_OP break @@ -2718,12 +2746,22 @@ typedef struct { best_len = err_code; goto match_at_end;\ } while(0) +#define MATCH_COUNTER_OUT(title) do {\ + int i;\ + fprintf(DBGFP, "%s (%ld): retry limit: %8lu, subexp_call: %8lu\n", (title), (sstart - str), retry_in_match_counter, msa->subexp_call_in_search_counter); \ + fprintf(DBGFP, " ");\ + for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {\ + fprintf(DBGFP, " %6lu", subexp_call_counters[i]);\ + }\ + fprintf(DBGFP, "\n");\ + fflush(DBGFP);\ +} while (0) + /* match data(str - end) from position (sstart). */ -/* if sstart == str then set sprev to NULL. */ static int match_at(regex_t* reg, const UChar* str, const UChar* end, - const UChar* in_right_range, const UChar* sstart, UChar* sprev, + const UChar* in_right_range, const UChar* sstart, MatchArg* msa) { @@ -2782,10 +2820,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_BACKREF_N_IC, &&L_BACKREF_MULTI, &&L_BACKREF_MULTI_IC, +#ifdef USE_BACKREF_WITH_LEVEL &&L_BACKREF_WITH_LEVEL, &&L_BACKREF_WITH_LEVEL_IC, +#endif &&L_BACKREF_CHECK, +#ifdef USE_BACKREF_WITH_LEVEL &&L_BACKREF_CHECK_WITH_LEVEL, +#endif &&L_MEM_START, &&L_MEM_START_PUSH, &&L_MEM_END_PUSH, @@ -2838,13 +2880,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, LengthType tlen, tlen2; MemNumType mem; RelAddrType addr; - UChar *s, *ps, *sbegin; + UChar *s, *ps; UChar *right_range; int is_alloca; char *alloc_base; StackType *stk_base, *stk, *stk_end; StackType *stkp; /* used as any purpose. */ - StackIndex *mem_start_stk, *mem_end_stk; + StkPtrType *mem_start_stk, *mem_end_stk; UChar* keep; #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR @@ -2858,6 +2900,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_CALLOUT int of; #endif +#ifdef ONIG_DEBUG_MATCH_COUNTER +#define MAX_SUBEXP_CALL_COUNTERS 9 + unsigned long subexp_call_counters[MAX_SUBEXP_CALL_COUNTERS]; +#endif Operation* p = reg->ops; OnigOptionType option = reg->options; @@ -2872,6 +2918,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, static unsigned int counter = 1; #endif +#ifdef ONIG_DEBUG_MATCH_COUNTER + for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) { + subexp_call_counters[i] = 0; + } +#endif + #ifdef USE_DIRECT_THREADED_CODE if (IS_NULL(msa)) { for (i = 0; i < reg->ops_used; i++) { @@ -2903,12 +2955,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_INIT(INIT_MATCH_STACK_SIZE); UPDATE_FOR_STACK_REALLOC; for (i = 1; i <= num_mem; i++) { - mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; + mem_start_stk[i].i = mem_end_stk[i].i = INVALID_STACK_INDEX; } #ifdef ONIG_DEBUG_MATCH - fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", - str, end, sstart, sprev); + fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart); fprintf(DBGFP, "size: %d, start offset: %d\n", (int )(end - str), (int )(sstart - str)); #endif @@ -2932,24 +2983,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (n > msa->best_len) { msa->best_len = n; msa->best_s = (UChar* )sstart; + goto set_region; } else goto end_best_len; } #endif best_len = n; + + set_region: region = msa->region; if (region) { if (keep > s) keep = s; -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API if (OPTON_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; rmt[0].rm_so = (regoff_t )(keep - str); rmt[0].rm_eo = (regoff_t )(s - str); for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (mem_end_stk[i].i != INVALID_STACK_INDEX) { rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str); rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str); } @@ -2959,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } else { -#endif /* USE_POSIX_API_REGION_OPTION */ +#endif /* USE_POSIX_API */ region->beg[0] = (int )(keep - str); region->end[0] = (int )(s - str); for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (mem_end_stk[i].i != INVALID_STACK_INDEX) { region->beg[i] = (int )(STACK_MEM_START(reg, i) - str); region->end[i] = (int )(STACK_MEM_END(reg, i) - str); } @@ -2996,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (r < 0) MATCH_AT_ERROR_RETURN(r); } #endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API } /* else OPTON_POSIX_REGION() */ #endif } /* if (region) */ @@ -3012,8 +3066,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, best_len = ONIG_MISMATCH; goto fail; /* for retry */ } - if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { - goto fail; /* for retry */ + if (OPTON_FIND_LONGEST(option)) { + if (s >= in_right_range && msa->best_s == sstart) + best_len = msa->best_len; + else + goto fail; /* for retry */ } } @@ -3034,7 +3091,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3047,7 +3103,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3062,7 +3117,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3079,7 +3133,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3091,7 +3144,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (tlen-- > 0) { if (*ps++ != *s++) goto fail; } - sprev = s - 1; INC_OP; JUMP_OUT; @@ -3112,7 +3164,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ps++; s++; if (*ps != *s) goto fail; ps++; s++; - sprev = s; if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; @@ -3131,7 +3182,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ps++; s++; if (*ps != *s) goto fail; ps++; s++; - sprev = s; if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; @@ -3149,7 +3199,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - 2; INC_OP; JUMP_OUT; @@ -3165,7 +3214,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - 3; INC_OP; JUMP_OUT; @@ -3179,7 +3227,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - tlen; INC_OP; JUMP_OUT; @@ -3295,11 +3342,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(ANYCHAR_STAR) INC_OP; while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; s += n; } JUMP_OUT; @@ -3307,15 +3353,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(ANYCHAR_ML_STAR) INC_OP; while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); - sprev = s; s += n; } else { - sprev = s; s++; } } @@ -3329,12 +3373,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; while (DATA_ENSURE_CHECK1) { if (c == *s) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); } n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; s += n; } } @@ -3348,16 +3391,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; while (DATA_ENSURE_CHECK1) { if (c == *s) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); } n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); - sprev = s; s += n; } else { - sprev = s; s++; } } @@ -3410,14 +3451,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) goto fail; } - else if (ON_STR_END(s)) { - if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; - } else { - if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) - == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ON_STR_END(s)) { + if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } } INC_OP; @@ -3432,14 +3476,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) goto fail; } - else if (ON_STR_END(s)) { - if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; - } else { - if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) - != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ON_STR_END(s)) { + if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } } INC_OP; @@ -3452,7 +3499,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mode = p->word_boundary.mode; if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + UChar* sprev; + if (ON_STR_BEGIN(s)) { + INC_OP; + JUMP_OUT; + } + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { INC_OP; JUMP_OUT; } @@ -3465,10 +3518,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ModeType mode; mode = p->word_boundary.mode; - if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { - if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - INC_OP; - JUMP_OUT; + if (! ON_STR_BEGIN(s)) { + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { + INC_OP; + JUMP_OUT; + } } } } @@ -3478,6 +3534,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(TEXT_SEGMENT_BOUNDARY) { int is_break; + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); switch (p->text_segment_boundary.type) { case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: @@ -3507,12 +3564,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BEGIN_BUF) if (! ON_STR_BEGIN(s)) goto fail; + if (OPTON_NOTBOL(msa->options)) goto fail; + if (OPTON_NOT_BEGIN_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; CASE_OP(END_BUF) if (! ON_STR_END(s)) goto fail; + if (OPTON_NOTEOL(msa->options)) goto fail; + if (OPTON_NOT_END_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; @@ -3523,15 +3584,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; JUMP_OUT; } - else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - INC_OP; - JUMP_OUT; + else if (! ON_STR_END(s)) { + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { + INC_OP; + JUMP_OUT; + } } goto fail; CASE_OP(END_LINE) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (OPTON_NOTEOL(msa->options)) goto fail; @@ -3556,9 +3621,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(SEMI_END_BUF) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (OPTON_NOTEOL(msa->options)) goto fail; + if (OPTON_NOT_END_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE @@ -3567,6 +3634,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enclen(encode, s))) { + if (OPTON_NOTEOL(msa->options)) goto fail; + if (OPTON_NOT_END_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; } @@ -3575,6 +3644,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, UChar* ss = s + enclen(encode, s); ss += enclen(encode, ss); if (ON_STR_END(ss)) { + if (OPTON_NOTEOL(msa->options)) goto fail; + if (OPTON_NOT_END_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; } @@ -3586,6 +3657,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, switch (p->check_position.type) { case CHECK_POSITION_SEARCH_START: if (s != msa->start) goto fail; + if (OPTON_NOT_BEGIN_POSITION(msa->options)) goto fail; break; case CHECK_POSITION_CURRENT_RIGHT_RANGE: if (s != right_range) goto fail; @@ -3604,7 +3676,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(MEM_START) mem = p->memory_start.num; - mem_start_stk[mem] = (StackIndex )((void* )s); + mem_start_stk[mem].s = s; INC_OP; JUMP_OUT; @@ -3616,7 +3688,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(MEM_END) mem = p->memory_end.num; - mem_end_stk[mem] = (StackIndex )((void* )s); + mem_end_stk[mem].s = s; INC_OP; JUMP_OUT; @@ -3629,20 +3701,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ si = GET_STACK_INDEX(stkp); STACK_PUSH_MEM_END(mem, s); - mem_start_stk[mem] = si; + mem_start_stk[mem].i = si; INC_OP; JUMP_OUT; } CASE_OP(MEM_END_REC) mem = p->memory_end.num; - mem_end_stk[mem] = (StackIndex )((void* )s); + mem_end_stk[mem].s = s; STACK_GET_MEM_START(mem, stkp); if (MEM_STATUS_AT(reg->push_mem_start, mem)) - mem_start_stk[mem] = GET_STACK_INDEX(stkp); + mem_start_stk[mem].i = GET_STACK_INDEX(stkp); else - mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); + mem_start_stk[mem].s = stkp->u.mem.pstr; STACK_PUSH_MEM_END_MARK(mem); INC_OP; @@ -3661,21 +3733,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem = p->backref_n.n1; backref: { - int len; UChar *pstart, *pend; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail; pstart = STACK_MEM_START(reg, mem); pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; STRING_CMP(s, pstart, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } } INC_OP; @@ -3684,21 +3752,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_N_IC) mem = p->backref_n.n1; { - int len; UChar *pstart, *pend; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail; pstart = STACK_MEM_START(reg, mem); pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } } INC_OP; @@ -3706,28 +3770,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_MULTI) { - int len, is_fail; + int is_fail; UChar *pstart, *pend, *swork; tlen = p->backref_general.num; for (i = 0; i < tlen; i++) { mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i]; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue; pstart = STACK_MEM_START(reg, mem); pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; swork = s; STRING_CMP_VALUE(swork, pstart, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } break; /* success */ } @@ -3738,28 +3799,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_MULTI_IC) { - int len, is_fail; + int is_fail; UChar *pstart, *pend, *swork; tlen = p->backref_general.num; for (i = 0; i < tlen; i++) { mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i]; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue; pstart = STACK_MEM_START(reg, mem); pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; swork = s; STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } break; /* success */ } @@ -3774,10 +3832,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto backref_with_level; CASE_OP(BACKREF_WITH_LEVEL) { - int len; int level; MemNumType* mems; - UChar* ssave; n = 0; backref_with_level: @@ -3785,17 +3841,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, tlen = p->backref_general.num; mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns; - ssave = s; - if (backref_match_at_nested_level(reg, stk, stk_base, n, - case_fold_flag, level, (int )tlen, mems, &s, end)) { - if (ssave != s) { - sprev = ssave; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - } - } - else + if (! backref_match_at_nested_level(reg, stk, stk_base, n, + case_fold_flag, level, (int )tlen, mems, &s, end)) { goto fail; + } } INC_OP; JUMP_OUT; @@ -3810,8 +3859,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, for (i = 0; i < tlen; i++) { mem = mems[i]; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue; break; /* success */ } if (i == tlen) goto fail; @@ -3928,13 +3977,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(PUSH) addr = p->push.addr; - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); INC_OP; JUMP_OUT; CASE_OP(PUSH_SUPER) addr = p->push.addr; - STACK_PUSH_SUPER_ALT(p + addr, s, sprev); + STACK_PUSH_SUPER_ALT(p + addr, s); INC_OP; JUMP_OUT; @@ -3956,7 +4005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, addr = p->push_or_jump_exact1.addr; c = p->push_or_jump_exact1.c; if (DATA_ENSURE_CHECK1 && c == *s) { - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); INC_OP; JUMP_OUT; } @@ -3972,9 +4021,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, addr = p->push_if_peek_next.addr; c = p->push_if_peek_next.c; if (DATA_ENSURE_CHECK1 && c == *s) { - STACK_PUSH_ALT(p + addr, s, sprev); - INC_OP; - JUMP_OUT; + STACK_PUSH_ALT(p + addr, s); } } INC_OP; @@ -3986,7 +4033,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(mem, 0); if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); } INC_OP; JUMP_OUT; @@ -3997,7 +4044,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(mem, 0); if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + 1, s, sprev); + STACK_PUSH_ALT(p + 1, s); p += addr; } else @@ -4014,7 +4061,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (n >= reg->repeat_range[mem].lower) { INC_OP; - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); p = reg->repeat_range[mem].u.pcode; } else { @@ -4033,7 +4080,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else { if (n >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev); + STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s); INC_OP; } else { @@ -4047,6 +4094,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (subexp_call_nest_counter == SubexpCallMaxNestLevel) goto fail; subexp_call_nest_counter++; + + if (SubexpCallLimitInSearch != 0) { + msa->subexp_call_in_search_counter++; +#ifdef ONIG_DEBUG_MATCH_COUNTER + if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS) + subexp_call_counters[p->call.called_mem]++; + if (msa->subexp_call_in_search_counter % 1000 == 0) + MATCH_COUNTER_OUT("CALL"); +#endif + if (msa->subexp_call_in_search_counter > + SubexpCallLimitInSearch) { + MATCH_AT_ERROR_RETURN(ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER); + } + } + addr = p->call.addr; INC_OP; STACK_PUSH_CALL_FRAME(p); p = reg->ops + addr; @@ -4070,7 +4132,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, for (tlen = p->move.n; tlen > 0; tlen--) { len = enclen(encode, s); - sprev = s; s += len; if (s > end) goto fail; if (s == end) { @@ -4079,7 +4140,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); INC_OP; JUMP_OUT; @@ -4088,10 +4148,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (tlen != 0) { s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } if (p->step_back_start.remaining != 0) { - STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining); + STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining); p += p->step_back_start.addr; } else @@ -4103,9 +4162,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (tlen != INFINITE_LEN) tlen--; s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1); if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (tlen != 0) { - STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen); + STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen); } INC_OP; JUMP_OUT; @@ -4114,8 +4172,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem = p->cut_to_mark.id; /* mem: mark id */ STACK_TO_VOID_TO_MARK(stkp, mem); if (p->cut_to_mark.restore_pos != 0) { - s = stkp->u.val.v; - sprev = stkp->u.val.v2; + s = stkp->u.val.v; } INC_OP; JUMP_OUT; @@ -4123,7 +4180,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(MARK) mem = p->mark.id; /* mem: mark id */ if (p->mark.save_pos != 0) - STACK_PUSH_MARK_WITH_POS(mem, s, sprev); + STACK_PUSH_MARK_WITH_POS(mem, s); else STACK_PUSH_MARK(mem); @@ -4275,9 +4332,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fail: #endif STACK_POP; - p = stk->u.state.pcode; - s = stk->u.state.pstr; - sprev = stk->u.state.pstr_prev; + p = stk->u.state.pcode; + s = stk->u.state.pstr; CHECK_RETRY_LIMIT_IN_MATCH; JUMP_OUT; @@ -4290,6 +4346,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (msa->retry_limit_in_search != 0) { msa->retry_limit_in_search_counter += retry_in_match_counter; } + +#ifdef ONIG_DEBUG_MATCH_COUNTER + MATCH_COUNTER_OUT("END"); +#endif + STACK_SAVE(msa, is_alloca, alloc_base); return best_len; } @@ -4324,12 +4385,11 @@ typedef struct { int state; /* value of enum SearchRangeStatus */ UChar* low; UChar* high; - UChar* low_prev; UChar* sch_range; } SearchRange; #define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \ + r = match_at(reg, str, end, (upper_range), s, msas + i); \ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ goto match;\ @@ -4345,8 +4405,8 @@ regset_search_body_position_lead(OnigRegSet* set, OnigOptionType option, MatchArg* msas, int* rmatch_pos) { int r, n, i; - UChar *s, *prev; - UChar *low, *high, *low_prev; + UChar *s; + UChar *low, *high; UChar* sch_range; regex_t* reg; OnigEncoding enc; @@ -4354,12 +4414,7 @@ regset_search_body_position_lead(OnigRegSet* set, n = set->n; enc = set->enc; - s = (UChar* )start; - if (s > str) - prev = onigenc_get_prev_char_head(enc, str, s); - else - prev = (UChar* )NULL; sr = (SearchRange* )xmalloc(sizeof(*sr) * n); CHECK_NULL_RETURN_MEMERR(sr); @@ -4375,18 +4430,16 @@ regset_search_body_position_lead(OnigRegSet* set, else sch_range = (UChar* )end; - if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) { + if (forward_search(reg, str, end, s, sch_range, &low, &high)) { sr[i].state = SRS_LOW_HIGH; sr[i].low = low; sr[i].high = high; - sr[i].low_prev = low_prev; sr[i].sch_range = sch_range; } } else { sch_range = (UChar* )end; - if (forward_search(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) { + if (forward_search(reg, str, end, s, sch_range, &low, &high)) { goto total_active; } } @@ -4396,7 +4449,6 @@ regset_search_body_position_lead(OnigRegSet* set, sr[i].state = SRS_ALL_RANGE; sr[i].low = s; sr[i].high = (UChar* )range; - sr[i].low_prev = prev; } } @@ -4412,10 +4464,9 @@ regset_search_body_position_lead(OnigRegSet* set, if (s < sr[i].low) continue; if (s >= sr[i].high) { if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range, - &low, &high, &low_prev) != 0) { + &low, &high) != 0) { sr[i].low = low; sr[i].high = high; - sr[i].low_prev = low_prev; if (s < low) continue; } else { @@ -4436,16 +4487,13 @@ regset_search_body_position_lead(OnigRegSet* set, for (i = 0; i < n; i++) { if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) { low = sr[i].low; - low_prev = sr[i].low_prev; } } if (low == range) break; s = low; - prev = low_prev; } else { - prev = s; s += enclen(enc, s); } } while (1); @@ -4459,10 +4507,9 @@ regset_search_body_position_lead(OnigRegSet* set, if (s < sr[i].low) continue; if (s >= sr[i].high) { if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range, - &low, &high, &low_prev) != 0) { + &low, &high) != 0) { sr[i].low = low; sr[i].high = high; - /* sr[i].low_prev = low_prev; */ if (s < low) continue; } else { @@ -4483,7 +4530,6 @@ regset_search_body_position_lead(OnigRegSet* set, if (set->anychar_inf != 0) prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end); - prev = s; s += enclen(enc, s); } while (1); } @@ -4552,7 +4598,7 @@ onig_regset_search_with_param(OnigRegSet* set, { int r; int i; - UChar *s, *prev; + UChar *s; regex_t* reg; OnigEncoding enc; OnigRegion* region; @@ -4654,7 +4700,6 @@ onig_regset_search_with_param(OnigRegSet* set, else if (str == end) { /* empty string */ start = end = str; s = (UChar* )start; - prev = (UChar* )NULL; msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n); CHECK_NULL_RETURN_MEMERR(msas); @@ -4669,7 +4714,7 @@ onig_regset_search_with_param(OnigRegSet* set, /* Can't use REGSET_MATCH_AND_RETURN_CHECK() because r must be set regex index (i) */ - r = match_at(reg, str, end, end, s, prev, msas + i); + r = match_at(reg, str, end, end, s, msas + i); if (r != ONIG_MISMATCH) { if (r >= 0) { r = i; @@ -4814,7 +4859,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, else s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); - while (s >= text) { + while (PTR_GE(s, text)) { if (*s == *target) { p = s + 1; t = target + 1; @@ -4855,7 +4900,7 @@ sunday_quick_search_step_forward(regex_t* reg, tail = target_end - 1; tlen1 = (int )(tail - target); end = text_range; - if (end + tlen1 > text_end) + if (tlen1 > text_end - end) end = text_end - tlen1; map_offset = reg->map_offset; @@ -4893,15 +4938,38 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end, const UChar *s, *t, *p, *end; const UChar *tail; int map_offset; - - end = text_range + (target_end - target); - if (end > text_end) - end = text_end; + ptrdiff_t target_len; map_offset = reg->map_offset; tail = target_end - 1; - s = text + (tail - target); + target_len = target_end - target; + if (target_len > text_end - text_range) { + end = text_end; + if (target_len > text_end - text) + return (UChar* )NULL; + } + else { + end = text_range + target_len; + } + + s = text + target_len - 1; + +#ifdef USE_STRICT_POINTER_ADDRESS + if (s < end) { + while (TRUE) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; + } + if (text_end - s <= map_offset) break; + if (reg->map[*(s + map_offset)] >= end - s) break; + s += reg->map[*(s + map_offset)]; + } + } +#else while (s < end) { p = s; t = tail; @@ -4909,9 +4977,10 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end, if (t == target) return (UChar* )p; p--; t--; } - if (s + map_offset >= text_end) break; + if (text_end - s <= map_offset) break; s += reg->map[*(s + map_offset)]; } +#endif return (UChar* )NULL; } @@ -4937,7 +5006,7 @@ map_search_backward(OnigEncoding enc, UChar map[], { const UChar *s = text_start; - while (s >= text) { + while (PTR_GE(s, text)) { if (map[*s]) return (UChar* )s; s = onigenc_get_prev_char_head(enc, adjust_text, s); @@ -4963,13 +5032,16 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, OnigMatchParam* mp) { int r; - UChar *prev; MatchArg msa; +#ifndef USE_POSIX_API + if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT; +#endif + ADJUST_MATCH_PARAM(reg, mp); MATCH_ARG_INIT(msa, reg, option, region, at, mp); if (region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API && !OPTON_POSIX_REGION(option) #endif ) { @@ -4986,8 +5058,14 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, } } - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, end, at, prev, &msa); + r = match_at(reg, str, end, end, at, &msa); +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) { + if (msa.best_len >= 0) { + r = msa.best_len; + } + } +#endif } end: @@ -4997,7 +5075,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, - UChar* range, UChar** low, UChar** high, UChar** low_prev) + UChar* range, UChar** low, UChar** high) { UChar *p, *pprev = (UChar* )NULL; @@ -5081,33 +5159,18 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, } if (reg->dist_max == 0) { - *low = p; - if (low_prev) { - if (*low > start) - *low_prev = onigenc_get_prev_char_head(reg->enc, start, p); - else - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - } + *low = p; *high = p; } else { if (reg->dist_max != INFINITE_LEN) { if (p - str < reg->dist_max) { *low = (UChar* )str; - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low); } else { *low = p - reg->dist_max; if (*low > start) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start, - *low, (const UChar** )low_prev); - } - else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low); + *low = onigenc_get_right_adjust_char_head(reg->enc, start, *low); } } } @@ -5263,7 +5326,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, OnigOptionType option, OnigMatchParam* mp) { int r; - UChar *s, *prev; + UChar *s; MatchArg msa; const UChar *orig_start = start; @@ -5275,8 +5338,15 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, ADJUST_MATCH_PARAM(reg, mp); +#ifndef USE_POSIX_API + if (OPTON_POSIX_REGION(option)) { + r = ONIGERR_INVALID_ARGUMENT; + goto finish_no_msa; + } +#endif + if (region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API && ! OPTON_POSIX_REGION(option) #endif ) { @@ -5294,27 +5364,14 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, } -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! OPTON_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + r = match_at(reg, str, end, (upper_range), s, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ goto match;\ }\ else goto finish; /* error */ \ } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ /* anchor optimize: resume search range */ @@ -5422,7 +5479,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (reg->threshold_len == 0) { start = end = str = address_for_empty_string; s = (UChar* )start; - prev = (UChar* )NULL; MATCH_ARG_INIT(msa, reg, option, region, start, mp); MATCH_AND_RETURN_CHECK(end); @@ -5440,13 +5496,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, s = (UChar* )start; if (range > start) { /* forward search */ - if (s > str) - prev = onigenc_get_prev_char_head(reg->enc, str, s); - else - prev = (UChar* )NULL; - if (reg->optimize != OPTIMIZE_NONE) { - UChar *sch_range, *low, *high, *low_prev; + UChar *sch_range, *low, *high; if (reg->dist_max != 0) { if (reg->dist_max == INFINITE_LEN) @@ -5467,27 +5518,27 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (reg->dist_max != INFINITE_LEN) { do { - if (! forward_search(reg, str, end, s, sch_range, &low, &high, - &low_prev)) goto mismatch; + if (! forward_search(reg, str, end, s, sch_range, &low, &high)) + goto mismatch; if (s < low) { s = low; - prev = low_prev; } while (s <= high) { MATCH_AND_RETURN_CHECK(data_range); - prev = s; s += enclen(reg->enc, s); } } while (s < range); goto mismatch; } else { /* check only. */ - if (! forward_search(reg, str, end, s, sch_range, &low, &high, - (UChar** )NULL)) goto mismatch; + if (! forward_search(reg, str, end, s, sch_range, &low, &high)) + goto mismatch; if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 && (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { do { + UChar* prev; + MATCH_AND_RETURN_CHECK(data_range); prev = s; s += enclen(reg->enc, s); @@ -5504,7 +5555,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, do { MATCH_AND_RETURN_CHECK(data_range); - prev = s; s += enclen(reg->enc, s); } while (s < range); @@ -5549,12 +5599,11 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (s > high) s = high; - while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s); + while (PTR_GE(s, low)) { MATCH_AND_RETURN_CHECK(orig_start); - s = prev; + s = onigenc_get_prev_char_head(reg->enc, str, s); } - } while (s >= range); + } while (PTR_GE(s, range)); goto mismatch; } else { /* check only. */ @@ -5566,10 +5615,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, } do { - prev = onigenc_get_prev_char_head(reg->enc, str, s); MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } while (s >= range); + s = onigenc_get_prev_char_head(reg->enc, str, s); + } while (PTR_GE(s, range)); } mismatch: @@ -5589,7 +5637,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not set in match_at(). */ if (OPTON_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API && !OPTON_POSIX_REGION(option) #endif ) { @@ -5952,7 +6000,7 @@ extern int onig_init_for_match_at(regex_t* reg) { return match_at(reg, (const UChar* )NULL, (const UChar* )NULL, - (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL, + (const UChar* )NULL, (const UChar* )NULL, (MatchArg* )NULL); } #endif @@ -6139,8 +6187,8 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i const UChar* str; StackType* stk_base; int i; - StackIndex* mem_start_stk; - StackIndex* mem_end_stk; + StkPtrType* mem_start_stk; + StkPtrType* mem_end_stk; i = mem_num; reg = a->regex; @@ -6150,7 +6198,7 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i mem_end_stk = a->mem_end_stk; if (i > 0) { - if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { + if (a->mem_end_stk[i].i != INVALID_STACK_INDEX) { *begin = (int )(STACK_MEM_START(reg, i) - str); *end = (int )(STACK_MEM_END(reg, i) - str); } diff --git a/src/regint.h b/src/regint.h index 04ebe0a..74a5c61 100644 --- a/src/regint.h +++ b/src/regint.h @@ -34,6 +34,7 @@ /* #define ONIG_DEBUG_COMPILE */ /* #define ONIG_DEBUG_SEARCH */ /* #define ONIG_DEBUG_MATCH */ +/* #define ONIG_DEBUG_MATCH_COUNTER */ /* #define ONIG_DONT_OPTIMIZE */ /* for byte-code statistical data. */ @@ -41,7 +42,7 @@ #if defined(ONIG_DEBUG_PARSE) || defined(ONIG_DEBUG_MATCH) || \ defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ - defined(ONIG_DEBUG_STATISTICS) + defined(ONIG_DEBUG_MATCH_COUNTER) || defined(ONIG_DEBUG_STATISTICS) #ifndef ONIG_DEBUG #define ONIG_DEBUG #define DBGFP stderr @@ -70,23 +71,29 @@ #endif /* internal config */ +#define USE_CHECK_VALIDITY_OF_STRING_IN_TREE #define USE_OP_PUSH_OR_JUMP_EXACT #define USE_QUANT_PEEK_NEXT #define USE_ST_LIBRARY #define USE_TIMEOFDAY +#define USE_STRICT_POINTER_ADDRESS +#define USE_STRICT_POINTER_COMPARISON #define USE_WORD_BEGIN_END /* "\<", "\>" */ #define USE_CAPTURE_HISTORY #define USE_VARIABLE_META_CHARS -#define USE_POSIX_API_REGION_OPTION #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE /* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */ +/* enabled by configure --enable-posix-api=yes */ +/* #define USE_POSIX_API */ + +#define DEFAULT_PARSE_DEPTH_LIMIT 4096 #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ #define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000 #define DEFAULT_RETRY_LIMIT_IN_SEARCH 0 /* unlimited */ -#define DEFAULT_PARSE_DEPTH_LIMIT 4096 +#define DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH 0 /* unlimited */ #define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL 20 @@ -181,6 +188,12 @@ #define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY #define NULL_UCHARP ((UChar* )0) +#ifdef USE_STRICT_POINTER_COMPARISON +#define PTR_GE(p,q) ((p) != NULL && (p) >= (q)) +#else +#define PTR_GE(p,q) (p) >= (q) +#endif + #ifndef ONIG_INT_MAX #define ONIG_INT_MAX INT_MAX #endif @@ -255,11 +268,22 @@ #ifdef _WIN32 -#if defined(_MSC_VER) && (_MSC_VER < 1300) +#ifdef _MSC_VER + +#if _MSC_VER < 1300 typedef int intptr_t; typedef unsigned int uintptr_t; #endif + +#if _MSC_VER < 1600 +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#endif + #endif +#endif /* _WIN32 */ #if SIZEOF_VOIDP == SIZEOF_LONG typedef unsigned long hash_data_type; @@ -378,6 +402,9 @@ typedef unsigned int MemStatusType; #define OPTON_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) #define OPTON_CHECK_VALIDITY_OF_STRING(option) ((option) & \ ONIG_OPTION_CHECK_VALIDITY_OF_STRING) +#define OPTON_NOT_BEGIN_STRING(option) ((option) & ONIG_OPTION_NOT_BEGIN_STRING) +#define OPTON_NOT_END_STRING(option) ((option) & ONIG_OPTION_NOT_END_STRING) +#define OPTON_NOT_BEGIN_POSITION(option) ((option) & ONIG_OPTION_NOT_BEGIN_POSITION) #define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) @@ -562,10 +589,14 @@ enum OpCode { OP_BACKREF_N_IC, OP_BACKREF_MULTI, OP_BACKREF_MULTI_IC, +#ifdef USE_BACKREF_WITH_LEVEL OP_BACKREF_WITH_LEVEL, /* \k, \k */ OP_BACKREF_WITH_LEVEL_IC, /* \k, \k */ +#endif OP_BACKREF_CHECK, /* (?(n)), (?('name')) */ +#ifdef USE_BACKREF_WITH_LEVEL OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */ +#endif OP_MEM_START, OP_MEM_START_PUSH, /* push back-tracker to stack */ OP_MEM_END_PUSH, /* push back-tracker to stack */ @@ -891,6 +922,9 @@ typedef struct { } update_var; struct { AbsAddrType addr; +#ifdef ONIG_DEBUG_MATCH_COUNTER + MemNumType called_mem; +#endif } call; #ifdef USE_CALLOUT struct { diff --git a/src/regparse.c b/src/regparse.c index cc015a7..dd2824b 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -290,7 +290,7 @@ bbuf_clone(BBuf** rto, BBuf* from) CHECK_NULL_RETURN_MEMERR(to); r = BB_INIT(to, from->alloc); if (r != 0) { - xfree(to->p); + bbuf_free(to); *rto = 0; return r; } @@ -303,6 +303,8 @@ static int backref_rel_to_abs(int rel_no, ScanEnv* env) { if (rel_no > 0) { + if (rel_no > ONIG_INT_MAX - env->num_mem) + return ONIGERR_INVALID_BACKREF; return env->num_mem + rel_no; } else { @@ -437,6 +439,7 @@ strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end int capa) { UChar* r; + ptrdiff_t dest_delta = dest_end - dest; if (dest) r = (UChar* )xrealloc(dest, capa + 1); @@ -444,7 +447,7 @@ strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end r = (UChar* )xmalloc(capa + 1); CHECK_NULL_RETURN(r); - onig_strcpy(r + (dest_end - dest), src, src_end); + onig_strcpy(r + dest_delta, src, src_end); return r; } @@ -1294,7 +1297,9 @@ static int i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e, void* arg ARG_UNUSED) { - xfree(e->name); + if (IS_NOT_NULL(e)) { + xfree(e->name); + } /*xfree(key->s); */ /* is same as e->name */ xfree(key); xfree(e); @@ -2502,7 +2507,7 @@ node_new_call(UChar* name, UChar* name_end, int gnum, int by_number) CALL_(node)->by_number = by_number; CALL_(node)->name = name; CALL_(node)->name_end = name_end; - CALL_(node)->group_num = gnum; + CALL_(node)->called_gnum = gnum; CALL_(node)->entry_count = 1; return node; } @@ -3135,7 +3140,6 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua lower = QUANT_(quant)->lower; upper = QUANT_(quant)->upper; - onig_node_free(quant); r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env); if (r != 0) goto err; @@ -3202,9 +3206,9 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, simple: r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant, body, possessive, env); + onig_node_free(quant); if (r != 0) { ns[4] = NULL_NODE; - onig_node_free(quant); onig_node_free(body); goto err; } @@ -3708,21 +3712,24 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_ while (! PEND) { PFETCH(c); - if (! IS_CODE_POINT_DIVIDE(c)) break; - } - if (IS_CODE_POINT_DIVIDE(c)) - return ONIGERR_INVALID_CODE_POINT_VALUE; - - if (c == '}') { - *src = p; - return 1; /* end of sequence */ - } - else if (c == '-' && in_cc == TRUE) { - *src = p; - return 2; /* range */ + if (! IS_CODE_POINT_DIVIDE(c)) { + if (c == '}') { + *src = p; + return 1; /* end of sequence */ + } + else if (c == '-' && in_cc == TRUE) { + *src = p; + return 2; /* range */ + } + PUNFETCH; + break; + } + else { + if (PEND) + return ONIGERR_INVALID_CODE_POINT_VALUE; + } } - PUNFETCH; r = scan_number_of_base(&p, end, 1, enc, rcode, base); if (r != 0) return r; @@ -3873,13 +3880,17 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) to = data[i*2+1]; if (pre <= from - 1) { r = add_code_range_to_buf(pbuf, pre, from - 1); - if (r != 0) return r; + if (r != 0) { + bbuf_free(*pbuf); + return r; + } } if (to == ~((OnigCodePoint )0)) break; pre = to + 1; } if (to < ~((OnigCodePoint )0)) { r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); + if (r != 0) bbuf_free(*pbuf); } return r; } @@ -4564,7 +4575,7 @@ fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env) /* \M-, \C-, \c, or \... */ static int -fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) +fetch_escaped_value_raw(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) { int v; OnigCodePoint c; @@ -4583,7 +4594,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) if (PEND) return ONIGERR_END_PATTERN_AT_META; PFETCH_S(c); if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env, &c); + v = fetch_escaped_value_raw(&p, end, env, &c); if (v < 0) return v; } c = ((c & 0xff) | 0x80); @@ -4612,7 +4623,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) } else { if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env, &c); + v = fetch_escaped_value_raw(&p, end, env, &c); if (v < 0) return v; } c &= 0x9f; @@ -4634,6 +4645,21 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) return 0; } +static int +fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) +{ + int r; + int len; + + r = fetch_escaped_value_raw(src, end, env, val); + if (r != 0) return r; + + len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val); + if (len < 0) return len; + + return 0; +} + static int fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env); static OnigCodePoint @@ -5192,7 +5218,7 @@ fetch_token_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env, int state) else { int curr_state; - curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START; + curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START; r = check_code_point_sequence_cc(p, end, tok->base_num, enc, curr_state); if (r < 0) return r; @@ -6372,7 +6398,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) } static int -parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) +prs_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) { #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 #define POSIX_BRACKET_NAME_MIN_LEN 4 @@ -6481,7 +6507,7 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) } static int -parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) +prs_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r, ctype; CClassNode* cc; @@ -6617,7 +6643,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, } static int -parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) +prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r, neg, len, fetched, and_start; OnigCodePoint in_code, curr_code; @@ -6715,6 +6741,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) p = psave; for (i = 1; i < len; i++) { r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE); + if (r < 0) goto err; } fetched = 0; } @@ -6759,7 +6786,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case TK_CC_POSIX_BRACKET_OPEN: - r = parse_posix_bracket(cc, &p, end, env); + r = prs_posix_bracket(cc, &p, end, env); if (r < 0) goto err; if (r == 1) { /* is not POSIX bracket */ CC_ESC_WARN(env, (UChar* )"["); @@ -6869,7 +6896,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) } state = CS_COMPLETE; - r = parse_cc(&anode, tok, &p, end, env); + r = prs_cc(&anode, tok, &p, end, env); if (r != 0) { onig_node_free(anode); goto cc_open_err; @@ -6967,14 +6994,14 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env) return r; } -static int parse_alts(Node** top, PToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env, int group_head); +static int prs_alts(Node** top, PToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env, int group_head); #ifdef USE_CALLOUT /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */ static int -parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) { int r; int i; @@ -7078,18 +7105,18 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv contents = onigenc_strdup(enc, code_start, code_end); CHECK_NULL_RETURN_MEMERR(contents); - r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); - if (r != 0) { - xfree(contents); - return r; - } - e = onig_reg_callout_list_at(env->reg, num); if (IS_NULL(e)) { xfree(contents); return ONIGERR_MEMORY; } + r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); + if (r != 0) { + xfree(contents); + return r; + } + e->of = ONIG_CALLOUT_OF_CONTENTS; e->in = in; e->name_id = ONIG_NON_NAME_ID; @@ -7101,7 +7128,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv } static long -parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl) +prs_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl) { long v; long d; @@ -7137,10 +7164,27 @@ parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* return ONIG_NORMAL; } +static void +clear_callout_args(int n, unsigned int types[], OnigValue vals[]) +{ + int i; + + for (i = 0; i < n; i++) { + switch (types[i]) { + case ONIG_TYPE_STRING: + if (IS_NOT_NULL(vals[i].s.start)) + xfree(vals[i].s.start); + break; + default: + break; + } + } +} + static int -parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, - int max_arg_num, unsigned int types[], OnigValue vals[], - ScanEnv* env) +prs_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, + int max_arg_num, unsigned int types[], OnigValue vals[], + ScanEnv* env) { #define MAX_CALLOUT_ARG_BYTE_LENGTH 128 @@ -7168,7 +7212,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, bufend = buf; s = e = p; while (1) { - if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + if (PEND) { + r = ONIGERR_INVALID_CALLOUT_PATTERN; + goto err_clear; + } e = p; PFETCH_S(c); @@ -7196,8 +7243,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, add_char: if (skip_mode == FALSE) { clen = p - e; - if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) - return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */ + if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) { + r = ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */ + goto err_clear; + } xmemcpy(bufend, e, clen); bufend += clen; @@ -7208,15 +7257,17 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, } if (cn != 0) { - if (max_arg_num >= 0 && n >= max_arg_num) - return ONIGERR_INVALID_CALLOUT_ARG; + if (max_arg_num >= 0 && n >= max_arg_num) { + r = ONIGERR_INVALID_CALLOUT_ARG; + goto err_clear; + } if (skip_mode == FALSE) { if ((types[n] & ONIG_TYPE_LONG) != 0) { int fixed = 0; if (cn > 0) { long rl; - r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl); + r = prs_long(enc, buf, bufend, 1, LONG_MAX, &rl); if (r == ONIG_NORMAL) { vals[n].l = rl; fixed = 1; @@ -7226,8 +7277,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, if (fixed == 0) { types[n] = (types[n] & ~ONIG_TYPE_LONG); - if (types[n] == ONIG_TYPE_VOID) - return ONIGERR_INVALID_CALLOUT_ARG; + if (types[n] == ONIG_TYPE_VOID) { + r = ONIGERR_INVALID_CALLOUT_ARG; + goto err_clear; + } } } @@ -7236,22 +7289,29 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, break; case ONIG_TYPE_CHAR: - if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG; + if (cn != 1) { + r = ONIGERR_INVALID_CALLOUT_ARG; + goto err_clear; + } vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend); break; case ONIG_TYPE_STRING: { UChar* rs = onigenc_strdup(enc, buf, bufend); - CHECK_NULL_RETURN_MEMERR(rs); + if (IS_NULL(rs)) { + r = ONIGERR_MEMORY; goto err_clear; + } vals[n].s.start = rs; vals[n].s.end = rs + (e - s); } break; case ONIG_TYPE_TAG: - if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) - return ONIGERR_INVALID_CALLOUT_TAG_NAME; + if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) { + r = ONIGERR_INVALID_CALLOUT_TAG_NAME; + goto err_clear; + } vals[n].s.start = s; vals[n].s.end = e; @@ -7259,7 +7319,8 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, case ONIG_TYPE_VOID: case ONIG_TYPE_POINTER: - return ONIGERR_PARSER_BUG; + r = ONIGERR_PARSER_BUG; + goto err_clear; break; } } @@ -7270,15 +7331,23 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, if (c == cterm) break; } - if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN; + if (c != cterm) { + r = ONIGERR_INVALID_CALLOUT_PATTERN; + goto err_clear; + } *src = p; return n; + + err_clear: + if (skip_mode == FALSE) + clear_callout_args(n, types, vals); + return r; } /* (*name[TAG]) (*name[TAG]{a,b,..}) */ static int -parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) { int r; int i; @@ -7343,7 +7412,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en /* read for single check only */ save = p; - arg_num = parse_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env); + arg_num = prs_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env); if (arg_num < 0) return arg_num; is_not_single = PPEEK_IS(cterm) ? 0 : 1; @@ -7357,10 +7426,13 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en types[i] = get_callout_arg_type_by_name_id(name_id, i); } - arg_num = parse_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env); + arg_num = prs_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env); if (arg_num < 0) return arg_num; - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + if (PEND) { + r = ONIGERR_END_PATTERN_IN_GROUP; + goto err_clear; + } PFETCH_S(c); } else { @@ -7379,32 +7451,40 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en in = onig_get_callout_in_by_name_id(name_id); opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id); - if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) - return ONIGERR_INVALID_CALLOUT_ARG; + if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) { + r = ONIGERR_INVALID_CALLOUT_ARG; + goto err_clear; + } - if (c != cterm) - return ONIGERR_INVALID_CALLOUT_PATTERN; + if (c != cterm) { + r = ONIGERR_INVALID_CALLOUT_PATTERN; + goto err_clear; + } r = reg_callout_list_entry(env, &num); - if (r != 0) return r; + if (r != 0) goto err_clear; ext = onig_get_regex_ext(env->reg); - CHECK_NULL_RETURN_MEMERR(ext); + if (IS_NULL(ext)) { + r = ONIGERR_MEMORY; goto err_clear; + } if (IS_NULL(ext->pattern)) { r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); - if (r != ONIG_NORMAL) return r; + if (r != ONIG_NORMAL) goto err_clear; } if (tag_start != tag_end) { r = callout_tag_entry(env, env->reg, tag_start, tag_end, num); - if (r != ONIG_NORMAL) return r; + if (r != ONIG_NORMAL) goto err_clear; } - r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); - if (r != ONIG_NORMAL) return r; - e = onig_reg_callout_list_at(env->reg, num); - CHECK_NULL_RETURN_MEMERR(e); + if (IS_NULL(e)) { + r = ONIGERR_MEMORY; goto err_clear; + } + + r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); + if (r != ONIG_NORMAL) goto err_clear; e->of = ONIG_CALLOUT_OF_NAME; e->in = in; @@ -7425,12 +7505,16 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en *np = node; *src = p; return 0; + + err_clear: + clear_callout_args(arg_num, types, vals); + return r; } #endif static int -parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) +prs_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) { int r, num; Node *target; @@ -7457,7 +7541,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, group: r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_alts(np, tok, term, &p, end, env, FALSE); + r = prs_alts(np, tok, term, &p, end, env, FALSE); if (r < 0) return r; *src = p; return 1; /* group */ @@ -7554,7 +7638,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_alts(&absent, tok, term, &p, end, env, TRUE); + r = prs_alts(&absent, tok, term, &p, end, env, TRUE); if (r < 0) { onig_node_free(absent); return r; @@ -7600,7 +7684,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) return ONIGERR_UNDEFINED_GROUP_OPTION; - r = parse_callout_of_contents(np, ')', &p, end, env); + r = prs_callout_of_contents(np, ')', &p, end, env); if (r != 0) return r; goto end; @@ -7620,10 +7704,12 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (IS_CODE_DIGIT_ASCII(enc, c) || c == '-' || c == '+' || c == '<' || c == '\'') { - UChar* name_end; - int back_num; +#ifdef USE_BACKREF_WITH_LEVEL int exist_level; int level; +#endif + UChar* name_end; + int back_num; enum REF_NUM num_type; int is_enclosed; @@ -7631,8 +7717,8 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (! is_enclosed) PUNFETCH; prev = p; - exist_level = 0; #ifdef USE_BACKREF_WITH_LEVEL + exist_level = 0; name_end = NULL_UCHARP; /* no need. escape gcc warning. */ r = fetch_name_with_level( (OnigCodePoint )(is_enclosed != 0 ? c : '('), @@ -7709,7 +7795,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */ condition_is_checker = 0; PFETCH(c); - r = parse_callout_of_contents(&condition, ')', &p, end, env); + r = prs_callout_of_contents(&condition, ')', &p, end, env); if (r != 0) return r; goto end_condition; } @@ -7719,7 +7805,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, else if (c == '*' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) { condition_is_checker = 0; - r = parse_callout_of_name(&condition, ')', &p, end, env); + r = prs_callout_of_name(&condition, ')', &p, end, env); if (r != 0) return r; goto end_condition; } @@ -7730,7 +7816,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, condition_is_checker = 0; r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_alts(&condition, tok, term, &p, end, env, FALSE); + r = prs_alts(&condition, tok, term, &p, end, env, FALSE); if (r < 0) { onig_node_free(condition); return r; @@ -7773,7 +7859,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, onig_node_free(condition); return r; } - r = parse_alts(&target, tok, term, &p, end, env, TRUE); + r = prs_alts(&target, tok, term, &p, end, env, TRUE); if (r < 0) { onig_node_free(condition); onig_node_free(target); @@ -7949,7 +8035,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, env->options = option; r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_alts(&target, tok, term, &p, end, env, FALSE); + r = prs_alts(&target, tok, term, &p, end, env, FALSE); env->options = prev; if (r < 0) { onig_node_free(target); @@ -7976,7 +8062,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, else if (c == '*' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) { PINC; - r = parse_callout_of_name(np, ')', &p, end, env); + r = prs_callout_of_name(np, ')', &p, end, env); if (r != 0) return r; goto end; @@ -7996,7 +8082,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(*np); r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_alts(&target, tok, term, &p, end, env, FALSE); + r = prs_alts(&target, tok, term, &p, end, env, FALSE); if (r < 0) { onig_node_free(target); return r; @@ -8006,7 +8092,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end, if (NODE_TYPE(*np) == NODE_BAG) { if (BAG_(*np)->type == BAG_MEMORY) { - /* Don't move this to previous of parse_alts() */ + /* Don't move this to previous of prs_alts() */ r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np); if (r != 0) return r; } @@ -8285,8 +8371,8 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) } static int -parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env, int group_head) +prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env, int group_head) { int r, len, group; Node* qn; @@ -8311,7 +8397,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, break; case TK_SUBEXP_OPEN: - r = parse_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env); + r = prs_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env); if (r < 0) return r; if (r == 1) { /* group */ if (group_head == 0) @@ -8341,7 +8427,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, env->options = BAG_(*np)->o.options; r = fetch_token(tok, src, end, env); if (r < 0) return r; - r = parse_alts(&target, tok, term, src, end, env, FALSE); + r = prs_alts(&target, tok, term, src, end, env, FALSE); env->options = prev; if (r < 0) { onig_node_free(target); @@ -8419,8 +8505,9 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, case TK_CODE_POINT: { UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); + len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code); if (len < 0) return len; + len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG *np = node_new_str_crude(buf, buf + len, env->options); #else @@ -8465,7 +8552,12 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); cc = CCLASS_(*np); - add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env); + r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env); + if (r != 0) { + onig_node_free(*np); + *np = NULL_NODE; + return r; + } if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); } break; @@ -8478,7 +8570,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, break; case TK_CHAR_PROPERTY: - r = parse_char_property(np, tok, src, end, env); + r = prs_char_property(np, tok, src, end, env); if (r != 0) return r; break; @@ -8486,7 +8578,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, { CClassNode* cc; - r = parse_cc(np, tok, src, end, env); + r = prs_cc(np, tok, src, end, env); if (r != 0) return r; cc = CCLASS_(*np); @@ -8685,8 +8777,8 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, } static int -parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env, int group_head) +prs_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env, int group_head) { int r; Node *node, **headp; @@ -8694,7 +8786,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end, *top = NULL; INC_PARSE_DEPTH(env->parse_depth); - r = parse_exp(&node, tok, term, src, end, env, group_head); + r = prs_exp(&node, tok, term, src, end, env, group_head); if (r < 0) { onig_node_free(node); return r; @@ -8712,7 +8804,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end, headp = &(NODE_CDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { - r = parse_exp(&node, tok, term, src, end, env, FALSE); + r = prs_exp(&node, tok, term, src, end, env, FALSE); if (r < 0) { onig_node_free(node); return r; @@ -8736,8 +8828,8 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end, /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ static int -parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env, int group_head) +prs_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env, int group_head) { int r; Node *node, **headp; @@ -8747,7 +8839,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, INC_PARSE_DEPTH(env->parse_depth); save_options = env->options; - r = parse_branch(&node, tok, term, src, end, env, group_head); + r = prs_branch(&node, tok, term, src, end, env, group_head); if (r < 0) { onig_node_free(node); return r; @@ -8767,7 +8859,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, while (r == TK_ALT) { r = fetch_token(tok, src, end, env); if (r < 0) return r; - r = parse_branch(&node, tok, term, src, end, env, FALSE); + r = prs_branch(&node, tok, term, src, end, env, FALSE); if (r < 0) { onig_node_free(node); return r; @@ -8800,7 +8892,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end, } static int -parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) +prs_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) { int r; PToken tok; @@ -8808,7 +8900,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) ptoken_init(&tok); r = fetch_token(&tok, src, end, env); if (r < 0) return r; - r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE); + r = prs_alts(top, &tok, TK_EOT, src, end, env, FALSE); if (r < 0) return r; return 0; @@ -8846,6 +8938,15 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, RegexExt* ext; #endif + reg->string_pool = 0; + reg->string_pool_end = 0; + reg->num_mem = 0; + reg->num_repeat = 0; + reg->num_empty_check = 0; + reg->repeat_range_alloc = 0; + reg->repeat_range = (RepeatRange* )NULL; + reg->empty_status_mem = 0; + names_clear(reg); scan_env_clear(env); @@ -8863,7 +8964,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, return ONIGERR_INVALID_WIDE_CHAR_VALUE; p = (UChar* )pattern; - r = parse_regexp(root, &p, (UChar* )end, env); + r = prs_regexp(root, &p, (UChar* )end, env); if (r != 0) return r; #ifdef USE_CALL diff --git a/src/regparse.h b/src/regparse.h index 979e982..c60a42d 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -163,7 +163,7 @@ typedef struct { struct _Node* body; /* to BagNode : BAG_MEMORY */ int by_number; - int group_num; + int called_gnum; UChar* name; UChar* name_end; int entry_count; @@ -339,6 +339,7 @@ typedef struct { #define NODE_ST_TEXT_SEGMENT_WORD (1<<23) #define NODE_ST_ABSENT_WITH_SIDE_EFFECTS (1<<24) /* stopper or clear */ #define NODE_ST_FIXED_CLEN_MIN_SURE (1<<25) +#define NODE_ST_REFERENCED (1<<26) #define NODE_STATUS(node) (((Node* )node)->u.base.status) @@ -374,6 +375,7 @@ typedef struct { #define NODE_IS_TEXT_SEGMENT_WORD(node) ((NODE_STATUS(node) & NODE_ST_TEXT_SEGMENT_WORD) != 0) #define NODE_IS_ABSENT_WITH_SIDE_EFFECTS(node) ((NODE_STATUS(node) & NODE_ST_ABSENT_WITH_SIDE_EFFECTS) != 0) #define NODE_IS_FIXED_CLEN_MIN_SURE(node) ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN_MIN_SURE) != 0) +#define NODE_IS_REFERENCED(node) ((NODE_STATUS(node) & NODE_ST_REFERENCED) != 0) #define NODE_PARENT(node) ((node)->u.base.parent) #define NODE_BODY(node) ((node)->u.base.body) diff --git a/src/regposerr.c b/src/regposerr.c index 12d95a9..e5b7899 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -37,6 +37,18 @@ #include "config.h" #include "onigposix.h" +#undef regex_t +#undef regmatch_t +#undef regoff_t +#undef regcomp +#undef regexec +#undef regfree +#undef regerror +#undef reg_set_encoding +#undef reg_name_to_group_numbers +#undef reg_foreach_name +#undef reg_number_of_names + #ifndef ONIG_NO_STANDARD_C_HEADERS #include #include @@ -92,10 +104,9 @@ static char* ESTRING[] = { }; - extern size_t -regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf, - size_t size) +onig_posix_regerror(int posix_ecode, const onig_posix_regex_t* reg ARG_UNUSED, + char* buf, size_t size) { char* s; char tbuf[35]; @@ -121,3 +132,14 @@ regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf, } return len; } + +#ifdef USE_BINARY_COMPATIBLE_POSIX_API + +extern size_t +regerror(int posix_ecode, const onig_posix_regex_t* reg ARG_UNUSED, + char* buf, size_t size) +{ + return onig_posix_regerror(posix_ecode, reg, buf, size); +} + +#endif diff --git a/src/regposix.c b/src/regposix.c index 4e523a4..497ba02 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -33,6 +33,18 @@ #include "onigposix.h" +#undef regex_t +#undef regmatch_t +#undef regoff_t +#undef regcomp +#undef regexec +#undef regfree +#undef regerror +#undef reg_set_encoding +#undef reg_name_to_group_numbers +#undef reg_foreach_name +#undef reg_number_of_names + #define ONIG_C(reg) ((onig_regex_t* )((reg)->onig)) #define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig)) @@ -64,6 +76,7 @@ onig2posix_error_code(int code) { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, { ONIGERR_RETRY_LIMIT_IN_MATCH_OVER, REG_EONIG_INTERNAL }, { ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER, REG_EONIG_INTERNAL }, + { ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER, REG_EONIG_INTERNAL }, { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, @@ -144,7 +157,7 @@ onig2posix_error_code(int code) } extern int -regcomp(regex_t* reg, const char* pattern, int posix_options) +onig_posix_regcomp(onig_posix_regex_t* reg, const char* pattern, int posix_options) { int r, len; OnigSyntaxType* syntax = OnigDefaultSyntax; @@ -178,12 +191,12 @@ regcomp(regex_t* reg, const char* pattern, int posix_options) } extern int -regexec(regex_t* reg, const char* str, size_t nmatch, - regmatch_t pmatch[], int posix_options) +onig_posix_regexec(onig_posix_regex_t* reg, const char* str, size_t nmatch, + onig_posix_regmatch_t pmatch[], int posix_options) { int r, i, len; UChar* end; - regmatch_t* pm; + onig_posix_regmatch_t* pm; OnigOptionType options; options = ONIG_OPTION_POSIX_REGION; @@ -191,11 +204,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch, if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL; if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) { - pm = (regmatch_t* )NULL; + pm = (onig_posix_regmatch_t* )NULL; nmatch = 0; } else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) { - pm = (regmatch_t* )xmalloc(sizeof(regmatch_t) + pm = (onig_posix_regmatch_t* )xmalloc(sizeof(onig_posix_regmatch_t) * (ONIG_C(reg)->num_mem + 1)); if (pm == NULL) return REG_ESPACE; @@ -212,7 +225,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch, if (r >= 0) { r = 0; /* Match */ if (pm != pmatch && pm != NULL) { - xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch); + xmemcpy(pmatch, pm, sizeof(onig_posix_regmatch_t) * nmatch); } } else if (r == ONIG_MISMATCH) { @@ -236,7 +249,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch, } extern void -regfree(regex_t* reg) +onig_posix_regfree(onig_posix_regex_t* reg) { onig_free(ONIG_C(reg)); reg->onig = (void* )0; @@ -244,7 +257,7 @@ regfree(regex_t* reg) extern void -reg_set_encoding(int mb_code) +onig_posix_reg_set_encoding(int mb_code) { OnigEncoding enc; @@ -279,15 +292,15 @@ reg_set_encoding(int mb_code) } extern int -reg_name_to_group_numbers(regex_t* reg, +onig_posix_reg_name_to_group_numbers(onig_posix_regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums) { return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); } typedef struct { - int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*); - regex_t* reg; + int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*); + onig_posix_regex_t* reg; void* arg; } i_wrap; @@ -301,8 +314,8 @@ i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs, } extern int -reg_foreach_name(regex_t* reg, - int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), +onig_posix_reg_foreach_name(onig_posix_regex_t* reg, + int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*), void* arg) { i_wrap warg; @@ -315,7 +328,58 @@ reg_foreach_name(regex_t* reg, } extern int -reg_number_of_names(regex_t* reg) +onig_posix_reg_number_of_names(onig_posix_regex_t* reg) { return onig_number_of_names(ONIG_C(reg)); } + + +#ifdef USE_BINARY_COMPATIBLE_POSIX_API + +extern int +regcomp(onig_posix_regex_t* reg, const char* pattern, int posix_options) +{ + return onig_posix_regcomp(reg, pattern, posix_options); +} + +extern int +regexec(onig_posix_regex_t* reg, const char* str, size_t nmatch, + onig_posix_regmatch_t pmatch[], int posix_options) +{ + return onig_posix_regexec(reg, str, nmatch, pmatch, posix_options); +} + +extern void +regfree(onig_posix_regex_t* reg) +{ + onig_posix_regfree(reg); +} + +extern void +reg_set_encoding(int mb_code) +{ + onig_posix_reg_set_encoding(mb_code); +} + +extern int +reg_name_to_group_numbers(onig_posix_regex_t* reg, + const unsigned char* name, const unsigned char* name_end, int** nums) +{ + return onig_posix_reg_name_to_group_numbers(reg, name, name_end, nums); +} + +extern int +reg_foreach_name(onig_posix_regex_t* reg, + int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*), + void* arg) +{ + return onig_posix_reg_foreach_name(reg, func, arg); +} + +extern int +reg_number_of_names(onig_posix_regex_t* reg) +{ + return onig_posix_reg_number_of_names(reg); +} + +#endif /* USE_BINARY_COMPATIBLE_POSIX_API */ diff --git a/src/sjis.c b/src/sjis.c index 1fd92d9..10afd9d 100644 --- a/src/sjis.c +++ b/src/sjis.c @@ -2,7 +2,7 @@ sjis.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -113,13 +113,15 @@ static int code_to_mbclen(OnigCodePoint code) { if (code < 256) { - return EncLen_SJIS[(int )code] == 1; + if (EncLen_SJIS[(int )code] == 1) + return 1; } - else if (code <= 0xffff) { - return 2; + else if (code < 0x10000) { + if (EncLen_SJIS[(int )(code >> 8) & 0xff] == 2) + return 2; } - else - return ONIGERR_INVALID_CODE_POINT_VALUE; + + return ONIGERR_INVALID_CODE_POINT_VALUE; } static OnigCodePoint diff --git a/src/st.c b/src/st.c index 522f205..8ee610b 100644 --- a/src/st.c +++ b/src/st.c @@ -151,6 +151,7 @@ st_init_table_with_size(type, size) #endif size = new_size(size); /* round up to prime number */ + if (size <= 0) return 0; tbl = alloc(st_table); if (tbl == 0) return 0; @@ -318,10 +319,13 @@ rehash(table) register st_table *table; { register st_table_entry *ptr, *next, **new_bins; - int i, old_num_bins = table->num_bins, new_num_bins; + int i, new_num_bins, old_num_bins; unsigned int hash_val; - new_num_bins = new_size(old_num_bins+1); + old_num_bins = table->num_bins; + new_num_bins = new_size(old_num_bins + 1); + if (new_num_bins <= 0) return ; + new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); if (new_bins == 0) { return ; diff --git a/src/unicode.c b/src/unicode.c index 080da74..6703d4b 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -387,15 +387,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, for (i = 0; i < ncs[0]; i++) { for (j = 0; j < ncs[1]; j++) { for (k = 0; k < ncs[2]; k++) { + if (cs[0][i] == orig_codes[0] && cs[1][j] == orig_codes[1] && + cs[2][k] == orig_codes[2]) + continue; + items[n].byte_len = lens[2]; items[n].code_len = 3; items[n].code[0] = cs[0][i]; items[n].code[1] = cs[1][j]; items[n].code[2] = cs[2][k]; - if (items[n].code[0] == orig_codes[0] && - items[n].code[1] == orig_codes[1] && - items[n].code[2] == orig_codes[2]) - continue; n++; } } @@ -431,13 +431,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, for (i = 0; i < ncs[0]; i++) { for (j = 0; j < ncs[1]; j++) { + if (cs[0][i] == orig_codes[0] && cs[1][j] == orig_codes[1]) + continue; items[n].byte_len = lens[1]; items[n].code_len = 2; items[n].code[0] = cs[0][i]; items[n].code[1] = cs[1][j]; - if (items[n].code[0] == orig_codes[0] && - items[n].code[1] == orig_codes[1]) - continue; n++; } } diff --git a/src/utf16_be.c b/src/utf16_be.c index d237b93..5014e18 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/utf16_le.c b/src/utf16_le.c index f14d263..35ceb3c 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/utf32_be.c b/src/utf32_be.c index bdd3db7..31bd98b 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -67,7 +67,10 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end) static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) { - return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); + OnigCodePoint code; + + code = (OnigCodePoint )((((p[0] & 0x7f) * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); + return code; } static int diff --git a/src/utf32_le.c b/src/utf32_le.c index 473ab74..f50cab7 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -67,7 +67,10 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end) static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) { - return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); + OnigCodePoint code; + + code = (OnigCodePoint )((((p[3] & 0x7f) * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); + return code; } static int diff --git a/test/test_back.c b/test/test_back.c index 9a337b9..6bf5159 100644 --- a/test/test_back.c +++ b/test/test_back.c @@ -18,6 +18,10 @@ static int nsucc = 0; static int nfail = 0; static int nerror = 0; +#ifdef __TRUSTINSOFT_ANALYZER__ +static int nall = 0; +#endif + static FILE* err_file; static OnigRegion* region; @@ -25,6 +29,10 @@ static OnigRegion* region; static void xx(char* pattern, char* str, int from, int to, int mem, int not, int error_no, int line_no) { +#ifdef __TRUSTINSOFT_ANALYZER__ + if (nall++ % TIS_TEST_CHOOSE_MAX != TIS_TEST_CHOOSE_CURRENT) return; +#endif + int r; regex_t* reg; OnigErrorInfo einfo; @@ -1419,7 +1427,7 @@ extern int main(int argc, char* argv[]) x2("\\p{Common}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ x2("\\p{In_Enclosed_CJK_Letters_and_Months}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ - e("\\x{7fffffff}", "", ONIGERR_TOO_BIG_WIDE_CHAR_VALUE); + e("\\x{7fffffff}", "", ONIGERR_INVALID_CODE_POINT_VALUE); e("[\\x{7fffffff}]", "", ONIGERR_INVALID_CODE_POINT_VALUE); e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); e("(?\\g)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); diff --git a/test/test_regset.c b/test/test_regset.c index c8442a1..7476ec4 100644 --- a/test/test_regset.c +++ b/test/test_regset.c @@ -50,6 +50,7 @@ make_regset(int line_no, int n, char* pat[], OnigRegSet** rset, int error_no) nfail++; } } + onig_regset_free(set); return r; } @@ -96,6 +97,7 @@ time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, dou ONIG_REGSET_POSITION_LEAD, ONIG_OPTION_NONE, &match_pos); if (r < 0) { fprintf(stderr, "FAIL onig_regset_search(POSITION_LEAD): %d\n", r); + onig_regset_free(set); return r; } } @@ -109,6 +111,7 @@ time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, dou ONIG_REGSET_REGEX_LEAD, ONIG_OPTION_NONE, &match_pos); if (r < 0) { fprintf(stderr, "FAIL onig_regset_search(REGEX_LEAD): %d\n", r); + onig_regset_free(set); return r; } } @@ -158,7 +161,10 @@ time_compare(int n, char* ps[], char* s, char* end) for (i = 0; i < n; i++) { fisher_yates_shuffle(n, ps, cps); r = time_test(repeat, n, cps, s, end, &t_set, &t_reg); - if (r != 0) return ; + if (r != 0) { + free(cps); + return ; + } total_set += t_set; total_reg += t_reg; } @@ -231,6 +237,7 @@ xx(int line_no, int n, char* ps[], char* s, int from, int to, int mem, int not, if (region == 0) { fprintf(stderr, "ERROR: %d: can't get region.\n", line_no); nerror++; + onig_regset_free(set); return ; } @@ -285,7 +292,7 @@ n(int line_no, int n, char* ps[], char* s) static int get_all_content_of_file(char* path, char** rs, char** rend) { - size_t len; + ssize_t len; size_t n; char* line; FILE* fp; diff --git a/test/test_syntax.c b/test/test_syntax.c index df80e59..06fef45 100644 --- a/test/test_syntax.c +++ b/test/test_syntax.c @@ -17,6 +17,10 @@ static int nsucc = 0; static int nfail = 0; static int nerror = 0; +#ifdef __TRUSTINSOFT_ANALYZER__ +static int nall = 0; +#endif + static FILE* err_file; static OnigRegion* region; @@ -26,6 +30,10 @@ static OnigSyntaxType* Syntax; static void xx(char* pattern, char* str, int from, int to, int mem, int not, int error_no) { +#ifdef __TRUSTINSOFT_ANALYZER__ + if (nall++ % TIS_TEST_CHOOSE_MAX != TIS_TEST_CHOOSE_CURRENT) return; +#endif + int r; regex_t* reg; OnigErrorInfo einfo; diff --git a/test/test_utf8.c b/test/test_utf8.c index 1bbc071..7a4322d 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -18,6 +18,10 @@ static int nsucc = 0; static int nfail = 0; static int nerror = 0; +#ifdef __TRUSTINSOFT_ANALYZER__ +static int nall = 0; +#endif + static FILE* err_file; static OnigRegion* region; @@ -25,6 +29,10 @@ static OnigRegion* region; static void xx(char* pattern, char* str, int from, int to, int mem, int not, int error_no) { +#ifdef __TRUSTINSOFT_ANALYZER__ + if (nall++ % TIS_TEST_CHOOSE_MAX != TIS_TEST_CHOOSE_CURRENT) return; +#endif + int r; regex_t* reg; OnigErrorInfo einfo; @@ -545,6 +553,17 @@ extern int main(int argc, char* argv[]) x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); x3("(?<=(abc))d", "abcd", 0, 3, 1); x2("(?<=(?i:abc))d", "ABCd", 3, 4); + x2("(?<=^|b)c", " cbc", 3, 4); + x2("(?<=a|^|b)c", " cbc", 3, 4); + x2("(?<=a|(^)|b)c", " cbc", 3, 4); + x2("(?<=a|(^)|b)c", "cbc", 0, 1); + n("(Q)|(?<=a|(?(1))|b)c", "czc"); + x2("(Q)(?<=a|(?(1))|b)c", "cQc", 1, 3); + x2("(?<=a|(?~END)|b)c", "ENDc", 3, 4); + n("(?", "aa", 0, 2); x2("(?)|(?<=(\\k<1>))", ""); + x2("(a|\\k<2>)|(?<=(\\k<1>))", "a", 0, 1); + x2("(a|\\k<2>)|(?<=b(\\k<1>))", "ba", 1, 2); x2("((?(a)\\g<1>|b))", "aab", 0, 3); x2("((?(a)\\g<1>))", "aab", 0, 2); @@ -1459,6 +1488,10 @@ extern int main(int argc, char* argv[]) e("(?i)000000000000000000000\xf0", "", ONIGERR_INVALID_CODE_POINT_VALUE); /* https://bugs.php.net/bug.php?id=77382 */ n("0000\\\xf5", "0"); /* https://bugs.php.net/bug.php?id=77385 */ n("(?i)FFF00000000000000000\xfd", ""); /* https://bugs.php.net/bug.php?id=77394 */ + n("(?x)\n (?:*]|&&|\\|\\||\\?|\\*\\/|^await|[^\\._$[:alnum:]]await|^return|[^\\._$[:alnum:]]return|^default|[^\\._$[:alnum:]]default|^yield|[^\\._$[:alnum:]]yield|^)\\s*\n (?!<\\s*[_$[:alpha:]][_$[:alnum:]]*((\\s+extends\\s+[^=>])|,)) # look ahead is not type parameter of arrow\n (?=(<)\\s*(?:([_$[:alpha:]][-_$[:alnum:].]*)(?))", " while (i < len && f(array[i]))"); /* Issue #192 */ + + x2("aaaaaaaaaaaaaaaaaaaaaaaあb", "aaaaaaaaaaaaaaaaaaaaaaaあb", 0, 27); /* Issue #221 */ + e("x{55380}{77590}", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); e("(xyz){40000}{99999}(?vv)", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); e("f{90000,90000}{80000,80000}", "", ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); @@ -1467,7 +1500,7 @@ extern int main(int argc, char* argv[]) x2("\\p{Common}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ x2("\\p{In_Enclosed_CJK_Letters_and_Months}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ - e("\\x{7fffffff}", "", ONIGERR_TOO_BIG_WIDE_CHAR_VALUE); + e("\\x{7fffffff}", "", ONIGERR_INVALID_CODE_POINT_VALUE); e("[\\x{7fffffff}]", "", ONIGERR_INVALID_CODE_POINT_VALUE); e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); e("(?\\g)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); diff --git a/test/testc.c b/test/testc.c index fbede67..b3a34ea 100644 --- a/test/testc.c +++ b/test/testc.c @@ -14,11 +14,19 @@ static int nsucc = 0; static int nfail = 0; static int nerror = 0; +#ifdef __TRUSTINSOFT_ANALYZER__ +static int nall = 0; +#endif + static FILE* err_file; static OnigRegion* region; static void xx(char* pattern, char* str, int from, int to, int mem, int not) { +#ifdef __TRUSTINSOFT_ANALYZER__ + if (nall++ % TIS_TEST_CHOOSE_MAX != TIS_TEST_CHOOSE_CURRENT) return; +#endif + int r; regex_t* reg; OnigErrorInfo einfo; diff --git a/test/testu.c b/test/testu.c index 24397ab..c1971e7 100644 --- a/test/testu.c +++ b/test/testu.c @@ -14,6 +14,10 @@ static int nsucc = 0; static int nfail = 0; static int nerror = 0; +#ifdef __TRUSTINSOFT_ANALYZER__ +static int nall = 0; +#endif + static FILE* err_file; #ifndef POSIX_TEST @@ -36,7 +40,7 @@ static void uconv(char* from, char* to, int len) if (c == 0) { c = (unsigned char )from[i+1]; if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) { - sprintf(q, "\\%03o", c); + sprintf(q, "\\%03o", (unsigned int )c); q += 4; } else { @@ -45,10 +49,10 @@ static void uconv(char* from, char* to, int len) } } else { - sprintf(q, "\\%03o", c); + sprintf(q, "\\%03o", (unsigned int )c); q += 4; c = (unsigned char )from[i+1]; - sprintf(q, "\\%03o", c); + sprintf(q, "\\%03o", (unsigned int )c); q += 4; } } @@ -58,6 +62,10 @@ static void uconv(char* from, char* to, int len) static void xx(char* pattern, char* str, int from, int to, int mem, int not) { +#ifdef __TRUSTINSOFT_ANALYZER__ + if (nall++ % TIS_TEST_CHOOSE_MAX != TIS_TEST_CHOOSE_CURRENT) return; +#endif + int r; char cpat[4000], cstr[4000]; diff --git a/tis-ci/config.h b/tis-ci/config.h new file mode 100644 index 0000000..9400605 --- /dev/null +++ b/tis-ci/config.h @@ -0,0 +1,109 @@ +/* src/config.h. Generated from config.h.in by configure. */ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +/* #undef CRAY_STACKSEG_END */ + +/* Define to 1 if using `alloca.c'. */ +/* #undef C_ALLOCA */ + +/* Define to 1 if you have `alloca', as a function or macro. */ +#define HAVE_ALLOCA 1 + +/* Define to 1 if you have and it should be used (not on Ultrix). + */ +#define HAVE_ALLOCA_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIMES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "onig" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "onig" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "onig 6.9.4" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "onig" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "6.9.4" + +/* The size of `int', as computed by sizeof. */ +#define SIZEOF_INT 4 + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* The size of `long long', as computed by sizeof. */ +#define SIZEOF_LONG_LONG 8 + +/* The size of `void*', as computed by sizeof. */ +#define SIZEOF_VOIDP 8 + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +/* #undef STACK_DIRECTION */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define if enable CR+NL as line terminator */ +/* #undef USE_CRNL_AS_LINE_TERMINATOR */ + +/* Version number of package */ +#define VERSION "6.9.4" + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ diff --git a/tis-ci/stub.c b/tis-ci/stub.c new file mode 100644 index 0000000..11f1570 --- /dev/null +++ b/tis-ci/stub.c @@ -0,0 +1,3 @@ +void srand(unsigned int seed) { + return; +} diff --git a/tis-ci/test_back.config b/tis-ci/test_back.config new file mode 100644 index 0000000..a0ab67f --- /dev/null +++ b/tis-ci/test_back.config @@ -0,0 +1,26 @@ +{ + "compilation_cmd": "-I ../tis-ci -I ../src -D alloca=__builtin_alloca", + "files": [ + "../test/test_back.c", + "../src/unicode.c", + "../src/regcomp.c", + "../src/regenc.c", + "../src/utf16_be.c", + "../src/regparse.c", + "../src/st.c", + "../src/regexec.c", + "../src/unicode_unfold_key.c", + "../src/unicode_fold3_key.c", + "../src/unicode_fold2_key.c", + "../src/unicode_fold1_key.c", + "../src/utf8.c", + "../src/regerror.c", + "../src/regversion.c", + "../src/ascii.c" + ], + "machdep": "gcc_x86_64", + "main": "main", + "name": "test_back.c FULL", + "address-alignment": 65536, /* hexadecimal 0x10000 */ + "val-warn-undefined-pointer-comparison": "none" +} diff --git a/tis-ci/test_regset.config b/tis-ci/test_regset.config new file mode 100644 index 0000000..2589b5a --- /dev/null +++ b/tis-ci/test_regset.config @@ -0,0 +1,20 @@ +{ + "compilation_cmd": "-I ../tis-ci -I ../src -D alloca=__builtin_alloca", + "files": [ + "../test/test_regset.c", + "../tis-ci/stub.c", + "../src/regcomp.c", + "../src/regenc.c", + "../src/utf8.c", + "../src/regexec.c", + "../src/ascii.c", + "../src/regparse.c", + "../src/st.c", + "../src/regversion.c" + ], + "machdep": "gcc_x86_64", + "main": "main", + "name": "test_regset.c FULL", + "address-alignment": 65536, /* hexadecimal 0x10000 */ + "val-warn-undefined-pointer-comparison": "none" +} diff --git a/tis-ci/test_syntax.config b/tis-ci/test_syntax.config new file mode 100644 index 0000000..92b040f --- /dev/null +++ b/tis-ci/test_syntax.config @@ -0,0 +1,26 @@ +{ + "compilation_cmd": "-I ../tis-ci -I ../src -D alloca=__builtin_alloca", + "files": [ + "../test/test_syntax.c", + "../src/unicode.c", + "../src/regcomp.c", + "../src/regenc.c", + "../src/utf16_be.c", + "../src/regparse.c", + "../src/st.c", + "../src/regexec.c", + "../src/unicode_unfold_key.c", + "../src/unicode_fold3_key.c", + "../src/unicode_fold2_key.c", + "../src/unicode_fold1_key.c", + "../src/utf8.c", + "../src/regsyntax.c", + "../src/ascii.c", + "../src/regversion.c" + ], + "machdep": "gcc_x86_64", + "main": "main", + "name": "test_syntax.c FULL", + "address-alignment": 65536, /* hexadecimal 0x10000 */ + "val-warn-undefined-pointer-comparison": "none" +} diff --git a/tis-ci/test_utf8.config b/tis-ci/test_utf8.config new file mode 100644 index 0000000..5ae259e --- /dev/null +++ b/tis-ci/test_utf8.config @@ -0,0 +1,25 @@ +{ + "compilation_cmd": "-I ../tis-ci -I ../src -D alloca=__builtin_alloca", + "files": [ + "../test/test_utf8.c", + "../tis-ci/stub.c", + "../src/regcomp.c", + "../src/regenc.c", + "../src/utf8.c", + "../src/regexec.c", + "../src/ascii.c", + "../src/regparse.c", + "../src/st.c", + "../src/unicode.c", + "../src/unicode_unfold_key.c", + "../src/unicode_fold3_key.c", + "../src/unicode_fold2_key.c", + "../src/unicode_fold1_key.c", + "../src/regversion.c" + ], + "machdep": "gcc_x86_64", + "main": "main", + "name": "test_utf8.c FULL", + "address-alignment": 65536, /* hexadecimal 0x10000 */ + "val-warn-undefined-pointer-comparison": "none" +} diff --git a/tis-ci/testc.config b/tis-ci/testc.config new file mode 100644 index 0000000..57a1e6f --- /dev/null +++ b/tis-ci/testc.config @@ -0,0 +1,26 @@ +{ + "compilation_cmd": "-I ../tis-ci -I ../src -D alloca=__builtin_alloca", + "files": [ + "../test/testc.c", + "../src/unicode.c", + "../src/regcomp.c", + "../src/regenc.c", + "../src/utf16_be.c", + "../src/regparse.c", + "../src/st.c", + "../src/regexec.c", + "../src/unicode_unfold_key.c", + "../src/unicode_fold3_key.c", + "../src/unicode_fold2_key.c", + "../src/unicode_fold1_key.c", + "../src/euc_jp.c", + "../src/euc_jp_prop.c", + "../src/ascii.c", + "../src/regversion.c" + ], + "machdep": "gcc_x86_64", + "main": "main", + "name": "testc.c FULL", + "address-alignment": 65536, /* hexadecimal 0x10000 */ + "val-warn-undefined-pointer-comparison": "none" +} diff --git a/tis-ci/testu.config b/tis-ci/testu.config new file mode 100644 index 0000000..c4f88a7 --- /dev/null +++ b/tis-ci/testu.config @@ -0,0 +1,24 @@ +{ + "compilation_cmd": "-I ../tis-ci -I ../src -D alloca=__builtin_alloca", + "files": [ + "../test/testu.c", + "../src/unicode.c", + "../src/regcomp.c", + "../src/regenc.c", + "../src/utf16_be.c", + "../src/regparse.c", + "../src/st.c", + "../src/regexec.c", + "../src/unicode_unfold_key.c", + "../src/unicode_fold3_key.c", + "../src/unicode_fold2_key.c", + "../src/unicode_fold1_key.c", + "../src/utf8.c", + "../src/regversion.c" + ], + "machdep": "gcc_x86_64", + "main": "main", + "name": "testu.c FULL", + "address-alignment": 65536, /* hexadecimal 0x10000 */ + "val-warn-undefined-pointer-comparison": "none" +} diff --git a/tis.config b/tis.config new file mode 100644 index 0000000..aba448e --- /dev/null +++ b/tis.config @@ -0,0 +1,1336 @@ +[ + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=1", + "name": "test_utf8.c (1/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=2", + "name": "test_utf8.c (2/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=3", + "name": "test_utf8.c (3/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=4", + "name": "test_utf8.c (4/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=5", + "name": "test_utf8.c (5/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=6", + "name": "test_utf8.c (6/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=7", + "name": "test_utf8.c (7/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=8", + "name": "test_utf8.c (8/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=9", + "name": "test_utf8.c (9/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=10", + "name": "test_utf8.c (10/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=11", + "name": "test_utf8.c (11/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=12", + "name": "test_utf8.c (12/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=13", + "name": "test_utf8.c (13/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=14", + "name": "test_utf8.c (14/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=15", + "name": "test_utf8.c (15/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=16", + "name": "test_utf8.c (16/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=17", + "name": "test_utf8.c (17/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=18", + "name": "test_utf8.c (18/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=19", + "name": "test_utf8.c (19/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=20", + "name": "test_utf8.c (20/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=21", + "name": "test_utf8.c (21/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=22", + "name": "test_utf8.c (22/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=23", + "name": "test_utf8.c (23/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=24", + "name": "test_utf8.c (24/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=25", + "name": "test_utf8.c (25/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=26", + "name": "test_utf8.c (26/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=27", + "name": "test_utf8.c (27/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=28", + "name": "test_utf8.c (28/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=29", + "name": "test_utf8.c (29/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=30", + "name": "test_utf8.c (30/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=31", + "name": "test_utf8.c (31/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=32", + "name": "test_utf8.c (32/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=33", + "name": "test_utf8.c (33/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=34", + "name": "test_utf8.c (34/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=35", + "name": "test_utf8.c (35/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=36", + "name": "test_utf8.c (36/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=37", + "name": "test_utf8.c (37/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=38", + "name": "test_utf8.c (38/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=39", + "name": "test_utf8.c (39/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=40", + "name": "test_utf8.c (40/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=41", + "name": "test_utf8.c (41/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=42", + "name": "test_utf8.c (42/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=43", + "name": "test_utf8.c (43/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=44", + "name": "test_utf8.c (44/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=45", + "name": "test_utf8.c (45/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=46", + "name": "test_utf8.c (46/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=47", + "name": "test_utf8.c (47/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=48", + "name": "test_utf8.c (48/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=49", + "name": "test_utf8.c (49/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=50", + "name": "test_utf8.c (50/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=51", + "name": "test_utf8.c (51/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=52", + "name": "test_utf8.c (52/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=53", + "name": "test_utf8.c (53/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=54", + "name": "test_utf8.c (54/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=55", + "name": "test_utf8.c (55/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=56", + "name": "test_utf8.c (56/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=57", + "name": "test_utf8.c (57/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=58", + "name": "test_utf8.c (58/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=59", + "name": "test_utf8.c (59/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=60", + "name": "test_utf8.c (60/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=61", + "name": "test_utf8.c (61/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=62", + "name": "test_utf8.c (62/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=63", + "name": "test_utf8.c (63/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=64", + "name": "test_utf8.c (64/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=65", + "name": "test_utf8.c (65/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=66", + "name": "test_utf8.c (66/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=67", + "name": "test_utf8.c (67/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=68", + "name": "test_utf8.c (68/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=69", + "name": "test_utf8.c (69/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=70", + "name": "test_utf8.c (70/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=71", + "name": "test_utf8.c (71/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=72", + "name": "test_utf8.c (72/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=73", + "name": "test_utf8.c (73/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=74", + "name": "test_utf8.c (74/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=75", + "name": "test_utf8.c (75/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=76", + "name": "test_utf8.c (76/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=77", + "name": "test_utf8.c (77/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=78", + "name": "test_utf8.c (78/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=79", + "name": "test_utf8.c (79/80)" + }, + { + "include": "tis-ci/test_utf8.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=0", + "name": "test_utf8.c (80/80)" + }, + { + "include": "tis-ci/test_regset.config", + "name": "test_regset.c FULL" + }, + { + "include": "tis-ci/test_syntax.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=6 -DTIS_TEST_CHOOSE_CURRENT=1", + "name": "test_syntax.c (1/6)" + }, + { + "include": "tis-ci/test_syntax.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=6 -DTIS_TEST_CHOOSE_CURRENT=2", + "name": "test_syntax.c (2/6)" + }, + { + "include": "tis-ci/test_syntax.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=6 -DTIS_TEST_CHOOSE_CURRENT=3", + "name": "test_syntax.c (3/6)" + }, + { + "include": "tis-ci/test_syntax.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=6 -DTIS_TEST_CHOOSE_CURRENT=4", + "name": "test_syntax.c (4/6)" + }, + { + "include": "tis-ci/test_syntax.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=6 -DTIS_TEST_CHOOSE_CURRENT=5", + "name": "test_syntax.c (5/6)" + }, + { + "include": "tis-ci/test_syntax.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=6 -DTIS_TEST_CHOOSE_CURRENT=0", + "name": "test_syntax.c (6/6)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=1", + "name": "testu.c (1/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=2", + "name": "testu.c (2/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=3", + "name": "testu.c (3/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=4", + "name": "testu.c (4/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=5", + "name": "testu.c (5/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=6", + "name": "testu.c (6/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=7", + "name": "testu.c (7/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=8", + "name": "testu.c (8/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=9", + "name": "testu.c (9/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=10", + "name": "testu.c (10/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=11", + "name": "testu.c (11/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=12", + "name": "testu.c (12/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=13", + "name": "testu.c (13/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=14", + "name": "testu.c (14/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=15", + "name": "testu.c (15/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=16", + "name": "testu.c (16/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=17", + "name": "testu.c (17/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=18", + "name": "testu.c (18/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=19", + "name": "testu.c (19/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=20", + "name": "testu.c (20/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=21", + "name": "testu.c (21/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=22", + "name": "testu.c (22/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=23", + "name": "testu.c (23/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=24", + "name": "testu.c (24/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=25", + "name": "testu.c (25/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=26", + "name": "testu.c (26/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=27", + "name": "testu.c (27/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=28", + "name": "testu.c (28/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=29", + "name": "testu.c (29/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=30", + "name": "testu.c (30/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=31", + "name": "testu.c (31/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=32", + "name": "testu.c (32/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=33", + "name": "testu.c (33/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=34", + "name": "testu.c (34/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=35", + "name": "testu.c (35/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=36", + "name": "testu.c (36/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=37", + "name": "testu.c (37/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=38", + "name": "testu.c (38/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=39", + "name": "testu.c (39/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=40", + "name": "testu.c (40/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=41", + "name": "testu.c (41/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=42", + "name": "testu.c (42/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=43", + "name": "testu.c (43/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=44", + "name": "testu.c (44/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=45", + "name": "testu.c (45/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=46", + "name": "testu.c (46/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=47", + "name": "testu.c (47/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=48", + "name": "testu.c (48/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=49", + "name": "testu.c (49/50)" + }, + { + "include": "tis-ci/testu.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=0", + "name": "testu.c (50/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=1", + "name": "testc.c (1/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=2", + "name": "testc.c (2/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=3", + "name": "testc.c (3/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=4", + "name": "testc.c (4/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=5", + "name": "testc.c (5/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=6", + "name": "testc.c (6/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=7", + "name": "testc.c (7/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=8", + "name": "testc.c (8/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=9", + "name": "testc.c (9/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=10", + "name": "testc.c (10/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=11", + "name": "testc.c (11/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=12", + "name": "testc.c (12/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=13", + "name": "testc.c (13/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=14", + "name": "testc.c (14/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=15", + "name": "testc.c (15/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=16", + "name": "testc.c (16/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=17", + "name": "testc.c (17/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=18", + "name": "testc.c (18/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=19", + "name": "testc.c (19/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=20", + "name": "testc.c (20/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=21", + "name": "testc.c (21/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=22", + "name": "testc.c (22/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=23", + "name": "testc.c (23/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=24", + "name": "testc.c (24/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=25", + "name": "testc.c (25/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=26", + "name": "testc.c (26/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=27", + "name": "testc.c (27/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=28", + "name": "testc.c (28/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=29", + "name": "testc.c (29/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=30", + "name": "testc.c (30/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=31", + "name": "testc.c (31/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=32", + "name": "testc.c (32/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=33", + "name": "testc.c (33/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=34", + "name": "testc.c (34/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=35", + "name": "testc.c (35/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=36", + "name": "testc.c (36/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=37", + "name": "testc.c (37/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=38", + "name": "testc.c (38/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=39", + "name": "testc.c (39/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=40", + "name": "testc.c (40/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=41", + "name": "testc.c (41/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=42", + "name": "testc.c (42/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=43", + "name": "testc.c (43/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=44", + "name": "testc.c (44/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=45", + "name": "testc.c (45/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=46", + "name": "testc.c (46/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=47", + "name": "testc.c (47/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=48", + "name": "testc.c (48/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=49", + "name": "testc.c (49/50)" + }, + { + "include": "tis-ci/testc.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=50 -DTIS_TEST_CHOOSE_CURRENT=0", + "name": "testc.c (50/50)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=1", + "name": "test_back.c (1/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=2", + "name": "test_back.c (2/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=3", + "name": "test_back.c (3/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=4", + "name": "test_back.c (4/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=5", + "name": "test_back.c (5/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=6", + "name": "test_back.c (6/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=7", + "name": "test_back.c (7/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=8", + "name": "test_back.c (8/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=9", + "name": "test_back.c (9/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=10", + "name": "test_back.c (10/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=11", + "name": "test_back.c (11/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=12", + "name": "test_back.c (12/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=13", + "name": "test_back.c (13/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=14", + "name": "test_back.c (14/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=15", + "name": "test_back.c (15/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=16", + "name": "test_back.c (16/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=17", + "name": "test_back.c (17/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=18", + "name": "test_back.c (18/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=19", + "name": "test_back.c (19/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=20", + "name": "test_back.c (20/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=21", + "name": "test_back.c (21/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=22", + "name": "test_back.c (22/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=23", + "name": "test_back.c (23/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=24", + "name": "test_back.c (24/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=25", + "name": "test_back.c (25/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=26", + "name": "test_back.c (26/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=27", + "name": "test_back.c (27/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=28", + "name": "test_back.c (28/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=29", + "name": "test_back.c (29/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=30", + "name": "test_back.c (30/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=31", + "name": "test_back.c (31/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=32", + "name": "test_back.c (32/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=33", + "name": "test_back.c (33/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=34", + "name": "test_back.c (34/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=35", + "name": "test_back.c (35/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=36", + "name": "test_back.c (36/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=37", + "name": "test_back.c (37/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=38", + "name": "test_back.c (38/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=39", + "name": "test_back.c (39/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=40", + "name": "test_back.c (40/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=41", + "name": "test_back.c (41/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=42", + "name": "test_back.c (42/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=43", + "name": "test_back.c (43/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=44", + "name": "test_back.c (44/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=45", + "name": "test_back.c (45/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=46", + "name": "test_back.c (46/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=47", + "name": "test_back.c (47/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=48", + "name": "test_back.c (48/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=49", + "name": "test_back.c (49/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=50", + "name": "test_back.c (50/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=51", + "name": "test_back.c (51/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=52", + "name": "test_back.c (52/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=53", + "name": "test_back.c (53/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=54", + "name": "test_back.c (54/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=55", + "name": "test_back.c (55/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=56", + "name": "test_back.c (56/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=57", + "name": "test_back.c (57/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=58", + "name": "test_back.c (58/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=59", + "name": "test_back.c (59/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=60", + "name": "test_back.c (60/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=61", + "name": "test_back.c (61/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=62", + "name": "test_back.c (62/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=63", + "name": "test_back.c (63/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=64", + "name": "test_back.c (64/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=65", + "name": "test_back.c (65/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=66", + "name": "test_back.c (66/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=67", + "name": "test_back.c (67/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=68", + "name": "test_back.c (68/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=69", + "name": "test_back.c (69/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=70", + "name": "test_back.c (70/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=71", + "name": "test_back.c (71/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=72", + "name": "test_back.c (72/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=73", + "name": "test_back.c (73/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=74", + "name": "test_back.c (74/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=75", + "name": "test_back.c (75/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=76", + "name": "test_back.c (76/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=77", + "name": "test_back.c (77/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=78", + "name": "test_back.c (78/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=79", + "name": "test_back.c (79/80)" + }, + { + "include": "tis-ci/test_back.config", + "compilation_cmd": "-DTIS_TEST_CHOOSE_MAX=80 -DTIS_TEST_CHOOSE_CURRENT=0", + "name": "test_back.c (80/80)" + } +] -- cgit v1.2.3 From 827be2f4780a52b55431eaa4bd69c6a04ee5a050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 8 Nov 2020 11:27:50 +0100 Subject: Refresh symbols file --- debian/changelog | 8 ++++++++ debian/symbols | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/debian/changelog b/debian/changelog index a221978..cea228e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +libonig (6.9.6-1) UNRELEASED; urgency=medium + + * New upstream release. + - Refresh symbols file. + - Fix CVE-2020-26159. + + -- Jörg Frings-Fürst Sun, 08 Nov 2020 10:59:25 +0100 + libonig (6.9.5-2) unstable; urgency=medium * debian/rules diff --git a/debian/symbols b/debian/symbols index 1ea68aa..e764aea 100644 --- a/debian/symbols +++ b/debian/symbols @@ -69,6 +69,7 @@ libonig.so.5 libonig5 #MINVER# onig_copy_encoding@Base 6.8.1 onig_copy_syntax@Base 6.8.1 onig_copyright@Base 6.8.1 + onig_detect_can_be_slow_pattern@Base 6.9.6 onig_end@Base 6.8.1 onig_error_code_to_format@Base 6.8.1 onig_error_code_to_str@Base 6.8.1 @@ -122,6 +123,7 @@ libonig.so.5 libonig5 #MINVER# onig_get_start_by_callout_args@Base 6.8.1 onig_get_string_by_callout_args@Base 6.8.1 onig_get_string_end_by_callout_args@Base 6.8.1 + onig_get_subexp_call_limit_in_search@Base 6.9.6 onig_get_subexp_call_max_nest_level@Base 6.9.5 onig_get_syntax@Base 6.8.1 onig_get_syntax_behavior@Base 6.8.1 @@ -167,6 +169,14 @@ libonig.so.5 libonig5 #MINVER# onig_number_of_names@Base 6.8.1 onig_parse_tree@Base 6.8.1 onig_positive_int_multiply@Base 6.9.1 + onig_posix_reg_foreach_name@Base 6.9.6 + onig_posix_reg_name_to_group_numbers@Base 6.9.6 + onig_posix_reg_number_of_names@Base 6.9.6 + onig_posix_reg_set_encoding@Base 6.9.6 + onig_posix_regcomp@Base 6.9.6 + onig_posix_regerror@Base 6.9.6 + onig_posix_regexec@Base 6.9.6 + onig_posix_regfree@Base 6.9.6 onig_reduce_nested_quantifier@Base 6.8.1 onig_reg_callout_list_at@Base 6.8.1 onig_reg_init@Base 6.8.1 @@ -212,6 +222,7 @@ libonig.so.5 libonig5 #MINVER# onig_set_retry_limit_in_match_of_match_param@Base 6.8.1 onig_set_retry_limit_in_search@Base 6.9.5 onig_set_retry_limit_in_search_of_match_param@Base 6.9.5 + onig_set_subexp_call_limit_in_search@Base 6.9.6 onig_set_subexp_call_max_nest_level@Base 6.9.5 onig_set_syntax_behavior@Base 6.8.1 onig_set_syntax_op2@Base 6.8.1 @@ -311,11 +322,3 @@ libonig.so.5 libonig5 #MINVER# re_mbcinit@Base 6.8.1 re_search@Base 6.8.1 re_set_casetable@Base 6.8.1 - reg_foreach_name@Base 6.9.5 - reg_name_to_group_numbers@Base 6.9.5 - reg_number_of_names@Base 6.9.5 - reg_set_encoding@Base 6.9.5 - regcomp@Base 6.9.5 - regerror@Base 6.9.5 - regexec@Base 6.9.5 - regfree@Base 6.9.5 -- cgit v1.2.3 From 256edac683da4bf12d04897ea013ffb0c5893cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 8 Nov 2020 12:49:04 +0100 Subject: Add remove for libonig.[la|a], d/changelog: Change distribution to unstable, Change date and time --- debian/changelog | 8 ++++++-- debian/control | 2 +- debian/rules | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/debian/changelog b/debian/changelog index cea228e..ec66e9c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,14 @@ -libonig (6.9.6-1) UNRELEASED; urgency=medium +libonig (6.9.6-1) unstable; urgency=medium * New upstream release. - Refresh symbols file. - Fix CVE-2020-26159. + * Migrate to debhelper-compat 13: + - Bump debhelper-compat version in debian/control to = 13. + * debian/rules: + - Add remove for libonig.[la|a] to fix warning about missing installs. - -- Jörg Frings-Fürst Sun, 08 Nov 2020 10:59:25 +0100 + -- Jörg Frings-Fürst Sun, 08 Nov 2020 12:47:46 +0100 libonig (6.9.5-2) unstable; urgency=medium diff --git a/debian/control b/debian/control index fc0c05f..41c5bf7 100644 --- a/debian/control +++ b/debian/control @@ -3,7 +3,7 @@ Section: libs Priority: extra Maintainer: Jörg Frings-Fürst Build-Depends: - debhelper-compat (= 12) + debhelper-compat (= 13) Standards-Version: 4.5.0 Rules-Requires-Root: no Homepage: https://github.com/kkos/oniguruma diff --git a/debian/rules b/debian/rules index 0f34908..8434f6c 100755 --- a/debian/rules +++ b/debian/rules @@ -21,6 +21,8 @@ override_dh_auto_configure: override_dh_install: $(RM) debian/tmp/usr/bin/onig-config + $(RM) debian/tmp/usr/lib/${DEB_HOST_MULTIARCH}/libonig.a + $(RM) debian/tmp/usr/lib/${DEB_HOST_MULTIARCH}/libonig.la dh_install -X.la -X.a override_dh_makeshlibs: -- cgit v1.2.3 From dbedac4783f09292abb187d0c59d4032594038b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 8 Nov 2020 13:08:46 +0100 Subject: Add Bug Closes --- debian/changelog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index ec66e9c..b60e189 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,13 +2,13 @@ libonig (6.9.6-1) unstable; urgency=medium * New upstream release. - Refresh symbols file. - - Fix CVE-2020-26159. + - Fix CVE-2020-26159 (Closes: #972113). * Migrate to debhelper-compat 13: - Bump debhelper-compat version in debian/control to = 13. * debian/rules: - Add remove for libonig.[la|a] to fix warning about missing installs. - -- Jörg Frings-Fürst Sun, 08 Nov 2020 12:47:46 +0100 + -- Jörg Frings-Fürst Sun, 08 Nov 2020 13:08:04 +0100 libonig (6.9.5-2) unstable; urgency=medium -- cgit v1.2.3