diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2018-12-21 20:06:28 +0100 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2018-12-21 20:06:28 +0100 |
commit | f883fa5bd37b6420f5dc25027a68289c64028063 (patch) | |
tree | 80452d3b1840e553402538830e903ed9fecdb3a8 | |
parent | 66dfd6613394a8903701840bbc9d67de537e597e (diff) | |
parent | f1aff0cc17934fa45d9520dae6986562e033cb8f (diff) |
Merge branch 'release/debian/6.9.1-1'debian/6.9.1-1
80 files changed, 3033 insertions, 2950 deletions
@@ -27,3 +27,33 @@ Makefile.in /build /onig-*.tar.gz m4/*.m4 + +# src/ +/src/CaseFolding.txt +/src/unicode_fold?_key.gperf +/src/unicode_unfold_key.gperf +/src/UNICODE_PROPERTIES +/src/*.txt +/src/mktable + +# test/ +/test/test_utf8 +/test/testc +/test/testcu +/test/testp + +# sample/ +/sample/crnl +/sample/encode +/sample/listcap +/sample/names +/sample/posix +/sample/simple +/sample/sql +/sample/syntax +/sample/user_property +/sample/callout +/sample/echo +/sample/count +/sample/bug_fix +/sample/log* diff --git a/CMakeLists.txt b/CMakeLists.txt index 6aae87b..06068bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.1) -project(oniguruma VERSION 6.9.0) +project(oniguruma VERSION 6.9.1) set(PACKAGE onig) set(PACKAGE_VERSION ${PROJECT_VERSION}) @@ -33,13 +33,7 @@ include(TestBigEndian) check_function_exists(alloca HAVE_ALLOCA) check_include_files(alloca.h HAVE_ALLOCA_H) -set(HAVE_PROTOTYPES 1) -check_include_files(stdarg.h HAVE_STDARG_PROTOTYPES) check_include_files(stdint.h HAVE_STDINT_H) -check_include_files(stdlib.h HAVE_STDLIB_H) -check_include_files(strings.h HAVE_STRINGS_H) -check_include_files(string.h HAVE_STRING_H) -check_include_files(limits.h HAVE_LIMITS_H) check_include_files(sys/times.h HAVE_SYS_TIMES_H) check_include_files(sys/time.h HAVE_SYS_TIME_H) check_include_files(sys/types.h HAVE_SYS_TYPES_H) @@ -48,7 +42,6 @@ check_include_files(inttypes.h HAVE_INTTYPES_H) check_type_size(int SIZEOF_INT) check_type_size(long SIZEOF_LONG) check_type_size(short SIZEOF_SHORT) -check_include_files("stdlib.h;stdarg.h;string.h;float.h" STDC_HEADERS) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) @@ -1,5 +1,25 @@ History +2018/12/11: Version 6.9.1 + +2018/10/08: use ENC_FLAG_SKIP_OFFSET_XXX values +2018/10/06: UTF-8 supports code range from 0x0000 to 0x10FFFF + (https://tools.ietf.org/html/rfc3629) +2018/10/05: speed improvement +2018/10/03: use 
OPTIMIZE_STR_CASE_FOLD_FAST +2018/10/01: convert CRLF line endings to LF +2018/09/27: set SIZEOF_SIZE_T for windows platforms +2018/09/22: use Sunday quick search algorithm instead of Boyer-Moor-Horspool +2018/09/20: introduce threaded code into match_at() +2018/09/17: remove HAVE_STRINGS_H +2018/09/16: remove HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES +2018/09/14: add a command line option '-gc' for make_unicode_property_data.py. +2018/09/08: remove AC_HEADER_STDC +2018/09/06: remove AC_OUTPUT macro call +2018/09/06: remove AC_FUNC_MEMCMP, AC_HEADER_TIME, AC_C_CONST, HAVE__SETJMP and + HAVE_STRING_H +2018/09/05: remove HAVE_LIMITS_H, HAVE_FLOAT_H and HAVE_STDLIB_H + 2018/09/03: Version 6.9.0 2018/08/24: add Unicode Emoji properties @@ -394,12 +414,12 @@ History 2006/11/07: [dist] remove test.rb, testconv.rb and testconvu.rb. 2006/11/07: [bug] get_case_fold_codes_by_str() should handle 'Ss' and 'sS' combination for ess-tsett. -2006/11/07: [impl] apply_all_case_fold() doesn't need to return all +2006/11/07: [impl] apply_all_case_fold() doesn't need to return all case character combination for multi-character folding. (ONIGENC_CASE_FOLD_MULTI_CHAR) 2006/11/07: [bug] (thanks Byte) add { 0xa3, 0xb3 } to CaseFoldMap[] for KOI8-R. -2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of +2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of the string range. add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE. 2006/11/02: [impl] re-implement expand_case_fold_string() for @@ -667,7 +687,7 @@ History 2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. 2006/05/11: [bug] (thanks Yuji Kaneda) - dead-lock in onig_end(). + dead-lock in onig_end(). 2006/05/11: [dist] update index.html. 2006/05/08: Version 4.0.3 @@ -719,7 +739,7 @@ History use GNU libtool/automake. change configure.in and add Makefile.am, sample/Makefile.am. add AUTHORS file. -2006/01/24: [dist] test programs return exit code -1 when test fails. 
+2006/01/24: [dist] test programs return exit code -1 when test fails. 2006/01/24: [bug] (thanks KIMURA Koichi) invalid syntax definition in ONIG_SYNTAX_GREP. ONIG_SYN_OP_BRACE_INTERVAL @@ -737,7 +757,7 @@ History 2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. 2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. 2005/11/21: [bug] (thanks Allan Odgaard) - utf-8 character comments in extended mode leads + utf-8 character comments in extended mode leads invalid result. ex. /(?x)(?<= # <any-utf-8 multibyte char>o\n~) / fix onigenc_unicode_is_code_ctype() and @@ -819,7 +839,7 @@ History add new character encoding ONIG_ENCODING_GB18030. 2005/06/30: [bug] invalid ctype check for multibyte encodings. ("graph", "print") - fix onigenc_mb2/4_is_code_ctype(), + fix onigenc_mb2/4_is_code_ctype(), eucjp_is_code_ctype() and sjis_is_code_ctype(). 2005/06/30: [bug] invalid conversion from code point to mbc in onigenc_mb4_code_to_mbc(). @@ -894,7 +914,7 @@ History remove oniggnu.h from make 19. 2005/03/01: [bug] (thanks matz) [ruby-dev:25778] uninitialized member (OptEnv.backrefed_status) - was used. + was used. 2005/02/19: Version 3.7.0 @@ -945,7 +965,7 @@ History 2005/01/19: [bug] (thanks Isao Sonobe) callback function argument name_end of onig_foreach_name() was wrong. - name key of name table should be null terminated for + name key of name table should be null terminated for character encoding length. add strdup_with_null(), rename onig_strdup() to k_strdup(). use e->name_len in i_names(). @@ -1217,7 +1237,7 @@ History RelAddrType, AbsAddrType and LengthType change from short int to int type for the very long string match. 2004/06/14: [bug] (thanks Greg A. Woods) - fix nmatch argument of regexec() is smaller than + fix nmatch argument of regexec() is smaller than reg->num_mem + 1 case. (POSIX API) 2004/06/14: [spec] (thanks Greg A. Woods) set pmatch to NULL if nmatch is 0 in regexec(). 
(POSIX API) @@ -1397,7 +1417,7 @@ History 2004/02/23: [new] support ISO-8859-10. (ONIG_ENCODING_ISO_8859_10) 2004/02/20: [bug] fix iso_8859_4_mbc_is_case_ambig(). 2004/02/20: [new] support ISO-8859-9. (ONIG_ENCODING_ISO_8859_9) -2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4, +2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4, ISO-8859-6, ISO-8859-7, ISO-8859-8, KOI8_R. 2004/02/18: [bug] wrong replaced name OnigSyntaxGnuOnigex. 2004/02/17: [spec] check capture status for empty infinite loop. @@ -1570,7 +1590,7 @@ History 2003/11/11: [spec] add syntax op. REG_SYN_OP_VARIABLE_META_CHARS. 2003/11/11: [spec] rename REG_SYN_OP_ESC_CAPITAL_Q_QUOTE to REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE, - REG_SYN_OP_QMARK_GROUP_EFFECT to + REG_SYN_OP_QMARK_GROUP_EFFECT to REG_SYN_OP2_QMARK_GROUP_EFFECT. 2003/11/06: [impl] define THREAD_PASS as rb_thread_schedule() in Ruby mode. 2003/11/05: [spec] add syntax behavior REG_SYN_WARN_REDUNDANT_NESTED_REPEAT. @@ -1587,7 +1607,7 @@ History 2003/10/03: [bug] (thanks nobu) [ruby-dev:21472] sub-anchor of optimization map info was wrong in concat_left_node_opt_info(). - ex. /^(x?y)/ = "xy" fail. + ex. /^(x?y)/ = "xy" fail. 2003/09/17: Version 1.9.4 @@ -1650,7 +1670,7 @@ History 2003/09/01: [dist] update doc/RE and doc/RE.ja. 2003/08/26: [bug] (thanks Guy Decoux) should not double free node at the case TK_CC_CC_OPEN - in parse_char_class(). + in parse_char_class(). 2003/08/19: Version 1.9.3 @@ -1662,8 +1682,8 @@ History REG_SYN_OP2_ATMARK_CAPTURE_HISTORY. 2003/08/18: [spec] (thanks nobu) don't use IMPORT in oniguruma.h and onigposix.h. -2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb. -2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in. +2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb. +2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in. 2003/08/18: [bug] REG_MBLEN_TABLE[SJIS][0xFD-0xFF] should be 1. 
2003/08/18: [bug] (thanks nobu) mbctab_sjis[0x80] should be 0. 2003/08/18: [bug] (thanks nobu) @@ -1692,7 +1712,7 @@ History 2003/07/29: [new] add regex_get_encoding(), regex_get_options() and regex_get_syntax(). 2003/07/25: [spec] (thanks akr) - change group(...) to shy-group(?:...) if named group is + change group(...) to shy-group(?:...) if named group is used in the pattern. add REG_SYN_CAPTURE_ONLY_NAMED_GROUP. 2003/07/24: [spec] rename REG_OPTION_CAPTURE_ONLY_NAMED_GROUP to @@ -1720,7 +1740,7 @@ History set option status to effect memory in optimize_node_left(). 2003/07/07: [impl] add opcode OP_ANYCHAR_ML, OP_ANYCHAR_ML_STAR and OP_ANYCHAR_ML_START_PEEK_NEXT. -2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1. +2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1. 2003/07/07: [spec] rename REG_SYN_OP_QUOTE to REG_SYN_OP_ESC_Q_QUOTE. 2003/07/04: Version 1.9.1 @@ -1783,7 +1803,7 @@ History 2003/06/12: [spec] add syntax behavior REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED. 2003/06/12: [spec] invalid POSIX bracket should be error. ex. [[:upper :]] 2003/06/11: [new] char-class in char-class (as Java(TM)). -2003/06/11: [spec] change AND operator in char-class from &&[..] to &&. +2003/06/11: [spec] change AND operator in char-class from &&[..] to &&. 2003/06/04: [spec] {n,m}+ should not be possessive operator. ex. a{3}+ should be (?:a{3})+ 2003/06/03: [bug] should compare strings with min-length in is_not_included(). @@ -1947,7 +1967,7 @@ History 2003/02/26: [impl] add -win option to testconv.rb. 2003/02/25: [spec] allow to assign same name to different group. add OP_BACKREF_MULTI. -2003/02/24: [impl] reduce redundant repeat of empty target. +2003/02/24: [impl] reduce redundant repeat of empty target. ex. /()*/ ==> /()?/, /()+/ ==> /()/, /(?:)+/ ==> // 2003/02/24: [impl] change condition in regex_is_allow_reverse_match(). 2003/02/24: [impl] convert i(/../, ...) functions in testconv.rb. 
@@ -2016,7 +2036,7 @@ History 2003/02/04: [bug] typo miss in regex_region_copy(). 2003/02/04: [impl] change THREAD_PASS macro. (regint.h) 2003/02/04: [dist] add API document file doc/API. -2003/02/04: [tune] if sub_anchor has ANCHOR_BEGIN_LINE then +2003/02/04: [tune] if sub_anchor has ANCHOR_BEGIN_LINE then set REG_OPTIMIZE_EXACT_BM in set_optimize_exact_info(). 2003/02/04: [spec] reimplement regex_clone() and it is obsoleted. 2003/02/04: [bug] add REGERR_OVER_THREAD_PASS_LIMIT_COUNT @@ -2136,7 +2156,7 @@ History 2002/04/01: [dist] add COPYING. 2002/03/30: [spec] warn redundant nested repeat operator in Ruby verbose mode. ex. (?:a*)? -2002/03/30: [spec] nested repeat operator error check should be +2002/03/30: [spec] nested repeat operator error check should be same with GNU regex. (thanks Guy Decoux) 2002/03/30: [new] add \x{hexadecimal-wide-char}. (thanks matz) 2002/03/27: [bug] MBCTYPE_XXX symbol values should be same with GNU regex. @@ -2199,7 +2219,7 @@ History ex. /(?:abc){10}/ 2002/03/06: [new] add a symbol REG_TRANSTABLE_USE_DEFAULT in regex.h. 2002/03/06: [impl] rename RegDefaultCharCode to RegDefaultCharEncoding. -2002/03/06: [bug] if pattern has NULL(\000) char, infinite loop happens +2002/03/06: [bug] if pattern has NULL(\000) char, infinite loop happens in ScanMakeNode(). (beware of strchr(). thanks Nobu) 2002/03/06: [bug] range argument of ForwardSearchRange() is wrong. ex. /\A.a/, /\G.a/ mismatched with "aa". (thanks Nobu) @@ -94,7 +94,7 @@ Usage See doc/API for Oniguruma API. If you want to disable UChar type (== unsigned char) definition - in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then + in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then include oniguruma.h. 
If you want to disable regex_t type definition in oniguruma.h, @@ -1,4 +1,6 @@ [![Build Status](https://travis-ci.org/kkos/oniguruma.svg?branch=master)](https://travis-ci.org/kkos/oniguruma) +[![Code Quality: Cpp](https://img.shields.io/lgtm/grade/cpp/g/kkos/oniguruma.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/kkos/oniguruma/context:cpp) +[![Total Alerts](https://img.shields.io/lgtm/alerts/g/kkos/oniguruma.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/kkos/oniguruma/alerts) Oniguruma ========= @@ -24,6 +26,12 @@ Supported character encodings: * CP1251: contributed by Byte +New feature of version 6.9.1 +-------------------------- + +* Speed improvement (* especially UTF-8) + + New feature of version 6.9.0 -------------------------- @@ -193,7 +201,7 @@ Usage See doc/API for Oniguruma API. If you want to disable UChar type (== unsigned char) definition - in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then + in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then include oniguruma.h. If you want to disable regex_t type definition in oniguruma.h, @@ -294,4 +302,4 @@ Source Files |utf32_le.c |UTF-32LE encoding | |unicode.c |common codes of Unicode encoding | |unicode_fold_data.c|Unicode folding data | -|windows/testc.c |Test program for Windowns (VC++) | +|windows/testc.c |Test program for Windows (VC++) | diff --git a/configure.ac b/configure.ac index fef00cd..53bd8af 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT(onig, 6.9.0) +AC_INIT(onig, 6.9.1) AC_CONFIG_MACRO_DIR([m4]) @@ -10,7 +10,7 @@ AC_CONFIG_HEADERS([src/config.h]) dnl default value for STATISTICS STATISTICS="" AC_ARG_WITH(statistics, - [ --with-statistics take matching time statistical data], + [ --with-statistics take matching time statistical data], [ STATISTICS=-DONIG_DEBUG_STATISTICS ]) AC_SUBST(STATISTICS) @@ -49,45 +49,16 @@ AC_PROG_MAKE_SET dnl Checks for libraries. 
dnl Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS(stdlib.h string.h strings.h limits.h sys/time.h unistd.h sys/times.h) +AC_CHECK_HEADERS(sys/time.h unistd.h sys/times.h) dnl Checks for typedefs, structures, and compiler characteristics. AC_CHECK_SIZEOF(int, 4) AC_CHECK_SIZEOF(short, 2) AC_CHECK_SIZEOF(long, 4) -AC_C_CONST -AC_HEADER_TIME dnl Checks for library functions. AC_FUNC_ALLOCA -AC_FUNC_MEMCMP - -AC_CACHE_CHECK(for prototypes, _cv_have_prototypes, - [AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([[int foo(int x) { return 0; }]], - [[return foo(10);]])], - [_cv_have_prototypes=yes], - [_cv_have_prototypes=no])]) -if test "$_cv_have_prototypes" = yes; then - AC_DEFINE(HAVE_PROTOTYPES,1,[Define if compilerr supports prototypes]) -fi - -AC_CACHE_CHECK(for variable length prototypes and stdarg.h, _cv_stdarg, - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ -#include <stdarg.h> -int foo(int x, ...) { - va_list va; - va_start(va, x); - va_arg(va, int); - va_arg(va, char *); - va_arg(va, double); - return 0; -} -]], [[return foo(10, "", 3.14);]])],[_cv_stdarg=yes],[_cv_stdarg=no])]) -if test "$_cv_stdarg" = yes; then - AC_DEFINE(HAVE_STDARG_PROTOTYPES,1,[Define if compiler supports stdarg prototypes]) -fi - -AC_OUTPUT([Makefile src/Makefile test/Makefile sample/Makefile onig-config], [chmod +x onig-config]) +AC_CONFIG_FILES([Makefile src/Makefile test/Makefile sample/Makefile onig-config]) +AC_CONFIG_COMMANDS([default],[chmod +x onig-config],[]) +AC_OUTPUT diff --git a/contributed/makefile b/contributed/makefile index c50ab36..f44a3c0 100644 --- a/contributed/makefile +++ b/contributed/makefile @@ -1,4 +1,3 @@ - ONIG_LIB=../src/.libs/libonig.a LIBS=$(ONIG_LIB) /usr/local/lib/libLLVMFuzzerMain.a @@ -18,5 +17,5 @@ $(ONIG_LIB): -clean: +clean: rm -f $(TARGETS) diff --git a/debian/changelog b/debian/changelog index 9206f2e..4d88084 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +libonig (6.9.1-1) unstable; urgency=medium + + * New upstream release: 
+ - Refresh symbols file. + + -- Jörg Frings-Fürst <debian@jff.email> Fri, 21 Dec 2018 14:09:48 +0100 + libonig (6.9.0-1) unstable; urgency=medium * New upstream release. diff --git a/debian/symbols b/debian/symbols index f00e208..a102b4d 100644 --- a/debian/symbols +++ b/debian/symbols @@ -149,7 +149,7 @@ libonig.so.5 libonig5 #MINVER# onig_node_list_add@Base 6.8.1 onig_node_new_alt@Base 6.8.1 onig_node_new_anchor@Base 6.8.1 - onig_node_new_enclosure@Base 6.8.1 + onig_node_new_bag@Base 6.9.1 onig_node_new_list@Base 6.8.1 onig_node_new_str@Base 6.8.1 onig_node_str_cat@Base 6.8.1 @@ -161,6 +161,7 @@ libonig.so.5 libonig5 #MINVER# onig_number_of_captures@Base 6.8.1 onig_number_of_names@Base 6.8.1 onig_parse_tree@Base 6.8.1 + onig_positive_int_multiply@Base 6.9.1 onig_reduce_nested_quantifier@Base 6.8.1 onig_reg_callout_list_at@Base 6.8.1 onig_reg_init@Base 6.8.1 @@ -182,7 +182,7 @@ Oniguruma API Version 6.8.0 2018/03/13 ci->target_enc: target string character encoding. ci->syntax: address of pattern syntax definition. ci->option: compile time option. - ci->case_fold_flag: character matching case fold bit flag for + ci->case_fold_flag: character matching case fold bit flag for ONIG_OPTION_IGNORECASE mode. 
ONIGENC_CASE_FOLD_MIN: minimum @@ -54,7 +54,7 @@ \t, \n, \v, \f, \r, \x20 Unicodeの場合: - U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL), + U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL), General_Category -- Line_Separator -- Paragraph_Separator -- Space_Separator @@ -8,7 +8,7 @@ <h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>) <p> -(c) K.Kosako, updated at: 2018/08/31 +(c) K.Kosako, updated at: 2018/12/06 </p> <dl> @@ -16,6 +16,7 @@ <dt><b>What's new</b> </font> <ul> +<li>2018/12/11: Version 6.9.1 released.</li> <li>2018/09/03: Version 6.9.0 released.</li> <li>2018/04/17: Version 6.8.2 released.</li> <li>2018/03/19: Version 6.8.1 released.</li> diff --git a/index_ja.html b/index_ja.html index 06c1753..0ada788 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@ <h1>鬼車</h1> <p> -(c) K.Kosako, 最終更新: 2018/09/03 +(c) K.Kosako, 最終更新: 2018/12/06 </p> <dl> @@ -16,6 +16,7 @@ <dt><b>更新情報</b> </font> <ul> +<li>2018/12/11: Version 6.9.1 リリース</li> <li>2018/09/03: Version 6.9.0 リリース</li> <li>2018/04/17: Version 6.8.2 リリース</li> <li>2018/03/19: Version 6.8.1 リリース</li> diff --git a/onig-config.in b/onig-config.in index 57fe3ad..788d6ba 100644 --- a/onig-config.in +++ b/onig-config.in @@ -35,7 +35,7 @@ while test $# -gt 0; do case "$1" in -*=*) val=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) val= + *) val= ;; esac diff --git a/oniguruma.pc.in b/oniguruma.pc.in index c78155d..ab27016 100644 --- a/oniguruma.pc.in +++ b/oniguruma.pc.in @@ -11,4 +11,3 @@ Version: @PACKAGE_VERSION@ Requires: Libs: -L${libdir} -lonig Cflags: -I${includedir} - diff --git a/sample/.gitignore b/sample/.gitignore deleted file mode 100644 index b6b591b..0000000 --- a/sample/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -/crnl -/encode -/listcap -/names -/posix -/simple -/sql -/syntax -/user_property -/callout -/echo -/count -/bug_fix -/log* diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 58d6a99..c0bd057 100644 --- a/sample/CMakeLists.txt 
+++ b/sample/CMakeLists.txt @@ -1,4 +1,3 @@ - cmake_minimum_required(VERSION 2.8) project(oniguruma_sample C) @@ -34,4 +33,3 @@ target_link_libraries(sql onig) add_executable(syntax syntax.c) target_link_libraries(syntax onig) - diff --git a/sample/crnl.c b/sample/crnl.c index 3f511e9..ee6a516 100644 --- a/sample/crnl.c +++ b/sample/crnl.c @@ -2,7 +2,7 @@ * crnl.c 2007/05/30 K.Kosako * * !!! You should enable USE_CRNL_AS_LINE_TERMINATOR. !!! - * + * * USE_CRNL_AS_LINE_TERMINATOR config test program. */ #include <stdio.h> diff --git a/sample/listcap.c b/sample/listcap.c index 6f9880c..8598d6a 100644 --- a/sample/listcap.c +++ b/sample/listcap.c @@ -98,7 +98,7 @@ extern int main(int argc, char* argv[]) /* enable capture hostory */ onig_copy_syntax(&syn, ONIG_SYNTAX_DEFAULT); - onig_set_syntax_op2(&syn, + onig_set_syntax_op2(&syn, onig_get_syntax_op2(&syn) | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY); r = ex(str1, pattern1, &syn); diff --git a/src/.gitignore b/src/.gitignore deleted file mode 100644 index 50ae793..0000000 --- a/src/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -Makefile -config.h -CaseFolding.txt -unicode_fold?_key.gperf -unicode_unfold_key.gperf -UNICODE_PROPERTIES -*.o -*.so -*.lo -*.la -*~ -*.txt -.libs/ -.deps/ -/mktable diff --git a/src/Makefile.windows b/src/Makefile.windows index e98dc2e..762cf07 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -1,183 +1,183 @@ -# Oniguruma Makefile for Windows
-
-product_name = oniguruma
-
-CPPFLAGS =
-CFLAGS = -O2 -nologo /W3
-LDFLAGS =
-LOADLIBES =
-ARLIB = lib
-ARLIB_FLAGS = -nologo
-ARDLL = cl
-ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
-LINKFLAGS = -link -incremental:no -pdb:none
-
-INSTALL = install -c
-CP = copy
-CC = cl
-DEFS = -DHAVE_CONFIG_H
-
-subdirs =
-
-libbase = onig
-libname = $(libbase)_s.lib
-dllname = $(libbase).dll
-dlllib = $(libbase).lib
-
-!IF defined(ENABLE_POSIX_API) && "$(ENABLE_POSIX_API)" == "NO"
-posixobjs =
-!ELSE
-posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj
-!ENDIF
-
-onigheaders = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h
-posixheaders = $(ONIG_DIR)/onigposix.h
-headers = $(posixheaders) $(onigheaders)
-
-onigobjs = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \
- $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \
- $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj
-libobjs = $(onigobjs) $(posixobjs)
-
-jp_objs = $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj
-iso8859_objs = $(BUILD_DIR)/iso8859_1.obj $(BUILD_DIR)/iso8859_2.obj \
- $(BUILD_DIR)/iso8859_3.obj $(BUILD_DIR)/iso8859_4.obj \
- $(BUILD_DIR)/iso8859_5.obj $(BUILD_DIR)/iso8859_6.obj \
- $(BUILD_DIR)/iso8859_7.obj $(BUILD_DIR)/iso8859_8.obj \
- $(BUILD_DIR)/iso8859_9.obj $(BUILD_DIR)/iso8859_10.obj \
- $(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \
- $(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \
- $(BUILD_DIR)/iso8859_16.obj
-
-encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \
- $(BUILD_DIR)/unicode.obj \
- $(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \
- $(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \
- $(jp_objs) $(iso8859_objs) \
- $(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \
- $(BUILD_DIR)/gb18030.obj \
- $(BUILD_DIR)/koi8_r.obj \
- $(BUILD_DIR)/cp1251.obj \
- $(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \
- $(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \
- $(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj # $(BUILD_DIR)/koi8.obj
-
-onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
- $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c
-posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c
-libsources = $(posixsources) $(onigsources)
-
-patchfiles = re.c.168.patch re.c.181.patch
-distfiles = README COPYING HISTORY \
- Makefile.in configure.in config.h.in configure \
- $(headers) $(libsources) $(patchfiles) \
- test.rb testconv.rb
-testc = testc
-testp = testp
-
-makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
-
-.SUFFIXES:
-.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
-
-{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj:
- $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
-
-# targets
-default: all
-
-setup:
- $(CP) ..\win32\config.h config.h
- $(CP) ..\win32\testc.c testc.c
-
-
-all: $(libname) $(dllname)
-
-$(libname): $(libobjs) $(encobjs)
- $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs)
-
-$(dllname): $(libobjs) $(encobjs)
- $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
-
-$(BUILD_DIR)/regparse.obj: $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regext.obj: $(ONIG_DIR)/regext.c $(onigheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regtrav.obj: $(ONIG_DIR)/regtrav.c $(onigheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regcomp.obj: $(ONIG_DIR)/regcomp.c $(onigheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regexec.obj: $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/reggnu.obj: $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h
-$(BUILD_DIR)/regerror.obj: $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regenc.obj: $(ONIG_DIR)/regenc.c $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regposix.obj: $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/st.obj: $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h
-$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h
-
-$(BUILD_DIR)/ascii.obj: $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/unicode.obj: $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf8.obj: $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf16_be.obj: $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf16_le.obj: $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf32_be.obj: $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf32_le.obj: $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/euc_jp.obj: $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/euc_tw.obj: $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/euc_kr.obj: $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/sjis.obj: $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_1.obj: $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_2.obj: $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_3.obj: $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_4.obj: $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_5.obj: $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_6.obj: $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_7.obj: $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_8.obj: $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_9.obj: $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/koi8.obj: $(ONIG_DIR)/koi8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/koi8_r.obj: $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/cp1251.obj: $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/big5.obj: $(ONIG_DIR)/big5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/gb18030.obj: $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/euc_jp_prop.obj: $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h
-$(BUILD_DIR)/sjis_prop.obj: $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h
-$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-
-# C library test
-ctest: $(testc)
- .\$(testc)
-
-# POSIX C library test
-ptest: $(testp)
- .\$(testp)
-
-$(testc): $(testc).c $(libname)
- $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
-
-$(testp): $(testc).c $(dlllib)
- $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib)
-
-$(testc)u: $(testc)u.c $(libname)
- $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
-
-clean:
- del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj
-
-
-samples: all
- $(CC) $(CFLAGS) -I. /Fe:simple $(ONIG_DIR)\sample\simple.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:posix $(ONIG_DIR)\sample\posix.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:names $(ONIG_DIR)\sample\names.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:listcap $(ONIG_DIR)\sample\listcap.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:sql $(ONIG_DIR)\sample\sql.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:encode $(ONIG_DIR)\sample\encode.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:syntax $(ONIG_DIR)\sample\syntax.c $(dlllib)
\ No newline at end of file +# Oniguruma Makefile for Windows + +product_name = oniguruma + +CPPFLAGS = +CFLAGS = -O2 -nologo /W3 +LDFLAGS = +LOADLIBES = +ARLIB = lib +ARLIB_FLAGS = -nologo +ARDLL = cl +ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll +LINKFLAGS = -link -incremental:no -pdb:none + +INSTALL = install -c +CP = copy +CC = cl +DEFS = -DHAVE_CONFIG_H + +subdirs = + +libbase = onig +libname = $(libbase)_s.lib +dllname = $(libbase).dll +dlllib = $(libbase).lib + +!IF defined(ENABLE_POSIX_API) && "$(ENABLE_POSIX_API)" == "NO" +posixobjs = +!ELSE +posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj +!ENDIF + +onigheaders = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h +posixheaders = $(ONIG_DIR)/onigposix.h +headers = $(posixheaders) $(onigheaders) + +onigobjs = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \ + $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \ + $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj +libobjs = $(onigobjs) $(posixobjs) + +jp_objs = $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj +iso8859_objs = $(BUILD_DIR)/iso8859_1.obj $(BUILD_DIR)/iso8859_2.obj \ + $(BUILD_DIR)/iso8859_3.obj $(BUILD_DIR)/iso8859_4.obj \ + $(BUILD_DIR)/iso8859_5.obj $(BUILD_DIR)/iso8859_6.obj \ + $(BUILD_DIR)/iso8859_7.obj $(BUILD_DIR)/iso8859_8.obj \ + $(BUILD_DIR)/iso8859_9.obj $(BUILD_DIR)/iso8859_10.obj \ + $(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \ + $(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \ + $(BUILD_DIR)/iso8859_16.obj + +encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \ + $(BUILD_DIR)/unicode.obj \ + $(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \ + $(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \ + $(jp_objs) $(iso8859_objs) \ + $(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj 
$(BUILD_DIR)/big5.obj \ + $(BUILD_DIR)/gb18030.obj \ + $(BUILD_DIR)/koi8_r.obj \ + $(BUILD_DIR)/cp1251.obj \ + $(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \ + $(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \ + $(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj # $(BUILD_DIR)/koi8.obj + +onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \ + $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c +posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c +libsources = $(posixsources) $(onigsources) + +patchfiles = re.c.168.patch re.c.181.patch +distfiles = README COPYING HISTORY \ + Makefile.in configure.in config.h.in configure \ + $(headers) $(libsources) $(patchfiles) \ + test.rb testconv.rb +testc = testc +testp = testp + +makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' + +.SUFFIXES: +.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo + +{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj: + $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. 
/Fo$@ /c $< + +# targets +default: all + +setup: + $(CP) ..\win32\config.h config.h + $(CP) ..\win32\testc.c testc.c + + +all: $(libname) $(dllname) + +$(libname): $(libobjs) $(encobjs) + $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs) + +$(dllname): $(libobjs) $(encobjs) + $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS) + +$(BUILD_DIR)/regparse.obj: $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regext.obj: $(ONIG_DIR)/regext.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regtrav.obj: $(ONIG_DIR)/regtrav.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regcomp.obj: $(ONIG_DIR)/regcomp.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regexec.obj: $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/reggnu.obj: $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h +$(BUILD_DIR)/regerror.obj: $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regenc.obj: $(ONIG_DIR)/regenc.c $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regposix.obj: $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/st.obj: $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h +$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h + +$(BUILD_DIR)/ascii.obj: $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode.obj: 
$(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf8.obj: $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf16_be.obj: $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf16_le.obj: $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf32_be.obj: $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf32_le.obj: $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_jp.obj: $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_tw.obj: $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_kr.obj: $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/sjis.obj: $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_1.obj: $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_2.obj: $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_3.obj: $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_4.obj: $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_5.obj: $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_6.obj: $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_7.obj: $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_8.obj: $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_9.obj: $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h 
$(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/koi8.obj: $(ONIG_DIR)/koi8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/koi8_r.obj: $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/cp1251.obj: $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/big5.obj: $(ONIG_DIR)/big5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/gb18030.obj: $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_jp_prop.obj: $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h +$(BUILD_DIR)/sjis_prop.obj: $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h +$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h + +# C library test +ctest: $(testc) + .\$(testc) + +# POSIX C library test +ptest: $(testp) + .\$(testp) + +$(testc): $(testc).c $(libname) + $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname) + +$(testp): $(testc).c $(dlllib) + $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib) + +$(testc)u: $(testc)u.c $(libname) + $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) + +clean: + del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe 
$(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj + + +samples: all + $(CC) $(CFLAGS) -I. /Fe:simple $(ONIG_DIR)\sample\simple.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:posix $(ONIG_DIR)\sample\posix.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:names $(ONIG_DIR)\sample\names.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:listcap $(ONIG_DIR)\sample\listcap.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:sql $(ONIG_DIR)\sample\sql.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:encode $(ONIG_DIR)\sample\encode.c $(dlllib) + $(CC) $(CFLAGS) -I. /Fe:syntax $(ONIG_DIR)\sample\syntax.c $(dlllib) diff --git a/src/ascii.c b/src/ascii.c index eb38944..e83e4d6 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -113,6 +113,6 @@ OnigEncodingType OnigEncodingASCII = { init, 0, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; @@ -151,7 +151,7 @@ big5_left_adjust_char_head(const UChar* start, const UChar* s) p++; break; } - } + } } len = enclen(ONIG_ENCODING_BIG5, p); if (p + len > s) return (UChar* )p; @@ -187,6 +187,6 @@ OnigEncodingType OnigEncodingBIG5 = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index b59cc8d..f49177f 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -13,27 +13,9 @@ */ #cmakedefine HAVE_ALLOCA_H ${HAVE_ALLOCA_H} -/* Define if compilerr supports prototypes */ -#cmakedefine HAVE_PROTOTYPES ${HAVE_PROTOTYPES} - -/* Define if compiler supports stdarg prototypes */ -#cmakedefine HAVE_STDARG_PROTOTYPES ${HAVE_STDARG_PROTOTYPES} - /* Define to 1 if you have the <stdint.h> header file. */ #cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H} -/* Define to 1 if you have the <stdlib.h> header file. */ -#cmakedefine HAVE_STDLIB_H ${HAVE_STDLIB_H} - -/* Define to 1 if you have the <strings.h> header file. 
*/ -#cmakedefine HAVE_STRINGS_H ${HAVE_STRINGS_H} - -/* Define to 1 if you have the <string.h> header file. */ -#cmakedefine HAVE_STRING_H ${HAVE_STRING_H} - -/* Define to 1 if you have the <limits.h> header file. */ -#cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H} - /* Define to 1 if you have the <sys/times.h> header file. */ #cmakedefine HAVE_SYS_TIMES_H ${HAVE_SYS_TIMES_H} @@ -64,9 +46,6 @@ /* The size of `short', as computed by sizeof. */ #cmakedefine SIZEOF_SHORT ${SIZEOF_SHORT} -/* Define to 1 if you have the ANSI C header files. */ -#cmakedefine STDC_HEADERS ${STDC_HEADERS} - /* Define if enable CR+NL as line terminator */ #cmakedefine USE_CRNL_AS_LINE_TERMINATOR ${USE_CRNL_AS_LINE_TERMINATOR} diff --git a/src/config.h.win32 b/src/config.h.win32 index 12609df..a8a8426 100644 --- a/src/config.h.win32 +++ b/src/config.h.win32 @@ -1,81 +1,52 @@ -#define STDC_HEADERS 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_FLOAT_H 1
-#define HAVE_OFF_T 1
-#define SIZEOF_INT 4
-#define SIZEOF_SHORT 2
-#define SIZEOF_LONG 4
-#define SIZEOF_LONG_LONG 8
-#define SIZEOF___INT64 8
-#define SIZEOF_OFF_T 4
-#define SIZEOF_VOIDP 4
-#define SIZEOF_FLOAT 4
-#define SIZEOF_DOUBLE 8
-#define HAVE_PROTOTYPES 1
-#define TOKEN_PASTE(x,y) x##y
-#define HAVE_STDARG_PROTOTYPES 1
-#ifndef NORETURN
-#if _MSC_VER > 1100
-#define NORETURN(x) __declspec(noreturn) x
-#else
-#define NORETURN(x) x
-#endif
-#endif
-#define HAVE_DECL_SYS_NERR 1
-#define STDC_HEADERS 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_UTIME_H 1
-#define HAVE_MEMORY_H 1
-#define uid_t int
-#define gid_t int
-#define GETGROUPS_T int
-#define HAVE_ALLOCA 1
-#define HAVE_DUP2 1
-#define HAVE_MEMCMP 1
-#define HAVE_MEMMOVE 1
-#define HAVE_MKDIR 1
-#define HAVE_STRCASECMP 1
-#define HAVE_STRNCASECMP 1
-#define HAVE_STRERROR 1
-#define HAVE_STRFTIME 1
-#define HAVE_STRCHR 1
-#define HAVE_STRSTR 1
-#define HAVE_STRTOD 1
-#define HAVE_STRTOL 1
-#define HAVE_STRTOUL 1
-#define HAVE_FLOCK 1
-#define HAVE_VSNPRINTF 1
-#define HAVE_FINITE 1
-#define HAVE_FMOD 1
-#define HAVE_FREXP 1
-#define HAVE_HYPOT 1
-#define HAVE_MODF 1
-#define HAVE_WAITPID 1
-#define HAVE_CHSIZE 1
-#define HAVE_TIMES 1
-#define HAVE__SETJMP 1
-#define HAVE_TELLDIR 1
-#define HAVE_SEEKDIR 1
-#define HAVE_MKTIME 1
-#define HAVE_COSH 1
-#define HAVE_SINH 1
-#define HAVE_TANH 1
-#define HAVE_EXECVE 1
-#define HAVE_TZNAME 1
-#define HAVE_DAYLIGHT 1
-#define SETPGRP_VOID 1
-#define inline __inline
-#define NEED_IO_SEEK_BETWEEN_RW 1
-#define RSHIFT(x,y) ((x)>>(int)y)
-#define FILE_COUNT _cnt
-#define FILE_READPTR _ptr
-#define DEFAULT_KCODE KCODE_NONE
-#define DLEXT ".so"
-#define DLEXT2 ".dll"
+#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_OFF_T 1 +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF___INT64 8 +#define SIZEOF_OFF_T 4 +#define SIZEOF_VOIDP 4 +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 +#define SIZEOF_SIZE_T 4 +#define TOKEN_PASTE(x,y) x##y +#ifndef NORETURN +#if _MSC_VER > 1100 +#define NORETURN(x) __declspec(noreturn) x +#else +#define NORETURN(x) x +#endif +#endif +#define HAVE_DECL_SYS_NERR 1 +#define HAVE_FCNTL_H 1 +#define HAVE_SYS_UTIME_H 1 +#define HAVE_MEMORY_H 1 +#define uid_t int +#define gid_t int +#define GETGROUPS_T int +#define HAVE_ALLOCA 1 +#define HAVE_DUP2 1 +#define HAVE_MKDIR 1 +#define HAVE_FLOCK 1 +#define HAVE_VSNPRINTF 1 +#define HAVE_FINITE 1 +#define HAVE_HYPOT 1 +#define HAVE_WAITPID 1 +#define HAVE_CHSIZE 1 +#define HAVE_TIMES 1 +#define HAVE_TELLDIR 1 +#define HAVE_SEEKDIR 1 +#define HAVE_EXECVE 1 +#define HAVE_DAYLIGHT 1 +#define SETPGRP_VOID 1 +#define inline __inline +#define NEED_IO_SEEK_BETWEEN_RW 1 +#define RSHIFT(x,y) ((x)>>(int)y) +#define FILE_COUNT _cnt +#define FILE_READPTR _ptr +#define DEFAULT_KCODE KCODE_NONE +#define DLEXT ".so" +#define DLEXT2 ".dll" diff --git a/src/config.h.win64 b/src/config.h.win64 index e892086..59485fa 100644 --- a/src/config.h.win64 +++ b/src/config.h.win64 @@ -1,81 +1,52 @@ -#define STDC_HEADERS 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_FLOAT_H 1
-#define HAVE_OFF_T 1
-#define SIZEOF_INT 4
-#define SIZEOF_SHORT 2
-#define SIZEOF_LONG 4
-#define SIZEOF_LONG_LONG 8
-#define SIZEOF___INT64 8
-#define SIZEOF_OFF_T 4
-#define SIZEOF_VOIDP 8
-#define SIZEOF_FLOAT 4
-#define SIZEOF_DOUBLE 8
-#define HAVE_PROTOTYPES 1
-#define TOKEN_PASTE(x,y) x##y
-#define HAVE_STDARG_PROTOTYPES 1
-#ifndef NORETURN
-#if _MSC_VER > 1100
-#define NORETURN(x) __declspec(noreturn) x
-#else
-#define NORETURN(x) x
-#endif
-#endif
-#define HAVE_DECL_SYS_NERR 1
-#define STDC_HEADERS 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_UTIME_H 1
-#define HAVE_MEMORY_H 1
-#define uid_t int
-#define gid_t int
-#define GETGROUPS_T int
-#define HAVE_ALLOCA 1
-#define HAVE_DUP2 1
-#define HAVE_MEMCMP 1
-#define HAVE_MEMMOVE 1
-#define HAVE_MKDIR 1
-#define HAVE_STRCASECMP 1
-#define HAVE_STRNCASECMP 1
-#define HAVE_STRERROR 1
-#define HAVE_STRFTIME 1
-#define HAVE_STRCHR 1
-#define HAVE_STRSTR 1
-#define HAVE_STRTOD 1
-#define HAVE_STRTOL 1
-#define HAVE_STRTOUL 1
-#define HAVE_FLOCK 1
-#define HAVE_VSNPRINTF 1
-#define HAVE_FINITE 1
-#define HAVE_FMOD 1
-#define HAVE_FREXP 1
-#define HAVE_HYPOT 1
-#define HAVE_MODF 1
-#define HAVE_WAITPID 1
-#define HAVE_CHSIZE 1
-#define HAVE_TIMES 1
-#define HAVE__SETJMP 1
-#define HAVE_TELLDIR 1
-#define HAVE_SEEKDIR 1
-#define HAVE_MKTIME 1
-#define HAVE_COSH 1
-#define HAVE_SINH 1
-#define HAVE_TANH 1
-#define HAVE_EXECVE 1
-#define HAVE_TZNAME 1
-#define HAVE_DAYLIGHT 1
-#define SETPGRP_VOID 1
-#define inline __inline
-#define NEED_IO_SEEK_BETWEEN_RW 1
-#define RSHIFT(x,y) ((x)>>(int)y)
-#define FILE_COUNT _cnt
-#define FILE_READPTR _ptr
-#define DEFAULT_KCODE KCODE_NONE
-#define DLEXT ".so"
-#define DLEXT2 ".dll"
+#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_OFF_T 1 +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF___INT64 8 +#define SIZEOF_OFF_T 4 +#define SIZEOF_VOIDP 8 +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 +#define SIZEOF_SIZE_T 8 +#define TOKEN_PASTE(x,y) x##y +#ifndef NORETURN +#if _MSC_VER > 1100 +#define NORETURN(x) __declspec(noreturn) x +#else +#define NORETURN(x) x +#endif +#endif +#define HAVE_DECL_SYS_NERR 1 +#define HAVE_FCNTL_H 1 +#define HAVE_SYS_UTIME_H 1 +#define HAVE_MEMORY_H 1 +#define uid_t int +#define gid_t int +#define GETGROUPS_T int +#define HAVE_ALLOCA 1 +#define HAVE_DUP2 1 +#define HAVE_MKDIR 1 +#define HAVE_FLOCK 1 +#define HAVE_VSNPRINTF 1 +#define HAVE_FINITE 1 +#define HAVE_HYPOT 1 +#define HAVE_WAITPID 1 +#define HAVE_CHSIZE 1 +#define HAVE_TIMES 1 +#define HAVE_TELLDIR 1 +#define HAVE_SEEKDIR 1 +#define HAVE_EXECVE 1 +#define HAVE_DAYLIGHT 1 +#define SETPGRP_VOID 1 +#define inline __inline +#define NEED_IO_SEEK_BETWEEN_RW 1 +#define RSHIFT(x,y) ((x)>>(int)y) +#define FILE_COUNT _cnt +#define FILE_READPTR _ptr +#define DEFAULT_KCODE KCODE_NONE +#define DLEXT ".so" +#define DLEXT2 ".dll" diff --git a/src/config.h.windows.in b/src/config.h.windows.in index 57fb426..0a18db8 100644 --- a/src/config.h.windows.in +++ b/src/config.h.windows.in @@ -1,85 +1,57 @@ -#define STDC_HEADERS 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_FLOAT_H 1
-#define HAVE_OFF_T 1
-#define SIZEOF_INT 4
-#define SIZEOF_SHORT 2
-#define SIZEOF_LONG 4
-#define SIZEOF_LONG_LONG 8
-#define SIZEOF___INT64 8
-#define SIZEOF_OFF_T 4
-#ifdef _WIN64
-#define SIZEOF_VOIDP 8
-#else
-#define SIZEOF_VOIDP 4
-#endif
-#define SIZEOF_FLOAT 4
-#define SIZEOF_DOUBLE 8
-#define HAVE_PROTOTYPES 1
-#define TOKEN_PASTE(x,y) x##y
-#define HAVE_STDARG_PROTOTYPES 1
-#ifndef NORETURN
-#if _MSC_VER > 1100
-#define NORETURN(x) __declspec(noreturn) x
-#else
-#define NORETURN(x) x
-#endif
-#endif
-#define HAVE_DECL_SYS_NERR 1
-#define STDC_HEADERS 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_UTIME_H 1
-#define HAVE_MEMORY_H 1
-#define uid_t int
-#define gid_t int
-#define GETGROUPS_T int
-#define HAVE_ALLOCA 1
-#define HAVE_DUP2 1
-#define HAVE_MEMCMP 1
-#define HAVE_MEMMOVE 1
-#define HAVE_MKDIR 1
-#define HAVE_STRCASECMP 1
-#define HAVE_STRNCASECMP 1
-#define HAVE_STRERROR 1
-#define HAVE_STRFTIME 1
-#define HAVE_STRCHR 1
-#define HAVE_STRSTR 1
-#define HAVE_STRTOD 1
-#define HAVE_STRTOL 1
-#define HAVE_STRTOUL 1
-#define HAVE_FLOCK 1
-#define HAVE_VSNPRINTF 1
-#define HAVE_FINITE 1
-#define HAVE_FMOD 1
-#define HAVE_FREXP 1
-#define HAVE_HYPOT 1
-#define HAVE_MODF 1
-#define HAVE_WAITPID 1
-#define HAVE_CHSIZE 1
-#define HAVE_TIMES 1
-#define HAVE__SETJMP 1
-#define HAVE_TELLDIR 1
-#define HAVE_SEEKDIR 1
-#define HAVE_MKTIME 1
-#define HAVE_COSH 1
-#define HAVE_SINH 1
-#define HAVE_TANH 1
-#define HAVE_EXECVE 1
-#define HAVE_TZNAME 1
-#define HAVE_DAYLIGHT 1
-#define SETPGRP_VOID 1
-#define inline __inline
-#define NEED_IO_SEEK_BETWEEN_RW 1
-#define RSHIFT(x,y) ((x)>>(int)y)
-#define FILE_COUNT _cnt
-#define FILE_READPTR _ptr
-#define DEFAULT_KCODE KCODE_NONE
-#define DLEXT ".so"
-#define DLEXT2 ".dll"
+#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_OFF_T 1 +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF___INT64 8 +#define SIZEOF_OFF_T 4 +#ifdef _WIN64 +#define SIZEOF_VOIDP 8 +#define SIZEOF_SIZE_T 8 +#else +#define SIZEOF_VOIDP 4 +#define SIZEOF_SIZE_T 4 +#endif +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 +#define TOKEN_PASTE(x,y) x##y +#ifndef NORETURN +#if _MSC_VER > 1100 +#define NORETURN(x) __declspec(noreturn) x +#else +#define NORETURN(x) x +#endif +#endif +#define HAVE_DECL_SYS_NERR 1 +#define HAVE_FCNTL_H 1 +#define HAVE_SYS_UTIME_H 1 +#define HAVE_MEMORY_H 1 +#define uid_t int +#define gid_t int +#define GETGROUPS_T int +#define HAVE_ALLOCA 1 +#define HAVE_DUP2 1 +#define HAVE_MKDIR 1 +#define HAVE_FLOCK 1 +#define HAVE_VSNPRINTF 1 +#define HAVE_FINITE 1 +#define HAVE_HYPOT 1 +#define HAVE_WAITPID 1 +#define HAVE_CHSIZE 1 +#define HAVE_TIMES 1 +#define HAVE_TELLDIR 1 +#define HAVE_SEEKDIR 1 +#define HAVE_EXECVE 1 +#define HAVE_DAYLIGHT 1 +#define SETPGRP_VOID 1 +#define inline __inline +#define NEED_IO_SEEK_BETWEEN_RW 1 +#define RSHIFT(x,y) ((x)>>(int)y) +#define FILE_COUNT _cnt +#define FILE_READPTR _ptr +#define DEFAULT_KCODE KCODE_NONE +#define DLEXT ".so" +#define DLEXT2 ".dll" diff --git a/src/cp1251.c b/src/cp1251.c index e217037..7b19855 100644 --- a/src/cp1251.c +++ b/src/cp1251.c @@ -200,6 +200,6 @@ OnigEncodingType OnigEncodingCP1251 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/euc_jp.c b/src/euc_jp.c index 5d3c1f9..c1ab89e 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -151,7 +151,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf) #if 1 if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) return ONIGERR_INVALID_CODE_POINT_VALUE; -#endif +#endif return (int )(p - buf); } @@ -307,6 
+307,6 @@ OnigEncodingType OnigEncodingEUC_JP = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0, 0, 0 }; diff --git a/src/euc_kr.c b/src/euc_kr.c index def311b..9b62514 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -161,7 +161,9 @@ OnigEncodingType OnigEncodingEUC_KR = { euckr_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0, + 0, 0 }; /* Same with OnigEncodingEUC_KR except the name */ @@ -185,6 +187,6 @@ OnigEncodingType OnigEncodingEUC_CN = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0, 0, 0 }; diff --git a/src/euc_tw.c b/src/euc_tw.c index 8738598..7683336 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -168,6 +168,6 @@ OnigEncodingType OnigEncodingEUC_TW = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/gb18030.c b/src/gb18030.c index d4a1108..7654432 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -535,6 +535,6 @@ OnigEncodingType OnigEncodingGB18030 = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/gperf_fold_key_conv.py b/src/gperf_fold_key_conv.py index 376d343..f453186 100755 --- a/src/gperf_fold_key_conv.py +++ b/src/gperf_fold_key_conv.py @@ -54,11 +54,13 @@ def parse_line(s, key_len): def parse_file(f, key_len): print "/* This file was converted by gperf_fold_key_conv.py\n from gperf output file. 
*/" - line = f.readline() - while line: + while True: + line = f.readline() + if not line: + break + s = parse_line(line, key_len) print s - line = f.readline() # main diff --git a/src/iso8859_1.c b/src/iso8859_1.c index ff47b80..0ce70a6 100644 --- a/src/iso8859_1.c +++ b/src/iso8859_1.c @@ -272,6 +272,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_10.c b/src/iso8859_10.c index f9804e2..4a34b38 100644 --- a/src/iso8859_10.c +++ b/src/iso8859_10.c @@ -239,6 +239,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_11.c b/src/iso8859_11.c index 108ee8a..da8fda0 100644 --- a/src/iso8859_11.c +++ b/src/iso8859_11.c @@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_13.c b/src/iso8859_13.c index 9585355..23a0265 100644 --- a/src/iso8859_13.c +++ b/src/iso8859_13.c @@ -228,6 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_14.c b/src/iso8859_14.c index 83fc551..7281e93 100644 --- a/src/iso8859_14.c +++ b/src/iso8859_14.c @@ -241,6 +241,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git 
a/src/iso8859_15.c b/src/iso8859_15.c index 3a7ad05..3d9f571 100644 --- a/src/iso8859_15.c +++ b/src/iso8859_15.c @@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_16.c b/src/iso8859_16.c index 02022d9..a6977dd 100644 --- a/src/iso8859_16.c +++ b/src/iso8859_16.c @@ -237,6 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_2.c b/src/iso8859_2.c index ecdbb99..4f994c4 100644 --- a/src/iso8859_2.c +++ b/src/iso8859_2.c @@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_3.c b/src/iso8859_3.c index 739f1c9..944a7ae 100644 --- a/src/iso8859_3.c +++ b/src/iso8859_3.c @@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_4.c b/src/iso8859_4.c index 4f2b6a0..3a7c210 100644 --- a/src/iso8859_4.c +++ b/src/iso8859_4.c @@ -237,6 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_5.c b/src/iso8859_5.c index cf41061..0a8b7ec 100644 --- a/src/iso8859_5.c +++ b/src/iso8859_5.c @@ -226,6 +226,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = { NULL, /* init */ 
NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_6.c b/src/iso8859_6.c index 1ffe99f..1c16c79 100644 --- a/src/iso8859_6.c +++ b/src/iso8859_6.c @@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_7.c b/src/iso8859_7.c index 87288c2..0877b6f 100644 --- a/src/iso8859_7.c +++ b/src/iso8859_7.c @@ -222,6 +222,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_8.c b/src/iso8859_8.c index 8f162a4..bd3e94d 100644 --- a/src/iso8859_8.c +++ b/src/iso8859_8.c @@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/iso8859_9.c b/src/iso8859_9.c index 52589cf..8819f4a 100644 --- a/src/iso8859_9.c +++ b/src/iso8859_9.c @@ -228,6 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; @@ -250,6 +250,6 @@ OnigEncodingType OnigEncodingKOI8 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/koi8_r.c b/src/koi8_r.c index 8adc399..5994ebe 100644 --- a/src/koi8_r.c +++ b/src/koi8_r.c @@ -212,6 +212,6 @@ OnigEncodingType OnigEncodingKOI8_R = { NULL, /* 
init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/make_unicode_fold.sh b/src/make_unicode_fold.sh index 45e9566..d5828e1 100755 --- a/src/make_unicode_fold.sh +++ b/src/make_unicode_fold.sh @@ -23,6 +23,13 @@ ${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf ${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf > ${TMP3} ./gperf_fold_key_conv.py 3 < ${TMP3} > unicode_fold3_key.c +# remove redundant EOLs before EOF +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold_data.c +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold1_key.c +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold2_key.c +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold3_key.c +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_unfold_key.c + rm -f ${TMP0} ${TMP1} ${TMP2} ${TMP3} rm -f unicode_unfold_key.gperf unicode_fold1_key.gperf unicode_fold2_key.gperf unicode_fold3_key.gperf diff --git a/src/make_unicode_fold_data.py b/src/make_unicode_fold_data.py index 5c87d4c..783988c 100755 --- a/src/make_unicode_fold_data.py +++ b/src/make_unicode_fold_data.py @@ -5,7 +5,6 @@ import sys import re -import codecs SOURCE_FILE = 'CaseFolding.txt' GPERF_UNFOLD_KEY_FILE = 'unicode_unfold_key.gperf' diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py index 9a48ced..dc3071a 100755 --- a/src/make_unicode_property_data.py +++ b/src/make_unicode_property_data.py @@ -6,8 +6,6 @@ import sys import re -INCLUDE_GRAPHEME_CLUSTER_DATA = False - POSIX_LIST = [ 'NEWLINE', 'Alpha', 'Blank', 'Cntrl', 'Digit', 'Graph', 'Lower', 'Print', 'Punct', 'Space', 'Upper', 'XDigit', 'Word', 'Alnum', 'ASCII' @@ -427,9 +425,17 @@ argv = sys.argv argc = len(argv) POSIX_ONLY = False -if argc >= 2: - if argv[1] == '-posix': +INCLUDE_GRAPHEME_CLUSTER_DATA = False + +for i in range(1, argc): + arg = 
argv[i] + if arg == '-posix': POSIX_ONLY = True + elif arg == '-gc': + INCLUDE_GRAPHEME_CLUSTER_DATA = True + else: + print >> sys.stderr, "Invalid argument: %s" % arg + OUTPUT_LIST_MODE = not(POSIX_ONLY) @@ -441,11 +447,11 @@ with open('UnicodeData.txt', 'r') as f: PROPS = DIC.keys() PROPS = list_sub(PROPS, POSIX_LIST) -dic, props = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property') +parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property') dic, props = parse_and_merge_properties('Scripts.txt', 'Script') DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic)) -dic, props = parse_and_merge_properties('PropList.txt', 'Binary Property') -dic, props = parse_and_merge_properties('emoji-data.txt', 'Emoji Property') +parse_and_merge_properties('PropList.txt', 'Binary Property') +parse_and_merge_properties('emoji-data.txt', 'Emoji Property') PROPS.append('Unknown') KDIC['Unknown'] = 'Script' diff --git a/src/oniguruma.h b/src/oniguruma.h index ab917c6..746445a 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -36,9 +36,9 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 #define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 0 +#define ONIGURUMA_VERSION_TEENY 1 -#define ONIGURUMA_VERSION_INT 60900 +#define ONIGURUMA_VERSION_INT 60901 #ifndef P_ #if defined(__STDC__) || defined(_WIN32) diff --git a/src/regcomp.c b/src/regcomp.c index 83b9252..400368d 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -138,6 +138,17 @@ int_multiply_cmp(int x, int y, int v) return 1; } +extern int +onig_positive_int_multiply(int x, int y) +{ + if (x == 0 || y == 0) return 0; + + if (x < INT_MAX / y) + return x * y; + else + return -1; +} + #ifndef PLATFORM_UNALIGNED_WORD_ACCESS static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; @@ -152,7 +163,7 @@ swap_node(Node* a, Node* b) if (NODE_TYPE(a) == NODE_STRING) { StrNode* sn = STR_(a); - if (sn->capa == 0) { + if (sn->capacity == 0) { int len = (int )(sn->end - sn->s); 
sn->s = sn->buf; sn->end = sn->s + len; @@ -161,7 +172,7 @@ swap_node(Node* a, Node* b) if (NODE_TYPE(b) == NODE_STRING) { StrNode* sn = STR_(b); - if (sn->capa == 0) { + if (sn->capacity == 0) { int len = (int )(sn->end - sn->s); sn->s = sn->buf; sn->end = sn->s + len; @@ -970,8 +981,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (r != 0) return r; for (i = 0; i < n; i++) { - r = add_opcode_rel_addr(reg, OP_PUSH, - (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + int v = onig_positive_int_multiply(n - i, tlen); + if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + r = add_opcode_rel_addr(reg, OP_PUSH, v + (n - i - 1) * SIZE_OP_PUSH); if (r != 0) return r; r = compile_tree(NODE_QUANT_BODY(qn), reg, env); if (r != 0) return r; @@ -991,49 +1003,49 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) } static int -compile_length_option_node(EnclosureNode* node, regex_t* reg) +compile_length_option_node(BagNode* node, regex_t* reg) { int tlen; OnigOptionType prev = reg->options; reg->options = node->o.options; - tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); + tlen = compile_length_tree(NODE_BAG_BODY(node), reg); reg->options = prev; return tlen; } static int -compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) +compile_option_node(BagNode* node, regex_t* reg, ScanEnv* env) { int r; OnigOptionType prev = reg->options; reg->options = node->o.options; - r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); + r = compile_tree(NODE_BAG_BODY(node), reg, env); reg->options = prev; return r; } static int -compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) +compile_length_bag_node(BagNode* node, regex_t* reg) { int len; int tlen; - if (node->type == ENCLOSURE_OPTION) + if (node->type == BAG_OPTION) return compile_length_option_node(node, reg); - if (NODE_ENCLOSURE_BODY(node)) { - tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); + if (NODE_BAG_BODY(node)) { + tlen = 
compile_length_tree(NODE_BAG_BODY(node), reg); if (tlen < 0) return tlen; } else tlen = 0; switch (node->type) { - case ENCLOSURE_MEMORY: + case BAG_MEMORY: #ifdef USE_CALL if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { @@ -1069,23 +1081,27 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) } break; - case ENCLOSURE_STOP_BACKTRACK: + case BAG_STOP_BACKTRACK: if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { - QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); + int v; + QuantNode* qn; + + qn = QUANT_(NODE_BAG_BODY(node)); tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; - len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP; + v = onig_positive_int_multiply(qn->lower, tlen); + if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP; } else { len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END; } break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: { - Node* cond = NODE_ENCLOSURE_BODY(node); + Node* cond = NODE_BAG_BODY(node); Node* Then = node->te.Then; Node* Else = node->te.Else; @@ -1109,18 +1125,18 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) } break; - default: - return ONIGERR_TYPE_BUG; + case BAG_OPTION: + len = tlen; break; } return len; } -static int get_char_length_tree(Node* node, regex_t* reg, int* len); +static int get_char_len_node(Node* node, regex_t* reg, int* len); static int -compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) +compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env) { int r; int len; @@ -1133,12 +1149,12 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) NODE_STATUS_ADD(node, ADDR_FIXED); r = add_abs_addr(reg, (int )node->m.called_addr); if (r != 0) return r; - len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); + len = compile_length_tree(NODE_BAG_BODY(node), reg); len += SIZE_OP_RETURN; r = 
add_opcode_rel_addr(reg, OP_JUMP, len); if (r != 0) return r; - r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); + r = compile_tree(NODE_BAG_BODY(node), reg, env); if (r != 0) return r; r = add_opcode(reg, OP_RETURN); return r; @@ -1151,7 +1167,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) NODE_STATUS_ADD(node, ADDR_FIXED); r = add_abs_addr(reg, (int )node->m.called_addr); if (r != 0) return r; - len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); + len = compile_length_tree(NODE_BAG_BODY(node), reg); len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) len += (NODE_IS_RECURSION(node) @@ -1172,7 +1188,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) if (r != 0) return r; r = add_mem_num(reg, node->m.regnum); if (r != 0) return r; - r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); + r = compile_tree(NODE_BAG_BODY(node), reg, env); if (r != 0) return r; #ifdef USE_CALL @@ -1201,22 +1217,22 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) } static int -compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) +compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) { int r, len; switch (node->type) { - case ENCLOSURE_MEMORY: - r = compile_enclosure_memory_node(node, reg, env); + case BAG_MEMORY: + r = compile_bag_memory_node(node, reg, env); break; - case ENCLOSURE_OPTION: + case BAG_OPTION: r = compile_option_node(node, reg, env); break; - case ENCLOSURE_STOP_BACKTRACK: + case BAG_STOP_BACKTRACK: if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { - QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); + QuantNode* qn = QUANT_(NODE_BAG_BODY(node)); r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; @@ -1235,16 +1251,16 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) else { r = add_opcode(reg, OP_ATOMIC_START); if (r != 0) 
return r; - r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); + r = compile_tree(NODE_BAG_BODY(node), reg, env); if (r != 0) return r; r = add_opcode(reg, OP_ATOMIC_END); } break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: { int cond_len, then_len, jump_len; - Node* cond = NODE_ENCLOSURE_BODY(node); + Node* cond = NODE_BAG_BODY(node); Node* Then = node->te.Then; Node* Else = node->te.Else; @@ -1283,10 +1299,6 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) } } break; - - default: - return ONIGERR_TYPE_BUG; - break; } return r; @@ -1304,30 +1316,30 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) } switch (node->type) { - case ANCHOR_PREC_READ: + case ANCR_PREC_READ: len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END; break; - case ANCHOR_PREC_READ_NOT: + case ANCR_PREC_READ_NOT: len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END; break; - case ANCHOR_LOOK_BEHIND: + case ANCR_LOOK_BEHIND: len = SIZE_OP_LOOK_BEHIND + tlen; break; - case ANCHOR_LOOK_BEHIND_NOT: + case ANCR_LOOK_BEHIND_NOT: len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END; break; - case ANCHOR_WORD_BOUNDARY: - case ANCHOR_NO_WORD_BOUNDARY: + case ANCR_WORD_BOUNDARY: + case ANCR_NO_WORD_BOUNDARY: #ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: - case ANCHOR_WORD_END: + case ANCR_WORD_BEGIN: + case ANCR_WORD_END: #endif len = SIZE_OP_WORD_BOUNDARY; break; - case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: len = SIZE_OPCODE; break; @@ -1346,14 +1358,14 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) enum OpCode op; switch (node->type) { - case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; - case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; - case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; - 
case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; - case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; - case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; - - case ANCHOR_WORD_BOUNDARY: + case ANCR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; + case ANCR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; + case ANCR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; + case ANCR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; + case ANCR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; + case ANCR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; + + case ANCR_WORD_BOUNDARY: op = OP_WORD_BOUNDARY; word: r = add_opcode(reg, op); @@ -1361,27 +1373,27 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) r = add_mode(reg, (ModeType )node->ascii_mode); break; - case ANCHOR_NO_WORD_BOUNDARY: + case ANCR_NO_WORD_BOUNDARY: op = OP_NO_WORD_BOUNDARY; goto word; break; #ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: + case ANCR_WORD_BEGIN: op = OP_WORD_BEGIN; goto word; break; - case ANCHOR_WORD_END: + case ANCR_WORD_END: op = OP_WORD_END; goto word; break; #endif - case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); break; - case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); break; - case ANCHOR_PREC_READ: + case ANCR_PREC_READ: r = add_opcode(reg, OP_PREC_READ_START); if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); @@ -1389,7 +1401,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) r = add_opcode(reg, OP_PREC_READ_END); break; - case ANCHOR_PREC_READ_NOT: + case ANCR_PREC_READ_NOT: len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); if (len < 0) return len; r = add_opcode_rel_addr(reg, 
OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END); @@ -1399,13 +1411,13 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) r = add_opcode(reg, OP_PREC_READ_NOT_END); break; - case ANCHOR_LOOK_BEHIND: + case ANCR_LOOK_BEHIND: { int n; r = add_opcode(reg, OP_LOOK_BEHIND); if (r != 0) return r; if (node->char_len < 0) { - r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n); + r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n); if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } else @@ -1417,7 +1429,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) } break; - case ANCHOR_LOOK_BEHIND_NOT: + case ANCR_LOOK_BEHIND_NOT: { int n; @@ -1426,7 +1438,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) len + SIZE_OP_LOOK_BEHIND_NOT_END); if (r != 0) return r; if (node->char_len < 0) { - r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n); + r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n); if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } else @@ -1635,8 +1647,8 @@ compile_length_tree(Node* node, regex_t* reg) r = compile_length_quantifier_node(QUANT_(node), reg); break; - case NODE_ENCLOSURE: - r = compile_length_enclosure_node(ENCLOSURE_(node), reg); + case NODE_BAG: + r = compile_length_bag_node(BAG_(node), reg); break; case NODE_ANCHOR: @@ -1826,8 +1838,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) r = compile_quantifier_node(QUANT_(node), reg, env); break; - case NODE_ENCLOSURE: - r = compile_enclosure_node(ENCLOSURE_(node), reg, env); + case NODE_BAG: + r = compile_bag_node(BAG_(node), reg, env); break; case NODE_ANCHOR: @@ -1873,10 +1885,10 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); - if (en->type == ENCLOSURE_MEMORY) { + BagNode* en = BAG_(node); + if (en->type == BAG_MEMORY) { if (NODE_IS_NAMED_GROUP(node)) { (*counter)++; 
map[en->m.regnum].new_val = *counter; @@ -1890,8 +1902,8 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) r = noname_disable_map(plink, map, counter); } } - else if (en->type == ENCLOSURE_IF_ELSE) { - r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter); + else if (en->type == BAG_IF_ELSE) { + r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter); if (r != 0) return r; if (IS_NOT_NULL(en->te.Then)) { r = noname_disable_map(&(en->te.Then), map, counter); @@ -1964,14 +1976,14 @@ renumber_by_map(Node* node, GroupNumRemap* map) r = renumber_by_map(NODE_BODY(node), map); break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); r = renumber_by_map(NODE_BODY(node), map); if (r != 0) return r; - if (en->type == ENCLOSURE_IF_ELSE) { + if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { r = renumber_by_map(en->te.Then, map); if (r != 0) return r; @@ -2021,14 +2033,14 @@ numbered_ref_check(Node* node) r = numbered_ref_check(NODE_BODY(node)); break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); r = numbered_ref_check(NODE_BODY(node)); if (r != 0) return r; - if (en->type == ENCLOSURE_IF_ELSE) { + if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { r = numbered_ref_check(en->te.Then); if (r != 0) return r; @@ -2099,14 +2111,14 @@ static int fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg) { int i, offset; - EnclosureNode* en; + BagNode* en; AbsAddrType addr; for (i = 0; i < uslist->num; i++) { if (! 
NODE_IS_ADDR_FIXED(uslist->us[i].target)) return ONIGERR_PARSER_BUG; - en = ENCLOSURE_(uslist->us[i].target); + en = BAG_(uslist->us[i].target); addr = en->m.called_addr; offset = uslist->us[i].offset; @@ -2122,7 +2134,7 @@ fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg) /* fixed size pattern node only */ static int -get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) +get_char_len_node1(Node* node, regex_t* reg, int* len, int level) { int tlen; int r = 0; @@ -2132,7 +2144,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) switch (NODE_TYPE(node)) { case NODE_LIST: do { - r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level); + r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level); if (r == 0) *len = distance_add(*len, tlen); } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); @@ -2143,9 +2155,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) int tlen2; int varlen = 0; - r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level); + r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level); while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) { - r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level); + r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level); if (r == 0) { if (tlen != tlen2) varlen = 1; @@ -2185,7 +2197,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) *len = 0; } else { - r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); + r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level); if (r == 0) *len = distance_multiply(tlen, qn->lower); } @@ -2198,7 +2210,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) #ifdef USE_CALL case NODE_CALL: if (! 
NODE_IS_RECURSION(node)) - r = get_char_length_tree1(NODE_BODY(node), reg, len, level); + r = get_char_len_node1(NODE_BODY(node), reg, len, level); else r = GET_CHAR_LEN_VARLEN; break; @@ -2209,17 +2221,17 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) *len = 1; break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); switch (en->type) { - case ENCLOSURE_MEMORY: + case BAG_MEMORY: #ifdef USE_CALL if (NODE_IS_CLEN_FIXED(node)) *len = en->char_len; else { - r = get_char_length_tree1(NODE_BODY(node), reg, len, level); + r = get_char_len_node1(NODE_BODY(node), reg, len, level); if (r == 0) { en->char_len = *len; NODE_STATUS_ADD(node, CLEN_FIXED); @@ -2227,23 +2239,23 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; #endif - case ENCLOSURE_OPTION: - case ENCLOSURE_STOP_BACKTRACK: - r = get_char_length_tree1(NODE_BODY(node), reg, len, level); + case BAG_OPTION: + case BAG_STOP_BACKTRACK: + r = get_char_len_node1(NODE_BODY(node), reg, len, level); break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: { int clen, elen; - r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level); + r = get_char_len_node1(NODE_BODY(node), reg, &clen, level); if (r == 0) { if (IS_NOT_NULL(en->te.Then)) { - r = get_char_length_tree1(en->te.Then, reg, &tlen, level); + r = get_char_len_node1(en->te.Then, reg, &tlen, level); if (r != 0) break; } else tlen = 0; if (IS_NOT_NULL(en->te.Else)) { - r = get_char_length_tree1(en->te.Else, reg, &elen, level); + r = get_char_len_node1(en->te.Else, reg, &elen, level); if (r != 0) break; } else elen = 0; @@ -2257,9 +2269,6 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } } break; - - default: - break; } } break; @@ -2281,9 +2290,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } static int -get_char_length_tree(Node* node, regex_t* reg, int* len) +get_char_len_node(Node* node, regex_t* reg, 
int* len) { - return get_char_length_tree1(node, reg, len, 0); + return get_char_len_node1(node, reg, len, 0); } /* x is not included y ==> 1 : 0 */ @@ -2450,7 +2459,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); - return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); + return onig_is_code_in_cc(reg->enc, code, cc) == 0; } break; @@ -2520,10 +2529,8 @@ get_head_value_node(Node* node, int exact, regex_t* reg) if (sn->end <= sn->s) break; - if (exact != 0 && - !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { - } - else { + if (exact == 0 || + ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) { n = node; } } @@ -2541,23 +2548,23 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); switch (en->type) { - case ENCLOSURE_OPTION: + case BAG_OPTION: { OnigOptionType options = reg->options; - reg->options = ENCLOSURE_(node)->o.options; + reg->options = BAG_(node)->o.options; n = get_head_value_node(NODE_BODY(node), exact, reg); reg->options = options; } break; - case ENCLOSURE_MEMORY: - case ENCLOSURE_STOP_BACKTRACK: - case ENCLOSURE_IF_ELSE: + case BAG_MEMORY: + case BAG_STOP_BACKTRACK: + case BAG_IF_ELSE: n = get_head_value_node(NODE_BODY(node), exact, reg); break; } @@ -2565,7 +2572,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) break; case NODE_ANCHOR: - if (ANCHOR_(node)->type == ANCHOR_PREC_READ) + if (ANCHOR_(node)->type == ANCR_PREC_READ) n = get_head_value_node(NODE_BODY(node), exact, reg); break; @@ -2578,7 +2585,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } static int -check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) +check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask) { NodeType type; int r = 0; @@ -2591,29 +2598,29 @@ check_type_tree(Node* 
node, int type_mask, int enclosure_mask, int anchor_mask) case NODE_LIST: case NODE_ALT: do { - r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask, + r = check_type_tree(NODE_CAR(node), type_mask, bag_mask, anchor_mask); } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; case NODE_QUANT: - r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); + r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask); break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); - if (((1<<en->type) & enclosure_mask) == 0) + BagNode* en = BAG_(node); + if (((1<<en->type) & bag_mask) == 0) return 1; - r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); - if (r == 0 && en->type == ENCLOSURE_IF_ELSE) { + r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask); + if (r == 0 && en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { - r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask); + r = check_type_tree(en->te.Then, type_mask, bag_mask, anchor_mask); if (r != 0) break; } if (IS_NOT_NULL(en->te.Else)) { - r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask); + r = check_type_tree(en->te.Else, type_mask, bag_mask, anchor_mask); } } } @@ -2625,7 +2632,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) return 1; if (IS_NOT_NULL(NODE_BODY(node))) - r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); + r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask); break; case NODE_GIMMICK: @@ -2666,7 +2673,7 @@ tree_min_len(Node* node, ScanEnv* env) Node* t = NODE_BODY(node); if (NODE_IS_RECURSION(node)) { if (NODE_IS_MIN_FIXED(t)) - len = ENCLOSURE_(t)->min_len; + len = BAG_(t)->min_len; } else len = tree_min_len(t, env); @@ -2717,11 +2724,11 @@ tree_min_len(Node* node, ScanEnv* env) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - 
EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); switch (en->type) { - case ENCLOSURE_MEMORY: + case BAG_MEMORY: if (NODE_IS_MIN_FIXED(node)) len = en->min_len; else { @@ -2738,11 +2745,11 @@ tree_min_len(Node* node, ScanEnv* env) } break; - case ENCLOSURE_OPTION: - case ENCLOSURE_STOP_BACKTRACK: + case BAG_OPTION: + case BAG_STOP_BACKTRACK: len = tree_min_len(NODE_BODY(node), env); break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: { OnigLen elen; @@ -2854,11 +2861,11 @@ tree_max_len(Node* node, ScanEnv* env) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); switch (en->type) { - case ENCLOSURE_MEMORY: + case BAG_MEMORY: if (NODE_IS_MAX_FIXED(node)) len = en->max_len; else { @@ -2875,11 +2882,11 @@ tree_max_len(Node* node, ScanEnv* env) } break; - case ENCLOSURE_OPTION: - case ENCLOSURE_STOP_BACKTRACK: + case BAG_OPTION: + case BAG_STOP_BACKTRACK: len = tree_max_len(NODE_BODY(node), env); break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: { OnigLen tlen, elen; @@ -2931,12 +2938,12 @@ check_backrefs(Node* node, ScanEnv* env) r = check_backrefs(NODE_BODY(node), env); break; - case NODE_ENCLOSURE: + case NODE_BAG: r = check_backrefs(NODE_BODY(node), env); { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); - if (en->type == ENCLOSURE_IF_ELSE) { + if (en->type == BAG_IF_ELSE) { if (r != 0) return r; if (IS_NOT_NULL(en->te.Then)) { r = check_backrefs(en->te.Then, env); @@ -3039,11 +3046,11 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) r = infinite_recursive_call_check(NODE_BODY(node), env, head); break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); - if (en->type == ENCLOSURE_MEMORY) { + if (en->type == BAG_MEMORY) { if (NODE_IS_MARK2(node)) return 0; else if (NODE_IS_MARK1(node)) @@ -3055,7 +3062,7 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) 
NODE_STATUS_REMOVE(node, MARK2); } } - else if (en->type == ENCLOSURE_IF_ELSE) { + else if (en->type == BAG_IF_ELSE) { int eret; ret = infinite_recursive_call_check(NODE_BODY(node), env, head); @@ -3116,11 +3123,11 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env) r = infinite_recursive_call_check_trav(NODE_BODY(node), env); break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); - if (en->type == ENCLOSURE_MEMORY) { + if (en->type == BAG_MEMORY) { if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) { int ret; @@ -3134,7 +3141,7 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env) NODE_STATUS_REMOVE(node, MARK1); } } - else if (en->type == ENCLOSURE_IF_ELSE) { + else if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { r = infinite_recursive_call_check_trav(en->te.Then, env); if (r != 0) return r; @@ -3189,11 +3196,11 @@ recursive_call_check(Node* node) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); - if (en->type == ENCLOSURE_MEMORY) { + if (en->type == BAG_MEMORY) { if (NODE_IS_MARK2(node)) return 0; else if (NODE_IS_MARK1(node)) @@ -3204,7 +3211,7 @@ recursive_call_check(Node* node) NODE_STATUS_REMOVE(node, MARK2); } } - else if (en->type == ENCLOSURE_IF_ELSE) { + else if (en->type == BAG_IF_ELSE) { r = 0; if (IS_NOT_NULL(en->te.Then)) { r |= recursive_call_check(en->te.Then); @@ -3265,13 +3272,13 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { int ret; int state1; - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); - if (en->type == ENCLOSURE_MEMORY) { + if (en->type == BAG_MEMORY) { if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) { if (! 
NODE_IS_RECURSION(node)) { NODE_STATUS_ADD(node, MARK1); @@ -3294,7 +3301,7 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; - if (en->type == ENCLOSURE_IF_ELSE) { + if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { ret = recursive_call_check_trav(en->te.Then, env, state1); if (ret == FOUND_CALLED_NODE) @@ -3318,6 +3325,15 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) #endif +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REAL_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) +#define IN_ZERO_REPEAT (1<<4) +#define IN_MULTI_ENTRY (1<<5) +#define IN_LOOK_BEHIND (1<<6) + + /* divide different length alternatives in look-behind. (?<=A|B) ==> (?<=A)|(?<=B) (?<!A|B) ==> (?<!A)(?<!B) @@ -3343,7 +3359,7 @@ divide_look_behind_alternatives(Node* node) NODE_CAR(np) = insert_node; } - if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { + if (anc_type == ANCR_LOOK_BEHIND_NOT) { np = node; do { NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */ @@ -3358,7 +3374,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) int r, len; AnchorNode* an = ANCHOR_(node); - r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len); + r = get_char_len_node(NODE_ANCHOR_BODY(an), reg, &len); if (r == 0) an->char_len = len; else if (r == GET_CHAR_LEN_VARLEN) @@ -3398,7 +3414,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg) if (IS_NOT_NULL(x)) { y = get_head_value_node(next_node, 0, reg); if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) { - Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK); CHECK_NULL_RETURN_MEMERR(en); NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT); swap_node(node, en); @@ -3409,9 +3425,9 @@ next_setup(Node* node, Node* next_node, regex_t* reg) } } } - else if (type == NODE_ENCLOSURE) { - EnclosureNode* en = ENCLOSURE_(node); - if (en->type == ENCLOSURE_MEMORY) { + else if (type == NODE_BAG) { + BagNode* en = 
BAG_(node); + if (en->type == BAG_MEMORY) { node = NODE_BODY(node); goto retry; } @@ -3527,7 +3543,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p for (i = 0; i < item_num; i++) { snode = onig_node_new_str(NULL, NULL); if (IS_NULL(snode)) goto mem_err; - + for (j = 0; j < items[i].code_len; j++) { len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); if (len < 0) { @@ -3544,7 +3560,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p goto mem_err2; } - if (items[i].byte_len != slen) { + if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) { Node *rem; UChar *q = p + items[i].byte_len; @@ -3596,37 +3612,69 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p } static int -expand_case_fold_string(Node* node, regex_t* reg) +is_good_case_fold_items_for_search(OnigEncoding enc, int slen, + int n, OnigCaseFoldCodeItem items[]) { + int i, len; + UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + for (i = 0; i < n; i++) { + OnigCaseFoldCodeItem* item = items + i; + + if (item->code_len != 1) return 0; + if (item->byte_len != slen) return 0; + len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf); + if (len != slen) return 0; + } + + return 1; +} + #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 +static int +expand_case_fold_string(Node* node, regex_t* reg, int state) +{ int r, n, len, alt_num; + int fold_len; + int prev_is_ambig, prev_is_good, is_good, is_in_look_behind; UChar *start, *end, *p; + UChar* foldp; Node *top_root, *root, *snode, *prev_node; OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - StrNode* sn = STR_(node); + UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + StrNode* sn; if (NODE_STRING_IS_AMBIG(node)) return 0; + sn = STR_(node); + start = sn->s; end = sn->end; if (start >= end) return 0; + is_in_look_behind = (state & IN_LOOK_BEHIND) != 0; + r = 0; top_root = root = prev_node = snode = NULL_NODE; alt_num = 1; p = start; while (p < end) { - 
n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end, - items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, + p, end, items); if (n < 0) { r = n; goto err; } len = enclen(reg->enc, p); + is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items); - if (n == 0) { + if (is_in_look_behind || + (IS_NOT_NULL(snode) || + (is_good + /* expand single char case: ex. /(?i:a)/ */ + && !(p == start && p + len >= end)))) { if (IS_NULL(snode)) { if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { top_root = root = onig_node_list_add(NULL_NODE, prev_node); @@ -3644,10 +3692,49 @@ expand_case_fold_string(Node* node, regex_t* reg) goto mem_err; } } + + prev_is_ambig = -1; /* -1: new */ + prev_is_good = 0; /* escape compiler warning */ + } + else { + prev_is_ambig = NODE_STRING_IS_AMBIG(snode); + prev_is_good = NODE_STRING_IS_GOOD_AMBIG(snode); } - r = onig_node_str_cat(snode, p, p + len); - if (r != 0) goto err; + if (n != 0) { + foldp = p; + fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, + &foldp, end, buf); + foldp = buf; + } + else { + foldp = p; fold_len = len; + } + + if ((prev_is_ambig == 0 && n != 0) || + (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) { + if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } + + prev_node = snode = onig_node_new_str(foldp, foldp + fold_len); + if (IS_NULL(snode)) goto mem_err; + if (IS_NULL(onig_node_list_add(root, snode))) { + onig_node_free(snode); + goto mem_err; + } + } + else { + r = onig_node_str_cat(snode, foldp, foldp + fold_len); + if (r != 0) goto err; + } + + if (n != 0) NODE_STRING_SET_AMBIG(snode); + if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode); } else { alt_num *= (n + 1); @@ -3768,22 +3855,22 @@ quantifiers_memory_node_info(Node* node) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - 
EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); switch (en->type) { - case ENCLOSURE_MEMORY: + case BAG_MEMORY: if (NODE_IS_RECURSION(node)) { return QUANT_BODY_IS_EMPTY_REC; } return QUANT_BODY_IS_EMPTY_MEM; break; - case ENCLOSURE_OPTION: - case ENCLOSURE_STOP_BACKTRACK: + case BAG_OPTION: + case BAG_STOP_BACKTRACK: r = quantifiers_memory_node_info(NODE_BODY(node)); break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: { int v; r = quantifiers_memory_node_info(NODE_BODY(node)); @@ -3797,8 +3884,6 @@ quantifiers_memory_node_info(Node* node) } } break; - default: - break; } } break; @@ -3818,13 +3903,6 @@ quantifiers_memory_node_info(Node* node) #endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */ -#define IN_ALT (1<<0) -#define IN_NOT (1<<1) -#define IN_REAL_REPEAT (1<<2) -#define IN_VAR_REPEAT (1<<3) -#define IN_ZERO_REPEAT (1<<4) -#define IN_MULTI_ENTRY (1<<5) - #ifdef USE_CALL #ifdef __GNUC__ @@ -3901,18 +3979,18 @@ setup_call2_call(Node* node) setup_call2_call(NODE_BODY(node)); break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); - if (en->type == ENCLOSURE_MEMORY) { + if (en->type == BAG_MEMORY) { if (! 
NODE_IS_MARK1(node)) { NODE_STATUS_ADD(node, MARK1); setup_call2_call(NODE_BODY(node)); NODE_STATUS_REMOVE(node, MARK1); } } - else if (en->type == ENCLOSURE_IF_ELSE) { + else if (en->type == BAG_IF_ELSE) { setup_call2_call(NODE_BODY(node)); if (IS_NOT_NULL(en->te.Then)) setup_call2_call(en->te.Then); @@ -3935,7 +4013,7 @@ setup_call2_call(Node* node) cn->entry_count++; NODE_STATUS_ADD(called, CALLED); - ENCLOSURE_(called)->m.entry_count++; + BAG_(called)->m.entry_count++; setup_call2_call(called); } NODE_STATUS_REMOVE(node, MARK1); @@ -3974,18 +4052,18 @@ setup_call(Node* node, ScanEnv* env, int state) r = 0; break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); - if (en->type == ENCLOSURE_MEMORY) { + if (en->type == BAG_MEMORY) { if ((state & IN_ZERO_REPEAT) != 0) { NODE_STATUS_ADD(node, IN_ZERO_REPEAT); - ENCLOSURE_(node)->m.entry_count--; + BAG_(node)->m.entry_count--; } r = setup_call(NODE_BODY(node), env, state); } - else if (en->type == ENCLOSURE_IF_ELSE) { + else if (en->type == BAG_IF_ELSE) { r = setup_call(NODE_BODY(node), env, state); if (r != 0) return r; if (IS_NOT_NULL(en->te.Then)) { @@ -4040,15 +4118,15 @@ setup_call2(Node* node) r = setup_call2(NODE_BODY(node)); break; - case NODE_ENCLOSURE: + case NODE_BAG: if (! 
NODE_IS_IN_ZERO_REPEAT(node)) r = setup_call2(NODE_BODY(node)); { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); if (r != 0) return r; - if (en->type == ENCLOSURE_IF_ELSE) { + if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { r = setup_call2(en->te.Then); if (r != 0) return r; @@ -4104,12 +4182,12 @@ setup_called_state_call(Node* node, int state) AnchorNode* an = ANCHOR_(node); switch (an->type) { - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND_NOT: + case ANCR_PREC_READ_NOT: + case ANCR_LOOK_BEHIND_NOT: state |= IN_NOT; /* fall */ - case ANCHOR_PREC_READ: - case ANCHOR_LOOK_BEHIND: + case ANCR_PREC_READ: + case ANCR_LOOK_BEHIND: setup_called_state_call(NODE_ANCHOR_BODY(an), state); break; default: @@ -4118,11 +4196,11 @@ setup_called_state_call(Node* node, int state) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); - if (en->type == ENCLOSURE_MEMORY) { + if (en->type == BAG_MEMORY) { if (NODE_IS_MARK1(node)) { if ((~en->m.called_state & state) != 0) { en->m.called_state |= state; @@ -4136,7 +4214,7 @@ setup_called_state_call(Node* node, int state) NODE_STATUS_REMOVE(node, MARK1); } } - else if (en->type == ENCLOSURE_IF_ELSE) { + else if (en->type == BAG_IF_ELSE) { if (IS_NOT_NULL(en->te.Then)) { setup_called_state_call(en->te.Then, state); } @@ -4177,22 +4255,22 @@ setup_called_state(Node* node, int state) break; #endif - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); switch (en->type) { - case ENCLOSURE_MEMORY: + case BAG_MEMORY: if (en->m.entry_count > 1) state |= IN_MULTI_ENTRY; en->m.called_state |= state; /* fall */ - case ENCLOSURE_OPTION: - case ENCLOSURE_STOP_BACKTRACK: + case BAG_OPTION: + case BAG_STOP_BACKTRACK: setup_called_state(NODE_BODY(node), state); break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: setup_called_state(NODE_BODY(node), state); if (IS_NOT_NULL(en->te.Then)) 
setup_called_state(en->te.Then, state); @@ -4221,12 +4299,12 @@ setup_called_state(Node* node, int state) AnchorNode* an = ANCHOR_(node); switch (an->type) { - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND_NOT: + case ANCR_PREC_READ_NOT: + case ANCR_LOOK_BEHIND_NOT: state |= IN_NOT; /* fall */ - case ANCHOR_PREC_READ: - case ANCHOR_LOOK_BEHIND: + case ANCR_PREC_READ: + case ANCR_LOOK_BEHIND: setup_called_state(NODE_ANCHOR_BODY(an), state); break; default: @@ -4259,56 +4337,57 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \ - | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \ + | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \ | NODE_BIT_CALL | NODE_BIT_GIMMICK) -#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION ) -#define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION) +#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION ) +#define ALLOWED_BAG_IN_LB_NOT (1<<BAG_OPTION) #define ALLOWED_ANCHOR_IN_LB \ - ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \ - | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \ - | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \ - | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ - | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY ) + ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \ + | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \ + | ANCR_WORD_BEGIN | ANCR_WORD_END \ + | ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ + | ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY ) #define ALLOWED_ANCHOR_IN_LB_NOT \ - ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \ - | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \ - | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \ - | 
ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ - | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY ) + ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \ + | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \ + | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \ + | ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ + | ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY ) int r; AnchorNode* an = ANCHOR_(node); switch (an->type) { - case ANCHOR_PREC_READ: + case ANCR_PREC_READ: r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env); break; - case ANCHOR_PREC_READ_NOT: + case ANCR_PREC_READ_NOT: r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env); break; - case ANCHOR_LOOK_BEHIND: + case ANCR_LOOK_BEHIND: { r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB); + ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB); if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env); + r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env); if (r != 0) return r; r = setup_look_behind(node, reg, env); } break; - case ANCHOR_LOOK_BEHIND_NOT: + case ANCR_LOOK_BEHIND_NOT: { r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env); + r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND), + env); if (r != 0) return r; r = setup_look_behind(node, reg, env); } @@ -4346,9 +4425,9 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT qn->body_empty_info = quantifiers_memory_node_info(body); if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) { - if (NODE_TYPE(body) == NODE_ENCLOSURE && - 
ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) { - MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum); + if (NODE_TYPE(body) == NODE_BAG && + BAG_(body)->type == BAG_MEMORY) { + MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum); } } #else @@ -4439,7 +4518,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case NODE_STRING: if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) { - r = expand_case_fold_string(node, reg); + r = expand_case_fold_string(node, reg, state); } break; @@ -4462,21 +4541,21 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); switch (en->type) { - case ENCLOSURE_OPTION: + case BAG_OPTION: { OnigOptionType options = reg->options; - reg->options = ENCLOSURE_(node)->o.options; + reg->options = BAG_(node)->o.options; r = setup_tree(NODE_BODY(node), reg, state, env); reg->options = options; } break; - case ENCLOSURE_MEMORY: + case BAG_MEMORY: #ifdef USE_CALL state |= en->m.called_state; #endif @@ -4488,7 +4567,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) r = setup_tree(NODE_BODY(node), reg, state, env); break; - case ENCLOSURE_STOP_BACKTRACK: + case BAG_STOP_BACKTRACK: { Node* target = NODE_BODY(node); r = setup_tree(target, reg, state, env); @@ -4503,7 +4582,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env); if (r != 0) return r; if (IS_NOT_NULL(en->te.Then)) { @@ -4538,35 +4617,83 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) return r; } -/* set skip map for Boyer-Moore search */ static int -set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - UChar skip[], int** int_skip) +set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand, + UChar* s, UChar* end, + UChar skip[], int* roffset) { - 
int i, len; + int i, j, k, len, offset; + int n, clen; + UChar* p; + OnigEncoding enc; + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + enc = reg->enc; + offset = ENC_GET_SKIP_OFFSET(enc); + if (offset == ENC_SKIP_OFFSET_1_OR_0) { + UChar* p = s; + while (1) { + len = enclen(enc, p); + if (p + len >= end) { + if (len == 1) offset = 1; + else offset = 0; + break; + } + p += len; + } + } len = (int )(end - s); - if (len < ONIG_CHAR_TABLE_SIZE) { - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; + if (len + offset >= UCHAR_MAX) + return ONIGERR_PARSER_BUG; - for (i = 0; i < len - 1; i++) - skip[s[i]] = len - 1 - i; + *roffset = offset; + + for (i = 0; i < CHAR_MAP_SIZE; i++) { + skip[i] = (UChar )(len + offset); } - else { - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; + + for (p = s; p < end; ) { + int z; + + clen = enclen(enc, p); + if (p + clen > end) clen = (int )(end - p); + + len = (int )(end - p); + for (j = 0; j < clen; j++) { + z = len - j + (offset - 1); + if (z <= 0) break; + skip[p[j]] = z; + } + + if (case_expand != 0) { + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); + for (k = 0; k < n; k++) { + ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); + for (j = 0; j < clen; j++) { + z = len - j + (offset - 1); + if (z <= 0) break; + if (skip[buf[j]] > z) + skip[buf[j]] = z; + } + } } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; - for (i = 0; i < len - 1; i++) - (*int_skip)[s[i]] = len - 1 - i; + p += clen; } + return 0; } + #define OPT_EXACT_MAXLEN 24 +#if OPT_EXACT_MAXLEN >= UCHAR_MAX +#error Too big OPT_EXACT_MAXLEN +#endif + typedef struct { OnigLen min; /* min byte length */ OnigLen max; /* max byte length */ @@ -4589,26 +4716,27 @@ typedef struct { MinMax mmd; /* position */ OptAnc anc; int reach_end; - int ignore_case; + int 
case_fold; + int good_case_fold; int len; UChar s[OPT_EXACT_MAXLEN]; -} OptExact; +} OptStr; typedef struct { MinMax mmd; /* position */ OptAnc anc; int value; /* weighted value */ - UChar map[ONIG_CHAR_TABLE_SIZE]; + UChar map[CHAR_MAP_SIZE]; } OptMap; typedef struct { - MinMax len; - OptAnc anc; - OptExact exb; /* boundary */ - OptExact exm; /* middle */ - OptExact expr; /* prec read (?=...) */ - OptMap map; /* boundary */ -} NodeOpt; + MinMax len; + OptAnc anc; + OptStr sb; /* boundary */ + OptStr sm; /* middle */ + OptStr spr; /* prec read (?=...) */ + OptMap map; /* boundary */ +} OptNode; static int @@ -4640,15 +4768,15 @@ distance_value(MinMax* mm) { /* 1000 / (min-max-dist + 1) */ static const short int dist_vals[] = { - 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, - 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, - 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, - 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, - 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, - 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, - 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, - 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, - 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, + 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, + 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, + 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, + 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, + 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, + 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, + 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 }; @@ -4684,7 +4812,7 @@ comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2) static int is_equal_mml(MinMax* a, MinMax* b) { - return (a->min == b->min && a->max == b->max) ? 
1 : 0; + return a->min == b->min && a->max == b->max; } static void @@ -4756,15 +4884,15 @@ concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right, to->right |= left->right; } else { - to->right |= (left->right & ANCHOR_PREC_READ_NOT); + to->right |= (left->right & ANCR_PREC_READ_NOT); } } static int is_left(int a) { - if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF || - a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT) + if (a == ANCR_END_BUF || a == ANCR_SEMI_END_BUF || + a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT) return 0; return 1; @@ -4804,39 +4932,47 @@ alt_merge_opt_anc_info(OptAnc* to, OptAnc* add) } static int -is_full_opt_exact(OptExact* e) +is_full_opt_exact(OptStr* e) { - return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0); + return e->len >= OPT_EXACT_MAXLEN; } static void -clear_opt_exact(OptExact* e) +clear_opt_exact(OptStr* e) { clear_mml(&e->mmd); clear_opt_anc_info(&e->anc); - e->reach_end = 0; - e->ignore_case = 0; - e->len = 0; - e->s[0] = '\0'; + e->reach_end = 0; + e->case_fold = 0; + e->good_case_fold = 0; + e->len = 0; + e->s[0] = '\0'; } static void -copy_opt_exact(OptExact* to, OptExact* from) +copy_opt_exact(OptStr* to, OptStr* from) { *to = *from; } static int -concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) +concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc) { int i, j, len, r; UChar *p, *end; OptAnc tanc; - if (! to->ignore_case && add->ignore_case) { - if (to->len >= add->len) return 0; /* avoid */ + if (add->case_fold != 0) { + if (! 
to->case_fold) { + if (to->len > 1 || to->len >= add->len) return 0; /* avoid */ - to->ignore_case = 1; + to->case_fold = 1; + } + else { + if (to->good_case_fold != 0) { + if (add->good_case_fold == 0) return 0; + } + } } r = 0; @@ -4863,7 +4999,7 @@ concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) } static void -concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc) +concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc) { int i, j, len; UChar *p; @@ -4876,10 +5012,13 @@ concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc) } to->len = i; + + if (p >= end && to->len == (int )(end - s)) + to->reach_end = 1; } static void -alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env) +alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env) { int i, j, len; @@ -4908,14 +5047,17 @@ alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env) to->reach_end = 0; } to->len = i; - to->ignore_case |= add->ignore_case; + if (add->case_fold != 0) + to->case_fold = 1; + if (add->good_case_fold == 0) + to->good_case_fold = 0; alt_merge_opt_anc_info(&to->anc, &add->anc); if (! 
to->reach_end) to->anc.right = 0; } static void -select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt) +select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt) { int vn, va; @@ -4938,8 +5080,11 @@ select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt) if (alt->len > 1) va += 5; } - if (now->ignore_case == 0) vn *= 2; - if (alt->ignore_case == 0) va *= 2; + if (now->case_fold == 0) vn *= 2; + if (alt->case_fold == 0) va *= 2; + + if (now->good_case_fold != 0) vn *= 4; + if (alt->good_case_fold != 0) va *= 4; if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0) copy_opt_exact(now, alt); @@ -5030,14 +5175,24 @@ select_opt_map(OptMap* now, OptMap* alt) } static int -comp_opt_exact_or_map(OptExact* e, OptMap* m) +comp_opt_exact_or_map(OptStr* e, OptMap* m) { #define COMP_EM_BASE 20 int ae, am; + int case_value; if (m->value <= 0) return -1; - ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + if (e->case_fold != 0) { + if (e->good_case_fold != 0) + case_value = 2; + else + case_value = 1; + } + else + case_value = 3; + + ae = COMP_EM_BASE * e->len * case_value; am = COMP_EM_BASE * 5 * 2 / m->value; return comp_distance_value(&e->mmd, &m->mmd, ae, am); } @@ -5057,7 +5212,7 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add) alt_merge_mml(&to->mmd, &add->mmd); val = 0; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + for (i = 0; i < CHAR_MAP_SIZE; i++) { if (add->map[i]) to->map[i] = 1; @@ -5070,42 +5225,42 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add) } static void -set_bound_node_opt_info(NodeOpt* opt, MinMax* plen) +set_bound_node_opt_info(OptNode* opt, MinMax* plen) { - copy_mml(&(opt->exb.mmd), plen); - copy_mml(&(opt->expr.mmd), plen); - copy_mml(&(opt->map.mmd), plen); + copy_mml(&(opt->sb.mmd), plen); + copy_mml(&(opt->spr.mmd), plen); + copy_mml(&(opt->map.mmd), plen); } static void -clear_node_opt_info(NodeOpt* opt) +clear_node_opt_info(OptNode* opt) { clear_mml(&opt->len); 
clear_opt_anc_info(&opt->anc); - clear_opt_exact(&opt->exb); - clear_opt_exact(&opt->exm); - clear_opt_exact(&opt->expr); + clear_opt_exact(&opt->sb); + clear_opt_exact(&opt->sm); + clear_opt_exact(&opt->spr); clear_opt_map(&opt->map); } static void -copy_node_opt_info(NodeOpt* to, NodeOpt* from) +copy_node_opt_info(OptNode* to, OptNode* from) { *to = *from; } static void -concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add) +concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add) { - int exb_reach, exm_reach; + int sb_reach, sm_reach; OptAnc tanc; concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); copy_opt_anc_info(&to->anc, &tanc); - if (add->exb.len > 0 && to->len.max == 0) { - concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max); - copy_opt_anc_info(&add->exb.anc, &tanc); + if (add->sb.len > 0 && to->len.max == 0) { + concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max); + copy_opt_anc_info(&add->sb.anc, &tanc); } if (add->map.value > 0 && to->len.max == 0) { @@ -5113,38 +5268,38 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add) add->map.anc.left |= to->anc.left; } - exb_reach = to->exb.reach_end; - exm_reach = to->exm.reach_end; + sb_reach = to->sb.reach_end; + sm_reach = to->sm.reach_end; if (add->len.max != 0) - to->exb.reach_end = to->exm.reach_end = 0; + to->sb.reach_end = to->sm.reach_end = 0; - if (add->exb.len > 0) { - if (exb_reach) { - concat_opt_exact(&to->exb, &add->exb, enc); - clear_opt_exact(&add->exb); + if (add->sb.len > 0) { + if (sb_reach) { + concat_opt_exact(&to->sb, &add->sb, enc); + clear_opt_exact(&add->sb); } - else if (exm_reach) { - concat_opt_exact(&to->exm, &add->exb, enc); - clear_opt_exact(&add->exb); + else if (sm_reach) { + concat_opt_exact(&to->sm, &add->sb, enc); + clear_opt_exact(&add->sb); } } - select_opt_exact(enc, &to->exm, &add->exb); - select_opt_exact(enc, &to->exm, &add->exm); + 
select_opt_exact(enc, &to->sm, &add->sb); + select_opt_exact(enc, &to->sm, &add->sm); - if (to->expr.len > 0) { + if (to->spr.len > 0) { if (add->len.max > 0) { - if (to->expr.len > (int )add->len.max) - to->expr.len = add->len.max; + if (to->spr.len > (int )add->len.max) + to->spr.len = add->len.max; - if (to->expr.mmd.max == 0) - select_opt_exact(enc, &to->exb, &to->expr); + if (to->spr.mmd.max == 0) + select_opt_exact(enc, &to->sb, &to->spr); else - select_opt_exact(enc, &to->exm, &to->expr); + select_opt_exact(enc, &to->sm, &to->spr); } } - else if (add->expr.len > 0) { - copy_opt_exact(&to->expr, &add->expr); + else if (add->spr.len > 0) { + copy_opt_exact(&to->spr, &add->spr); } select_opt_map(&to->map, &add->map); @@ -5152,12 +5307,12 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add) } static void -alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env) +alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env) { alt_merge_opt_anc_info(&to->anc, &add->anc); - alt_merge_opt_exact(&to->exb, &add->exb, env); - alt_merge_opt_exact(&to->exm, &add->exm, env); - alt_merge_opt_exact(&to->expr, &add->expr, env); + alt_merge_opt_exact(&to->sb, &add->sb, env); + alt_merge_opt_exact(&to->sm, &add->sm, env); + alt_merge_opt_exact(&to->spr, &add->spr, env); alt_merge_opt_map(env->enc, &to->map, &add->map); alt_merge_mml(&to->len, &add->len); @@ -5167,11 +5322,11 @@ alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env) #define MAX_NODE_OPT_INFO_REF_COUNT 5 static int -optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) +optimize_nodes(Node* node, OptNode* opt, OptEnv* env) { int i; int r; - NodeOpt xo; + OptNode xo; OnigEncoding enc; r = 0; @@ -5217,7 +5372,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) /* int is_raw = NODE_STRING_IS_RAW(node); */ if (! 
NODE_STRING_IS_AMBIG(node)) { - concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc); + concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc); if (slen > 0) { add_char_opt_map(&opt->map, *(sn->s), enc); } @@ -5231,8 +5386,10 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) max = ONIGENC_MBC_MAXLEN_DIST(enc) * n; } else { - concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc); - opt->exb.ignore_case = 1; + concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc); + opt->sb.case_fold = 1; + if (NODE_STRING_IS_GOOD_AMBIG(node)) + opt->sb.good_case_fold = 1; if (slen > 0) { r = add_char_amb_opt_map(&opt->map, sn->s, sn->end, @@ -5245,9 +5402,6 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) set_mml(&opt->len, slen, max); } - - if (opt->exb.len == slen) - opt->exb.reach_end = 1; } break; @@ -5321,27 +5475,27 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case NODE_ANCHOR: switch (ANCHOR_(node)->type) { - case ANCHOR_BEGIN_BUF: - case ANCHOR_BEGIN_POSITION: - case ANCHOR_BEGIN_LINE: - case ANCHOR_END_BUF: - case ANCHOR_SEMI_END_BUF: - case ANCHOR_END_LINE: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: + case ANCR_BEGIN_BUF: + case ANCR_BEGIN_POSITION: + case ANCR_BEGIN_LINE: + case ANCR_END_BUF: + case ANCR_SEMI_END_BUF: + case ANCR_END_LINE: + case ANCR_PREC_READ_NOT: + case ANCR_LOOK_BEHIND: add_opt_anc_info(&opt->anc, ANCHOR_(node)->type); break; - case ANCHOR_PREC_READ: + case ANCR_PREC_READ: { r = optimize_nodes(NODE_BODY(node), &xo, env); if (r == 0) { - if (xo.exb.len > 0) - copy_opt_exact(&opt->expr, &xo.exb); - else if (xo.exm.len > 0) - copy_opt_exact(&opt->expr, &xo.exm); + if (xo.sb.len > 0) + copy_opt_exact(&opt->spr, &xo.sb); + else if (xo.sm.len > 0) + copy_opt_exact(&opt->spr, &xo.sm); - opt->expr.reach_end = 0; + opt->spr.reach_end = 0; if (xo.map.value > 0) copy_opt_map(&opt->map, &xo.map); @@ -5349,7 +5503,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) } break; - case ANCHOR_LOOK_BEHIND_NOT: + case 
ANCR_LOOK_BEHIND_NOT: break; } break; @@ -5384,7 +5538,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) set_mml(&opt->len, 0, INFINITE_LEN); else { OnigOptionType save = env->options; - env->options = ENCLOSURE_(NODE_BODY(node))->o.options; + env->options = BAG_(NODE_BODY(node))->o.options; r = optimize_nodes(NODE_BODY(node), opt, env); env->options = save; } @@ -5401,31 +5555,31 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) if (qn->lower > 0) { copy_node_opt_info(opt, &xo); - if (xo.exb.len > 0) { - if (xo.exb.reach_end) { - for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) { - int rc = concat_opt_exact(&opt->exb, &xo.exb, enc); + if (xo.sb.len > 0) { + if (xo.sb.reach_end) { + for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) { + int rc = concat_opt_exact(&opt->sb, &xo.sb, enc); if (rc > 0) break; } - if (i < qn->lower) opt->exb.reach_end = 0; + if (i < qn->lower) opt->sb.reach_end = 0; } } if (qn->lower != qn->upper) { - opt->exb.reach_end = 0; - opt->exm.reach_end = 0; + opt->sb.reach_end = 0; + opt->sm.reach_end = 0; } if (qn->lower > 1) - opt->exm.reach_end = 0; + opt->sm.reach_end = 0; } if (IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML); + add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML); else - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF); + add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF); } max = (xo.len.max > 0 ? 
INFINITE_LEN : 0); @@ -5439,12 +5593,12 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) } break; - case NODE_ENCLOSURE: + case NODE_BAG: { - EnclosureNode* en = ENCLOSURE_(node); + BagNode* en = BAG_(node); switch (en->type) { - case ENCLOSURE_OPTION: + case BAG_OPTION: { OnigOptionType save = env->options; @@ -5454,7 +5608,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) } break; - case ENCLOSURE_MEMORY: + case BAG_MEMORY: #ifdef USE_CALL en->opt_count++; if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { @@ -5470,23 +5624,23 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) #endif { r = optimize_nodes(NODE_BODY(node), opt, env); - if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) { + if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) { if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum)) - remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK); + remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK); } } break; - case ENCLOSURE_STOP_BACKTRACK: + case BAG_STOP_BACKTRACK: r = optimize_nodes(NODE_BODY(node), opt, env); break; - case ENCLOSURE_IF_ELSE: + case BAG_IF_ELSE: { OptEnv nenv; copy_opt_env(&nenv, env); - r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv); + r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv); if (r == 0) { add_mml(&nenv.mmd, &xo.len); concat_left_node_opt_info(enc, opt, &xo); @@ -5524,39 +5678,47 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) } static int -set_optimize_exact(regex_t* reg, OptExact* e) +set_optimize_exact(regex_t* reg, OptStr* e) { int r; if (e->len == 0) return 0; - if (e->ignore_case) { - reg->exact = (UChar* )xmalloc(e->len); - CHECK_NULL_RETURN_MEMERR(reg->exact); - xmemcpy(reg->exact, e->s, e->len); - reg->exact_end = reg->exact + e->len; - reg->optimize = OPTIMIZE_EXACT_IC; + reg->exact = (UChar* )xmalloc(e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); + xmemcpy(reg->exact, e->s, e->len); + reg->exact_end = reg->exact + e->len; + + if (e->case_fold) 
{ + reg->optimize = OPTIMIZE_STR_CASE_FOLD; + if (e->good_case_fold != 0) { + if (e->len >= 2) { + r = set_sunday_quick_search_or_bmh_skip_table(reg, 1, + reg->exact, reg->exact_end, + reg->map, &(reg->map_offset)); + if (r != 0) return r; + reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST; + } + } } else { int allow_reverse; - reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len); - CHECK_NULL_RETURN_MEMERR(reg->exact); - reg->exact_end = reg->exact + e->len; - allow_reverse = ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); - if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { - r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, - reg->map, &(reg->int_map)); + if (e->len >= 2 || (e->len >= 1 && allow_reverse)) { + r = set_sunday_quick_search_or_bmh_skip_table(reg, 0, + reg->exact, reg->exact_end, + reg->map, &(reg->map_offset)); if (r != 0) return r; reg->optimize = (allow_reverse != 0 - ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV); + ? OPTIMIZE_STR_FAST + : OPTIMIZE_STR_FAST_STEP_FORWARD); } else { - reg->optimize = OPTIMIZE_EXACT; + reg->optimize = OPTIMIZE_STR; } } @@ -5575,7 +5737,7 @@ set_optimize_map(regex_t* reg, OptMap* m) { int i; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + for (i = 0; i < CHAR_MAP_SIZE; i++) reg->map[i] = m->map[i]; reg->optimize = OPTIMIZE_MAP; @@ -5590,8 +5752,8 @@ set_optimize_map(regex_t* reg, OptMap* m) static void set_sub_anchor(regex_t* reg, OptAnc* anc) { - reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE; - reg->sub_anchor |= anc->right & ANCHOR_END_LINE; + reg->sub_anchor |= anc->left & ANCR_BEGIN_LINE; + reg->sub_anchor |= anc->right & ANCR_END_LINE; } #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) @@ -5602,7 +5764,7 @@ static int set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) { int r; - NodeOpt opt; + OptNode opt; OptEnv env; env.enc = reg->enc; @@ -5614,29 +5776,29 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) r = 
optimize_nodes(node, &opt, &env); if (r != 0) return r; - reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML | - ANCHOR_LOOK_BEHIND); + reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF | + ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML | + ANCR_LOOK_BEHIND); - if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) - reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML; + if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0) + reg->anchor &= ~ANCR_ANYCHAR_INF_ML; - reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | - ANCHOR_PREC_READ_NOT); + reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF | + ANCR_PREC_READ_NOT); - if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { + if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) { reg->anchor_dmin = opt.len.min; reg->anchor_dmax = opt.len.max; } - if (opt.exb.len > 0 || opt.exm.len > 0) { - select_opt_exact(reg->enc, &opt.exb, &opt.exm); - if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) { + if (opt.sb.len > 0 || opt.sm.len > 0) { + select_opt_exact(reg->enc, &opt.sb, &opt.sm); + if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) { goto set_map; } else { - r = set_optimize_exact(reg, &opt.exb); - set_sub_anchor(reg, &opt.exb.anc); + r = set_optimize_exact(reg, &opt.sb); + set_sub_anchor(reg, &opt.sb.anc); } } else if (opt.map.value > 0) { @@ -5645,9 +5807,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) set_sub_anchor(reg, &opt.map.anc); } else { - reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE; if (opt.len.max == 0) - reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE; + reg->sub_anchor |= opt.anc.right & ANCR_END_LINE; } #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) @@ -5665,6 +5827,7 @@ clear_optimize_info(regex_t* reg) reg->anchor_dmax = 0; 
reg->sub_anchor = 0; reg->exact_end = (UChar* )NULL; + reg->map_offset = 0; reg->threshold_len = 0; if (IS_NOT_NULL(reg->exact)) { xfree(reg->exact); @@ -5733,41 +5896,41 @@ print_anchor(FILE* f, int anchor) fprintf(f, "["); - if (anchor & ANCHOR_BEGIN_BUF) { + if (anchor & ANCR_BEGIN_BUF) { fprintf(f, "begin-buf"); q = 1; } - if (anchor & ANCHOR_BEGIN_LINE) { + if (anchor & ANCR_BEGIN_LINE) { if (q) fprintf(f, ", "); q = 1; fprintf(f, "begin-line"); } - if (anchor & ANCHOR_BEGIN_POSITION) { + if (anchor & ANCR_BEGIN_POSITION) { if (q) fprintf(f, ", "); q = 1; fprintf(f, "begin-pos"); } - if (anchor & ANCHOR_END_BUF) { + if (anchor & ANCR_END_BUF) { if (q) fprintf(f, ", "); q = 1; fprintf(f, "end-buf"); } - if (anchor & ANCHOR_SEMI_END_BUF) { + if (anchor & ANCR_SEMI_END_BUF) { if (q) fprintf(f, ", "); q = 1; fprintf(f, "semi-end-buf"); } - if (anchor & ANCHOR_END_LINE) { + if (anchor & ANCR_END_LINE) { if (q) fprintf(f, ", "); q = 1; fprintf(f, "end-line"); } - if (anchor & ANCHOR_ANYCHAR_INF) { + if (anchor & ANCR_ANYCHAR_INF) { if (q) fprintf(f, ", "); q = 1; fprintf(f, "anychar-inf"); } - if (anchor & ANCHOR_ANYCHAR_INF_ML) { + if (anchor & ANCR_ANYCHAR_INF_ML) { if (q) fprintf(f, ", "); fprintf(f, "anychar-inf-ml"); } @@ -5778,12 +5941,13 @@ print_anchor(FILE* f, int anchor) static void print_optimize_info(FILE* f, regex_t* reg) { - static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", - "EXACT_IC", "MAP" }; + static const char* on[] = { "NONE", "STR", + "STR_FAST", "STR_FAST_STEP_FORWARD", + "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" }; fprintf(f, "optimize: %s\n", on[reg->optimize]); fprintf(f, " anchor: "); print_anchor(f, reg->anchor); - if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) + if ((reg->anchor & ANCR_END_BUF_MASK) != 0) print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); fprintf(f, "\n"); @@ -5804,14 +5968,14 @@ print_optimize_info(FILE* f, regex_t* reg) else if (reg->optimize & OPTIMIZE_MAP) { int c, i, n = 0; - 
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + for (i = 0; i < CHAR_MAP_SIZE; i++) if (reg->map[i]) n++; fprintf(f, "map: n=%d\n", n); if (n > 0) { c = 0; fputc('[', f); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + for (i = 0; i < CHAR_MAP_SIZE; i++) { if (reg->map[i] != 0) { if (c > 0) fputs(", ", f); c++; @@ -5832,7 +5996,7 @@ print_optimize_info(FILE* f, regex_t* reg) extern RegexExt* onig_get_regex_ext(regex_t* reg) { - if (IS_NULL(REG_EXTP(reg))) { + if (IS_NULL(reg->extp)) { RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext)); if (IS_NULL(ext)) return 0; @@ -5845,10 +6009,10 @@ onig_get_regex_ext(regex_t* reg) ext->callout_list = 0; #endif - REG_EXTPL(reg) = (void* )ext; + reg->extp = ext; } - return REG_EXTP(reg); + return reg->extp; } static void @@ -5895,12 +6059,10 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg)) { if (IS_NOT_NULL(reg->p)) xfree(reg->p); if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); - if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); - if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(REG_EXTP(reg))) { - free_regex_ext(REG_EXTP(reg)); - REG_EXTPL(reg) = 0; + if (IS_NOT_NULL(reg->extp)) { + free_regex_ext(reg->extp); + reg->extp = 0; } onig_names_free(reg); @@ -6060,7 +6222,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0) #ifdef USE_CALLOUT - || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0) + || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) #endif ) reg->stack_pop_level = STACK_POP_LEVEL_ALL; @@ -6152,9 +6314,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl (reg)->syntax = syntax; (reg)->optimize = 0; (reg)->exact = (UChar* )NULL; - (reg)->int_map = (int* )NULL; - (reg)->int_map_backward = (int* )NULL; - REG_EXTPL(reg) = NULL; + (reg)->extp = (RegexExt* )NULL; (reg)->p = (UChar* )NULL; 
(reg)->alloc = 0; @@ -6309,11 +6469,11 @@ onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_ found = 0; } else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + found = onig_is_in_code_range(cc->mbuf->p, code) != 0; } } else { - found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); + found = BITSET_AT(cc->bs, code) != 0; } if (IS_NCCLASS_NOT(cc)) @@ -6387,12 +6547,35 @@ print_indent_tree(FILE* f, Node* node, int indent) break; case NODE_STRING: - fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node); - for (p = STR_(node)->s; p < STR_(node)->end; p++) { - if (*p >= 0x20 && *p < 0x7f) - fputc(*p, f); - else { - fprintf(f, " 0x%02x", *p); + { + char* mode; + char* dont; + char* good; + + if (NODE_STRING_IS_RAW(node)) + mode = "-raw"; + else if (NODE_STRING_IS_AMBIG(node)) + mode = "-ambig"; + else + mode = ""; + + if (NODE_STRING_IS_GOOD_AMBIG(node)) + good = "-good"; + else + good = ""; + + if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) + dont = " (dont-opt)"; + else + dont = ""; + + fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node); + for (p = STR_(node)->s; p < STR_(node)->end; p++) { + if (*p >= 0x20 && *p < 0x7f) + fputc(*p, f); + else { + fprintf(f, " 0x%02x", *p); + } } } break; @@ -6436,36 +6619,36 @@ print_indent_tree(FILE* f, Node* node, int indent) case NODE_ANCHOR: fprintf(f, "<anchor:%p> ", node); switch (ANCHOR_(node)->type) { - case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; - case ANCHOR_END_BUF: fputs("end buf", f); break; - case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; - case ANCHOR_END_LINE: fputs("end line", f); break; - case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; - case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; - - case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break; - case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break; + case ANCR_BEGIN_BUF: fputs("begin buf", f); break; + case ANCR_END_BUF: fputs("end buf", 
f); break; + case ANCR_BEGIN_LINE: fputs("begin line", f); break; + case ANCR_END_LINE: fputs("end line", f); break; + case ANCR_SEMI_END_BUF: fputs("semi end buf", f); break; + case ANCR_BEGIN_POSITION: fputs("begin position", f); break; + + case ANCR_WORD_BOUNDARY: fputs("word boundary", f); break; + case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break; #ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; - case ANCHOR_WORD_END: fputs("word end", f); break; + case ANCR_WORD_BEGIN: fputs("word begin", f); break; + case ANCR_WORD_END: fputs("word end", f); break; #endif - case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: fputs("extended-grapheme-cluster boundary", f); break; - case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: fputs("no-extended-grapheme-cluster boundary", f); break; - case ANCHOR_PREC_READ: + case ANCR_PREC_READ: fprintf(f, "prec read\n"); print_indent_tree(f, NODE_BODY(node), indent + add); break; - case ANCHOR_PREC_READ_NOT: + case ANCR_PREC_READ_NOT: fprintf(f, "prec read not\n"); print_indent_tree(f, NODE_BODY(node), indent + add); break; - case ANCHOR_LOOK_BEHIND: + case ANCR_LOOK_BEHIND: fprintf(f, "look behind\n"); print_indent_tree(f, NODE_BODY(node), indent + add); break; - case ANCHOR_LOOK_BEHIND_NOT: + case ANCR_LOOK_BEHIND_NOT: fprintf(f, "look behind not\n"); print_indent_tree(f, NODE_BODY(node), indent + add); break; @@ -6506,20 +6689,20 @@ print_indent_tree(FILE* f, Node* node, int indent) print_indent_tree(f, NODE_BODY(node), indent + add); break; - case NODE_ENCLOSURE: - fprintf(f, "<enclosure:%p> ", node); - switch (ENCLOSURE_(node)->type) { - case ENCLOSURE_OPTION: - fprintf(f, "option:%d", ENCLOSURE_(node)->o.options); + case NODE_BAG: + fprintf(f, "<bag:%p> ", node); + switch (BAG_(node)->type) { + case BAG_OPTION: + fprintf(f, "option:%d", BAG_(node)->o.options); break; - case 
ENCLOSURE_MEMORY: - fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum); + case BAG_MEMORY: + fprintf(f, "memory:%d", BAG_(node)->m.regnum); break; - case ENCLOSURE_STOP_BACKTRACK: + case BAG_STOP_BACKTRACK: fprintf(f, "stop-bt"); break; - - default: + case BAG_IF_ELSE: + fprintf(f, "if-else"); break; } fprintf(f, "\n"); @@ -6561,7 +6744,7 @@ print_indent_tree(FILE* f, Node* node, int indent) } if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT && - type != NODE_ENCLOSURE) + type != NODE_BAG) fprintf(f, "\n"); fflush(f); } diff --git a/src/regenc.c b/src/regenc.c index 21f3536..d8f5274 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -231,7 +231,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) { int n = 0; UChar* q = (UChar* )p; - + while (q < end) { q += ONIGENC_MBC_ENC_LEN(enc, q); n++; @@ -244,7 +244,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s) { int n = 0; UChar* p = (UChar* )s; - + while (1) { if (*p == '\0') { UChar* q; diff --git a/src/regenc.h b/src/regenc.h index ae8d65e..8a3397d 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -121,8 +121,20 @@ struct PropertyNameCtype { #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII +#define ENC_SKIP_OFFSET_1_OR_0 7 + #define ENC_FLAG_ASCII_COMPATIBLE (1<<0) #define ENC_FLAG_UNICODE (1<<1) +#define ENC_FLAG_SKIP_OFFSET_MASK (7<<2) +#define ENC_FLAG_SKIP_OFFSET_0 0 +#define ENC_FLAG_SKIP_OFFSET_1 (1<<2) +#define ENC_FLAG_SKIP_OFFSET_2 (2<<2) +#define ENC_FLAG_SKIP_OFFSET_3 (3<<2) +#define ENC_FLAG_SKIP_OFFSET_4 (4<<2) +#define ENC_FLAG_SKIP_OFFSET_1_OR_0 (ENC_SKIP_OFFSET_1_OR_0<<2) + +#define ENC_GET_SKIP_OFFSET(enc) \ + (((enc)->flag & ENC_FLAG_SKIP_OFFSET_MASK)>>2) /* for encoding system implementation (internal) */ @@ -197,7 +209,7 @@ extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* else if ((buk)->fold_len == 3)\ addr = OnigUnicodeFolds3 + (buk)->index;\ else\ - addr = 0;\ + return ONIGERR_INVALID_CODE_POINT_VALUE;\ } while (0) 
extern OnigCodePoint OnigUnicodeFolds1[]; @@ -252,7 +264,7 @@ extern const unsigned short OnigEncAsciiCtypeTable[]; #define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) - + #define ONIGENC_IS_UNICODE_ENCODING(enc) \ (((enc)->flag & ENC_FLAG_UNICODE) != 0) diff --git a/src/regerror.c b/src/regerror.c index 70efe9a..3fbcdfe 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -30,13 +30,7 @@ #include "regint.h" #include <stdio.h> /* for vsnprintf() */ -#ifdef HAVE_STDARG_PROTOTYPES #include <stdarg.h> -#define va_init_list(a,b) va_start(a,b) -#else -#include <varargs.h> -#define va_init_list(a,b) va_start(a) -#endif extern UChar* onig_error_code_to_format(int code) @@ -247,7 +241,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, if (len >= buf_size) break; } - *is_over = ((p < end) ? 1 : 0); + *is_over = p < end; } else { len = MIN((int )(end - s), buf_size); @@ -262,15 +256,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, /* for ONIG_MAX_ERROR_MESSAGE_LEN */ #define MAX_ERROR_PAR_LEN 30 -extern int -#ifdef HAVE_STDARG_PROTOTYPES -onig_error_code_to_str(UChar* s, int code, ...) -#else -onig_error_code_to_str(s, code, va_alist) - UChar* s; - int code; - va_dcl -#endif +extern int onig_error_code_to_str(UChar* s, int code, ...) { UChar *p, *q; OnigErrorInfo* einfo; @@ -278,7 +264,7 @@ onig_error_code_to_str(s, code, va_alist) UChar parbuf[MAX_ERROR_PAR_LEN]; va_list vargs; - va_init_list(vargs, code); + va_start(vargs, code); switch (code) { case ONIGERR_UNDEFINED_NAME_REFERENCE: @@ -330,27 +316,15 @@ onig_error_code_to_str(s, code, va_alist) } -void -#ifdef HAVE_STDARG_PROTOTYPES -onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, - UChar* pat, UChar* pat_end, const UChar *fmt, ...) 
-#else -onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) - UChar buf[]; - int bufsize; - OnigEncoding enc; - UChar* pat; - UChar* pat_end; - const UChar *fmt; - va_dcl -#endif +void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, ...) { int n, need, len; UChar *p, *s, *bp; UChar bs[6]; va_list args; - va_init_list(args, fmt); + va_start(args, fmt); n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); va_end(args); diff --git a/src/regexec.c b/src/regexec.c index 6c76d85..fa61839 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -782,13 +782,13 @@ static int onig_region_resize_clear(OnigRegion* region, int n) { int r; - + r = onig_region_resize(region, n); if (r != 0) return r; onig_region_clear(region); return 0; } - + extern int onig_region_set(OnigRegion* region, int at, int beg, int end) { @@ -798,7 +798,7 @@ onig_region_set(OnigRegion* region, int at, int beg, int end) int r = onig_region_resize(region, at + 1); if (r < 0) return r; } - + region->beg[at] = beg; region->end[at] = end; return 0; @@ -1225,7 +1225,7 @@ onig_initialize_match_param(OnigMatchParam* mp) static int adjust_match_param(regex_t* reg, OnigMatchParam* mp) { - RegexExt* ext = REG_EXTP(reg); + RegexExt* ext = reg->extp; mp->match_at_call_counter = 0; @@ -2337,6 +2337,79 @@ typedef struct { regoff_t rm_eo; } posix_regmatch_t; + +#ifdef __GNUC__ +#define USE_THREADED_CODE +#endif + +#ifdef USE_THREADED_CODE + +#define BYTECODE_INTERPRETER_START JUMP_OP; +#define BYTECODE_INTERPRETER_END +#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(1) +#define DEFAULT_OP /* L_DEFAULT: */ +#define NEXT_OP sprev = sbegin; JUMP_OP +#define JUMP_OP goto *opcode_to_label[*p++] +#define BREAK_OP /* Nothing */ + +#else + +#define BYTECODE_INTERPRETER_START \ + while (1) {\ + MATCH_DEBUG_OUT(0)\ + sbegin = s;\ + switch (*p++) { +#define BYTECODE_INTERPRETER_END } sprev = sbegin; } +#define 
CASE_OP(x) case OP_##x: SOP_IN(OP_##x); +#define DEFAULT_OP default: +#define NEXT_OP break +#define JUMP_OP continue; break +#define BREAK_OP break + +#endif /* USE_THREADED_CODE */ + +#define NEXT_OUT SOP_OUT; NEXT_OP +#define JUMP_OUT SOP_OUT; JUMP_OP +#define BREAK_OUT SOP_OUT; BREAK_OP +#define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP + + +#ifdef ONIG_DEBUG_MATCH +#define MATCH_DEBUG_OUT(offset) do {\ + UChar *xp, *q, *bp, buf[50];\ + int len, spos;\ + spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\ + xp = p - (offset);\ + fprintf(stderr, "%7u: %7ld: %4d> \"",\ + counter, GET_STACK_INDEX(stk), spos);\ + counter++;\ + bp = buf;\ + if (IS_NOT_NULL(s)) {\ + for (i = 0, q = s; i < 7 && q < end; i++) {\ + len = enclen(encode, q);\ + while (len-- > 0) *bp++ = *q++;\ + }\ + if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\ + else { xmemcpy(bp, "\"", 1); bp += 1; }\ + }\ + else {\ + xmemcpy(bp, "\"", 1); bp += 1;\ + }\ + *bp = 0;\ + fputs((char* )buf, stderr);\ + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\ + if (xp == FinishCode)\ + fprintf(stderr, "----: ");\ + else\ + fprintf(stderr, "%4d: ", (int )(xp - reg->p));\ + onig_print_compiled_byte_code(stderr, xp, NULL, reg->p, encode);\ + fprintf(stderr, "\n");\ + } while(0); +#else +#define MATCH_DEBUG_OUT(offset) +#endif + + /* match data(str - end) from position (sstart). */ /* if sstart == str then set sprev to NULL. 
*/ static int @@ -2346,6 +2419,107 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, { static UChar FinishCode[] = { OP_FINISH }; +#ifdef USE_THREADED_CODE + static const void *opcode_to_label[] = { + &&L_FINISH, + &&L_END, + &&L_EXACT1, + &&L_EXACT2, + &&L_EXACT3, + &&L_EXACT4, + &&L_EXACT5, + &&L_EXACTN, + &&L_EXACTMB2N1, + &&L_EXACTMB2N2, + &&L_EXACTMB2N3, + &&L_EXACTMB2N, + &&L_EXACTMB3N, + &&L_EXACTMBN, + &&L_EXACT1_IC, + &&L_EXACTN_IC, + &&L_CCLASS, + &&L_CCLASS_MB, + &&L_CCLASS_MIX, + &&L_CCLASS_NOT, + &&L_CCLASS_MB_NOT, + &&L_CCLASS_MIX_NOT, +#ifdef USE_OP_CCLASS_NODE + &&L_CCLASS_NODE, +#endif + &&L_ANYCHAR, + &&L_ANYCHAR_ML, + &&L_ANYCHAR_STAR, + &&L_ANYCHAR_ML_STAR, + &&L_ANYCHAR_STAR_PEEK_NEXT, + &&L_ANYCHAR_ML_STAR_PEEK_NEXT, + &&L_WORD, + &&L_WORD_ASCII, + &&L_NO_WORD, + &&L_NO_WORD_ASCII, + &&L_WORD_BOUNDARY, + &&L_NO_WORD_BOUNDARY, + &&L_WORD_BEGIN, + &&L_WORD_END, + &&L_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, + &&L_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, + &&L_BEGIN_BUF, + &&L_END_BUF, + &&L_BEGIN_LINE, + &&L_END_LINE, + &&L_SEMI_END_BUF, + &&L_BEGIN_POSITION, + &&L_BACKREF1, + &&L_BACKREF2, + &&L_BACKREF_N, + &&L_BACKREF_N_IC, + &&L_BACKREF_MULTI, + &&L_BACKREF_MULTI_IC, + &&L_BACKREF_WITH_LEVEL, + &&L_BACKREF_CHECK, + &&L_BACKREF_CHECK_WITH_LEVEL, + &&L_MEMORY_START, + &&L_MEMORY_START_PUSH, + &&L_MEMORY_END_PUSH, + &&L_MEMORY_END_PUSH_REC, + &&L_MEMORY_END, + &&L_MEMORY_END_REC, + &&L_FAIL, + &&L_JUMP, + &&L_PUSH, + &&L_PUSH_SUPER, + &&L_POP_OUT, + &&L_PUSH_OR_JUMP_EXACT1, + &&L_PUSH_IF_PEEK_NEXT, + &&L_REPEAT, + &&L_REPEAT_NG, + &&L_REPEAT_INC, + &&L_REPEAT_INC_NG, + &&L_REPEAT_INC_SG, + &&L_REPEAT_INC_NG_SG, + &&L_EMPTY_CHECK_START, + &&L_EMPTY_CHECK_END, + &&L_EMPTY_CHECK_END_MEMST, + &&L_EMPTY_CHECK_END_MEMST_PUSH, + &&L_PREC_READ_START, + &&L_PREC_READ_END, + &&L_PREC_READ_NOT_START, + &&L_PREC_READ_NOT_END, + &&L_ATOMIC_START, + &&L_ATOMIC_END, + &&L_LOOK_BEHIND, + &&L_LOOK_BEHIND_NOT_START, + &&L_LOOK_BEHIND_NOT_END, + &&L_CALL, + 
&&L_RETURN, + &&L_PUSH_SAVE_VAL, + &&L_UPDATE_VAR, +#ifdef USE_CALLOUT + &&L_CALLOUT_CONTENTS, + &&L_CALLOUT_NAME, +#endif + }; +#endif + int i, n, num_mem, best_len, pop_level; LengthType tlen, tlen2; MemNumType mem; @@ -2374,6 +2548,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, OnigEncoding encode = reg->enc; OnigCaseFoldType case_fold_flag = reg->case_fold_flag; +#ifdef ONIG_DEBUG_MATCH + static unsigned int counter = 1; +#endif + #ifdef USE_CALLOUT msa->mp->match_at_call_counter++; #endif @@ -2406,40 +2584,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, retry_in_match_counter = 0; #endif - while (1) { -#ifdef ONIG_DEBUG_MATCH - { - static unsigned int counter = 1; - - UChar *q, *bp, buf[50]; - int len; - fprintf(stderr, "%7u: %7ld: %4d> \"", - counter, GET_STACK_INDEX(stk), (int )(s - str)); - counter++; - - bp = buf; - for (i = 0, q = s; i < 7 && q < end; i++) { - len = enclen(encode, q); - while (len-- > 0) *bp++ = *q++; - } - if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } - else { xmemcpy(bp, "\"", 1); bp += 1; } - *bp = 0; - fputs((char* )buf, stderr); - - for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - if (p == FinishCode) - fprintf(stderr, "----: "); - else - fprintf(stderr, "%4d: ", (int )(p - reg->p)); - onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode); - fprintf(stderr, "\n"); - } -#endif - - sbegin = s; - switch (*p++) { - case OP_END: SOP_IN(OP_END); + BYTECODE_INTERPRETER_START { + CASE_OP(END) n = (int )(s - sstart); if (n > best_len) { OnigRegion* region; @@ -2551,16 +2697,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, /* default behavior: return first-matching result. 
*/ goto finish; - break; - case OP_EXACT1: SOP_IN(OP_EXACT1); + CASE_OP(EXACT1) DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++; - SOP_OUT; - break; + NEXT_OUT; - case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC); + CASE_OP(EXACT1_IC) { int len; UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2579,21 +2723,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; q++; } } - SOP_OUT; - break; + NEXT_OUT; - case OP_EXACT2: SOP_IN(OP_EXACT2); + CASE_OP(EXACT2) DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACT3: SOP_IN(OP_EXACT3); + CASE_OP(EXACT3) DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -2602,11 +2743,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACT4: SOP_IN(OP_EXACT4); + CASE_OP(EXACT4) DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -2617,11 +2756,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACT5: SOP_IN(OP_EXACT5); + CASE_OP(EXACT5) DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -2634,22 +2771,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTN: SOP_IN(OP_EXACTN); + CASE_OP(EXACTN) GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC); + CASE_OP(EXACTN_IC) { int len; UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2673,20 +2806,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1); + CASE_OP(EXACTMB2N1) DATA_ENSURE(2); 
if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - SOP_OUT; - break; + NEXT_OUT; - case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2); + CASE_OP(EXACTMB2N2) DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -2697,11 +2827,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3); + CASE_OP(EXACTMB2N3) DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -2716,11 +2844,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N); + CASE_OP(EXACTMB2N) GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { @@ -2730,11 +2856,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 2; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N); + CASE_OP(EXACTMB3N) GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { @@ -2746,11 +2870,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 3; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMBN: SOP_IN(OP_EXACTMBN); + CASE_OP(EXACTMBN) GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; @@ -2760,19 +2882,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - tlen; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_CCLASS: SOP_IN(OP_CCLASS); + CASE_OP(CCLASS) DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB); + CASE_OP(CCLASS_MB) if (! 
ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: @@ -2798,10 +2917,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } p += tlen; - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX); + CASE_OP(CCLASS_MIX) DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -2816,18 +2934,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT); + CASE_OP(CCLASS_NOT) DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT); + CASE_OP(CCLASS_MB_NOT) DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; @@ -2865,10 +2981,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; cc_mb_not_success: - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT); + CASE_OP(CCLASS_MIX_NOT) DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -2883,11 +2998,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - SOP_OUT; - break; + NEXT_OUT; #ifdef USE_OP_CCLASS_NODE - case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE); + CASE_OP(CCLASS_NODE) { OnigCodePoint code; void *node; @@ -2903,28 +3017,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, code = ONIGENC_MBC_TO_CODE(encode, ss, s); if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - SOP_OUT; - break; + NEXT_OUT; #endif - case OP_ANYCHAR: SOP_IN(OP_ANYCHAR); + CASE_OP(ANYCHAR) DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - SOP_OUT; - break; + NEXT_OUT; - case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML); + CASE_OP(ANYCHAR_ML) DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); s += n; - SOP_OUT; - break; + NEXT_OUT; - case OP_ANYCHAR_STAR: 
SOP_IN(OP_ANYCHAR_STAR); + CASE_OP(ANYCHAR_STAR) while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2933,11 +3044,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; s += n; } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR); + CASE_OP(ANYCHAR_ML_STAR) while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2951,11 +3060,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + CASE_OP(ANYCHAR_STAR_PEEK_NEXT) while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2967,10 +3074,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } p++; - SOP_OUT; - break; + NEXT_OUT; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT) while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2987,46 +3093,41 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } p++; - SOP_OUT; - break; + NEXT_OUT; - case OP_WORD: SOP_IN(OP_WORD); + CASE_OP(WORD) DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII); + CASE_OP(WORD_ASCII) DATA_ENSURE(1); if (! 
ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_NO_WORD: SOP_IN(OP_NO_WORD); + CASE_OP(NO_WORD) DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII); + CASE_OP(NO_WORD_ASCII) DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY); + CASE_OP(WORD_BOUNDARY) { ModeType mode; GET_MODE_INC(mode, p); /* ascii_mode */ @@ -3046,11 +3147,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY); + CASE_OP(NO_WORD_BOUNDARY) { ModeType mode; GET_MODE_INC(mode, p); /* ascii_mode */ @@ -3069,189 +3168,150 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - SOP_OUT; - continue; - break; + JUMP_OUT; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN); + CASE_OP(WORD_BEGIN) { ModeType mode; GET_MODE_INC(mode, p); /* ascii_mode */ if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { - SOP_OUT; - continue; + JUMP_OUT; } } } goto fail; - break; - case OP_WORD_END: SOP_IN(OP_WORD_END); + CASE_OP(WORD_END) { ModeType mode; GET_MODE_INC(mode, p); /* ascii_mode */ if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { if (ON_STR_END(s) || ! 
IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - SOP_OUT; - continue; + JUMP_OUT; } } } goto fail; - break; #endif - case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + CASE_OP(EXTENDED_GRAPHEME_CLUSTER_BOUNDARY) if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { - SOP_OUT; - continue; + JUMP_OUT; } goto fail; - break; - case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + CASE_OP(NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY) if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) goto fail; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF); + CASE_OP(BEGIN_BUF) if (! ON_STR_BEGIN(s)) goto fail; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_END_BUF: SOP_IN(OP_END_BUF); + CASE_OP(END_BUF) if (! ON_STR_END(s)) goto fail; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE); + CASE_OP(BEGIN_LINE) if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; - SOP_OUT; - continue; + JUMP_OUT; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - SOP_OUT; - continue; + JUMP_OUT; } goto fail; - break; - case OP_END_LINE: SOP_IN(OP_END_LINE); + CASE_OP(END_LINE) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - SOP_OUT; - continue; + JUMP_OUT; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - SOP_OUT; - continue; + JUMP_OUT; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - SOP_OUT; - continue; + JUMP_OUT; } #endif goto fail; - break; - case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF); + CASE_OP(SEMI_END_BUF) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || 
!ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - SOP_OUT; - continue; + JUMP_OUT; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enclen(encode, s))) { - SOP_OUT; - continue; + JUMP_OUT; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { UChar* ss = s + enclen(encode, s); ss += enclen(encode, ss); if (ON_STR_END(ss)) { - SOP_OUT; - continue; + JUMP_OUT; } } #endif goto fail; - break; - case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION); + CASE_OP(BEGIN_POSITION) if (s != msa->start) goto fail; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH); + CASE_OP(MEMORY_START_PUSH) GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_START: SOP_IN(OP_MEMORY_START); + CASE_OP(MEMORY_START) GET_MEMNUM_INC(mem, p); mem_start_stk[mem] = (StackIndex )((void* )s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH); + CASE_OP(MEMORY_END_PUSH) GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_END: SOP_IN(OP_MEMORY_END); + CASE_OP(MEMORY_END) GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); - SOP_OUT; - continue; - break; + JUMP_OUT; #ifdef USE_CALL - case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC); + CASE_OP(MEMORY_END_PUSH_REC) GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. 
*/ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC); + CASE_OP(MEMORY_END_REC) GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); @@ -3262,22 +3322,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - SOP_OUT; - continue; - break; + JUMP_OUT; #endif - case OP_BACKREF1: SOP_IN(OP_BACKREF1); + CASE_OP(BACKREF1) mem = 1; goto backref; - break; - case OP_BACKREF2: SOP_IN(OP_BACKREF2); + CASE_OP(BACKREF2) mem = 2; goto backref; - break; - case OP_BACKREF_N: SOP_IN(OP_BACKREF_N); + CASE_OP(BACKREF_N) GET_MEMNUM_INC(mem, p); backref: { @@ -3301,13 +3357,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STRING_CMP(pstart, s, n); while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - - SOP_OUT; - continue; } - break; + JUMP_OUT; - case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC); + CASE_OP(BACKREF_N_IC) GET_MEMNUM_INC(mem, p); { int len; @@ -3330,13 +3383,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STRING_CMP_IC(case_fold_flag, pstart, &s, n); while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - - SOP_OUT; - continue; } - break; + JUMP_OUT; - case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI); + CASE_OP(BACKREF_MULTI) { int len, is_fail; UChar *pstart, *pend, *swork; @@ -3370,12 +3420,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - SOP_OUT; - continue; } - break; + JUMP_OUT; - case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC); + CASE_OP(BACKREF_MULTI_IC) { int len, is_fail; UChar *pstart, *pend, *swork; @@ -3409,13 +3457,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - SOP_OUT; - continue; } - break; + JUMP_OUT; 
#ifdef USE_BACKREF_WITH_LEVEL - case OP_BACKREF_WITH_LEVEL: + CASE_OP(BACKREF_WITH_LEVEL) { int len; OnigOptionType ic; @@ -3436,14 +3482,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else goto fail; - - SOP_OUT; - continue; } - break; + JUMP_OUT; #endif - case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK); + CASE_OP(BACKREF_CHECK) { GET_LENGTH_INC(tlen, p); for (i = 0; i < tlen; i++) { @@ -3456,13 +3499,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - SOP_OUT; - continue; } - break; + JUMP_OUT; #ifdef USE_BACKREF_WITH_LEVEL - case OP_BACKREF_CHECK_WITH_LEVEL: + CASE_OP(BACKREF_CHECK_WITH_LEVEL) { LengthType level; @@ -3475,21 +3516,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else goto fail; - - SOP_OUT; - continue; } - break; + JUMP_OUT; #endif - case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START); + CASE_OP(EMPTY_CHECK_START) GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_PUSH_EMPTY_CHECK_START(mem, s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END); + CASE_OP(EMPTY_CHECK_END) { int is_empty; @@ -3518,12 +3554,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - SOP_OUT; - continue; - break; + JUMP_OUT; #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT - case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST); + CASE_OP(EMPTY_CHECK_END_MEMST) { int is_empty; @@ -3537,14 +3571,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto empty_check_found; } } - SOP_OUT; - continue; - break; + JUMP_OUT; #endif #ifdef USE_CALL - case OP_EMPTY_CHECK_END_MEMST_PUSH: - SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); + CASE_OP(EMPTY_CHECK_END_MEMST_PUSH) { int is_empty; @@ -3566,68 +3597,51 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_EMPTY_CHECK_END(mem); } } - SOP_OUT; - continue; - break; + JUMP_OUT; #endif - case OP_JUMP: SOP_IN(OP_JUMP); + 
CASE_OP(JUMP) GET_RELADDR_INC(addr, p); p += addr; - SOP_OUT; - CHECK_INTERRUPT_IN_MATCH; - continue; - break; + CHECK_INTERRUPT_JUMP_OUT; - case OP_PUSH: SOP_IN(OP_PUSH); + CASE_OP(PUSH) GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER); + CASE_OP(PUSH_SUPER) GET_RELADDR_INC(addr, p); STACK_PUSH_SUPER_ALT(p + addr, s, sprev); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_POP_OUT: SOP_IN(OP_POP_OUT); + CASE_OP(POP_OUT) STACK_POP_ONE; /* for stop backtrack */ /* CHECK_RETRY_LIMIT_IN_MATCH; */ - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1); + CASE_OP(PUSH_OR_JUMP_EXACT1) GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - SOP_OUT; - continue; + JUMP_OUT; } p += (addr + 1); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT); + CASE_OP(PUSH_IF_PEEK_NEXT) GET_RELADDR_INC(addr, p); if (*p == *s) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - SOP_OUT; - continue; + JUMP_OUT; } p++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_REPEAT: SOP_IN(OP_REPEAT); + CASE_OP(REPEAT) { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -3640,11 +3654,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p + addr, s, sprev); } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG); + CASE_OP(REPEAT_NG) { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -3658,11 +3670,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += addr; } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC); + CASE_OP(REPEAT_INC) GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -3680,19 +3690,15 @@ match_at(regex_t* reg, const UChar* 
str, const UChar* end, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - SOP_OUT; - CHECK_INTERRUPT_IN_MATCH; - continue; - break; + CHECK_INTERRUPT_JUMP_OUT; - case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG); + CASE_OP(REPEAT_INC_SG) GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; - break; - case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG); + CASE_OP(REPEAT_INC_NG) GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -3714,68 +3720,51 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - SOP_OUT; - CHECK_INTERRUPT_IN_MATCH; - continue; - break; + CHECK_INTERRUPT_JUMP_OUT; - case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG); + CASE_OP(REPEAT_INC_NG_SG) GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; - break; - case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START); + CASE_OP(PREC_READ_START) STACK_PUSH_POS(s, sprev); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END); + CASE_OP(PREC_READ_END) { STACK_EXEC_TO_VOID(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START); + CASE_OP(PREC_READ_NOT_START) GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END); + CASE_OP(PREC_READ_NOT_END) STACK_POP_TIL_ALT_PREC_READ_NOT; goto fail; - break; - case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START); + CASE_OP(ATOMIC_START) STACK_PUSH_TO_VOID_START; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END); + CASE_OP(ATOMIC_END) STACK_EXEC_TO_VOID(stkp); - SOP_OUT; - continue; - 
break; + JUMP_OUT; - case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND); + CASE_OP(LOOK_BEHIND) GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START); + CASE_OP(LOOK_BEHIND_NOT_START) GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -3790,33 +3779,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END); + CASE_OP(LOOK_BEHIND_NOT_END) STACK_POP_TIL_ALT_LOOK_BEHIND_NOT; goto fail; - break; #ifdef USE_CALL - case OP_CALL: SOP_IN(OP_CALL); + CASE_OP(CALL) GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); p = reg->p + addr; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_RETURN: SOP_IN(OP_RETURN); + CASE_OP(RETURN) STACK_RETURN(p); STACK_PUSH_RETURN; - SOP_OUT; - continue; - break; + JUMP_OUT; #endif - case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL); + CASE_OP(PUSH_SAVE_VAL) { SaveType type; GET_SAVE_TYPE_INC(type, p); @@ -3835,11 +3817,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR); + CASE_OP(UPDATE_VAR) { UpdateVarType type; enum SaveType save_type; @@ -3867,20 +3847,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - SOP_OUT; - continue; - break; + JUMP_OUT; #ifdef USE_CALLOUT - case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS); + CASE_OP(CALLOUT_CONTENTS) of = ONIG_CALLOUT_OF_CONTENTS; goto callout_common_entry; + BREAK_OUT; - SOP_OUT; - continue; - break; - - case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME); + CASE_OP(CALLOUT_NAME) { int call_result; int 
name_id; @@ -3941,34 +3916,34 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - SOP_OUT; - continue; - break; + JUMP_OUT; #endif - case OP_FINISH: + CASE_OP(FINISH) goto finish; - break; +#ifdef ONIG_DEBUG_STATISTICS fail: SOP_OUT; - /* fall */ - case OP_FAIL: SOP_IN(OP_FAIL); + goto fail2; +#endif + CASE_OP(FAIL) +#ifdef ONIG_DEBUG_STATISTICS + fail2: +#else + fail: +#endif STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; sprev = stk->u.state.pstr_prev; CHECK_RETRY_LIMIT_IN_MATCH; - SOP_OUT; - continue; - break; + JUMP_OUT; - default: + DEFAULT_OP goto bytecode_error; - } /* end of switch */ - sprev = sbegin; - } /* end of while(1) */ + } BYTECODE_INTERPRETER_END; finish: STACK_SAVE; @@ -4130,150 +4105,143 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, return (UChar* )NULL; } + static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) +sunday_quick_search_step_forward(regex_t* reg, + const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *se, *t, *p, *end; const UChar *tail; int skip, tlen1; + int map_offset; + OnigEncoding enc; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n", - text, text_end, text_range); + fprintf(stderr, + "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range); #endif + enc = reg->enc; + tail = target_end - 1; tlen1 = (int )(tail - target); end = text_range; if (end + tlen1 > text_end) end = text_end - tlen1; + map_offset = reg->map_offset; s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s); - } while ((s - t) < skip && s < 
end); - } - } - else { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s); - } while ((s - t) < skip && s < end); + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } + if (se + map_offset >= text_end) break; + skip = reg->map[*(se + map_offset)]; +#if 0 + t = s; + do { + s += enclen(enc, s); + } while ((s - t) < skip && s < end); +#else + s += skip; + if (s < end) + s = onigenc_get_right_adjust_char_head(enc, text, s); +#endif } return (UChar* )NULL; } static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) +sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *t, *p, *end; const UChar *tail; + int map_offset; - end = text_range + (target_end - target) - 1; + end = text_range + (target_end - target); if (end > text_end) end = text_end; + map_offset = reg->map_offset; tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->map[*s]; - } - } - else { /* see int_map[] */ - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->int_map[*s]; + s = text + (tail - target); + + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; } + if (s + map_offset >= text_end) break; + s += reg->map[*(s + map_offset)]; } + return (UChar* )NULL; } -#ifdef USE_INT_MAP_BACKWARD -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** 
skip) +static UChar* +sunday_quick_search_case_fold(regex_t* reg, + const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) { - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = end - s; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; + const UChar *s, *se, *end; + const UChar *tail; + int skip, tlen1; + int map_offset; + int case_fold_flag; + OnigEncoding enc; - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range); +#endif - return 0; -} + enc = reg->enc; + case_fold_flag = reg->case_fold_flag; -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; + tail = target_end - 1; + tlen1 = (int )(tail - target); + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + map_offset = reg->map_offset; + s = text; - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) + while (s < end) { + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, text_end)) return (UChar* )s; - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + se = s + tlen1; + if (se + map_offset >= text_end) break; + skip = reg->map[*(se + map_offset)]; +#if 0 + p = s; + do { + s += enclen(enc, s); + } while ((s - p) < skip && s < end); +#else + /* This is faster than prev code for long text. 
ex: /(?i)Twain/ */ + s += skip; + if (s < end) + s = onigenc_get_right_adjust_char_head(enc, text, s); +#endif } return (UChar* )NULL; } -#endif static UChar* map_search(OnigEncoding enc, UChar map[], @@ -4380,20 +4348,26 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, retry: switch (reg->optimize) { - case OPTIMIZE_EXACT: + case OPTIMIZE_STR: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case OPTIMIZE_EXACT_IC: + case OPTIMIZE_STR_CASE_FOLD: p = slow_search_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, p, end, range); break; - case OPTIMIZE_EXACT_BM: - p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); + case OPTIMIZE_STR_CASE_FOLD_FAST: + p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end, + range); + break; + + case OPTIMIZE_STR_FAST: + p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range); break; - case OPTIMIZE_EXACT_BM_NO_REV: - p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); + case OPTIMIZE_STR_FAST_STEP_FORWARD: + p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end, + p, end, range); break; case OPTIMIZE_MAP: @@ -4413,7 +4387,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, UChar* prev; switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: + case ANCR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { prev = onigenc_get_prev_char_head(reg->enc, (pprev ? 
pprev : str), p); @@ -4422,7 +4396,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } break; - case ANCHOR_END_LINE: + case ANCR_END_LINE: if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = (UChar* )onigenc_get_prev_char_head(reg->enc, @@ -4490,8 +4464,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } -#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 - static int backward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, const UChar* range, UChar* adjrange, @@ -4499,41 +4471,29 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, { UChar *p; + if (range == 0) goto fail; + range += reg->dmin; p = s; retry: switch (reg->optimize) { - case OPTIMIZE_EXACT: + case OPTIMIZE_STR: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case OPTIMIZE_EXACT_IC: + case OPTIMIZE_STR_CASE_FOLD: + case OPTIMIZE_STR_CASE_FOLD_FAST: p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case OPTIMIZE_EXACT_BM: - case OPTIMIZE_EXACT_BM_NO_REV: -#ifdef USE_INT_MAP_BACKWARD - if (IS_NULL(reg->int_map_backward)) { - int r; - - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r != 0) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); -#else + case OPTIMIZE_STR_FAST: + case OPTIMIZE_STR_FAST_STEP_FORWARD: goto exact_method; -#endif break; case OPTIMIZE_MAP: @@ -4546,17 +4506,17 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* prev; switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: + case ANCR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { prev = onigenc_get_prev_char_head(reg->enc, str, p); - if 
(!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; } } break; - case ANCHOR_END_LINE: + case ANCR_END_LINE: if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); @@ -4682,7 +4642,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, if (reg->anchor != 0 && str < end) { UChar *min_semi_end, *max_semi_end; - if (reg->anchor & ANCHOR_BEGIN_POSITION) { + if (reg->anchor & ANCR_BEGIN_POSITION) { /* search start-position only */ begin_position: if (range > start) @@ -4690,7 +4650,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, else range = start; } - else if (reg->anchor & ANCHOR_BEGIN_BUF) { + else if (reg->anchor & ANCR_BEGIN_BUF) { /* search str-position only */ if (range > start) { if (start != str) goto mismatch_no_msa; @@ -4705,7 +4665,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, goto mismatch_no_msa; } } - else if (reg->anchor & ANCHOR_END_BUF) { + else if (reg->anchor & ANCR_END_BUF) { min_semi_end = max_semi_end = (UChar* )end; end_buf: @@ -4737,7 +4697,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, if (range > start) goto mismatch_no_msa; } } - else if (reg->anchor & ANCHOR_SEMI_END_BUF) { + else if (reg->anchor & ANCR_SEMI_END_BUF) { UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); max_semi_end = (UChar* )end; @@ -4760,7 +4720,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, goto end_buf; } } - else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) { + else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) { goto begin_position; } } @@ -4833,13 +4793,13 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, if (! 
forward_search_range(reg, str, end, s, sch_range, &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) { + if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) { do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; s += enclen(reg->enc, s); - if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { + if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { prev = s; s += enclen(reg->enc, s); @@ -4862,6 +4822,8 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, } } else { /* backward search */ + if (range < str) goto mismatch; + if (orig_start < end) orig_start += enclen(reg->enc, orig_start); /* is upper range */ diff --git a/src/regint.h b/src/regint.h index c3d1ee1..d6aec9d 100644 --- a/src/regint.h +++ b/src/regint.h @@ -62,7 +62,6 @@ #define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - #define USE_RETRY_LIMIT_IN_MATCH /* internal config */ @@ -70,27 +69,13 @@ #define USE_QUANT_PEEK_NEXT #define USE_ST_LIBRARY -#include "regenc.h" - -#ifdef __cplusplus -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ -#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifdef HAVE_STDARG_H -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif +#define USE_WORD_BEGIN_END /* "\<", "\>" */ +#define USE_CAPTURE_HISTORY +#define USE_VARIABLE_META_CHARS +#define USE_POSIX_API_REGION_OPTION +#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#include "regenc.h" #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ @@ -103,12 +88,6 @@ #undef 
ONIG_ESCAPE_UCHAR_COLLISION #endif -#define USE_WORD_BEGIN_END /* "\<", "\>" */ -#define USE_CAPTURE_HISTORY -#define USE_VARIABLE_META_CHARS -#define USE_POSIX_API_REGION_OPTION -#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - #define xmalloc malloc #define xrealloc realloc #define xcalloc calloc @@ -152,14 +131,8 @@ #include <stddef.h> - -#ifdef HAVE_LIMITS_H #include <limits.h> -#endif - -#ifdef HAVE_STDLIB_H #include <stdlib.h> -#endif #ifdef HAVE_STDINT_H #include <stdint.h> @@ -169,11 +142,7 @@ #include <alloca.h> #endif -#ifdef HAVE_STRING_H -# include <string.h> -#else -# include <strings.h> -#endif +#include <string.h> #include <ctype.h> #ifdef HAVE_SYS_TYPES_H @@ -217,6 +186,7 @@ typedef unsigned int uintptr_t; #define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY #define NULL_UCHARP ((UChar* )0) +#define CHAR_MAP_SIZE 256 #define INFINITE_LEN ONIG_INFINITE_DISTANCE #ifdef PLATFORM_UNALIGNED_WORD_ACCESS @@ -292,9 +262,6 @@ typedef struct { #endif } RegexExt; -#define REG_EXTP(reg) ((RegexExt* )((reg)->chain)) -#define REG_EXTPL(reg) ((reg)->chain) - struct re_pattern_buffer { /* common members of BBuf(bytes-buffer) */ unsigned char* p; /* compiled pattern */ @@ -304,7 +271,6 @@ struct re_pattern_buffer { int num_mem; /* used memory(...) num counted from 1 */ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */ - int num_comb_exp_check; /* no longer used (combination explosion check) */ int num_call; /* number of subexp call */ unsigned int capture_history; /* (?@...) 
flag (1-31) */ unsigned int bt_mem_start; /* need backtrack flag */ @@ -323,19 +289,16 @@ struct re_pattern_buffer { int optimize; /* optimize flag */ int threshold_len; /* search str-length for apply optimize */ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */ int sub_anchor; /* start-anchor for exact or map */ unsigned char *exact; unsigned char *exact_end; - unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ - int *int_map_backward; /* BM skip for backward search */ - OnigLen dmin; /* min-distance of exact or map */ - OnigLen dmax; /* max-distance of exact or map */ - - /* regex_t link chain */ - struct re_pattern_buffer* chain; /* escape compile-conflict */ + unsigned char map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */ + int map_offset; + OnigLen dmin; /* min-distance of exact or map */ + OnigLen dmax; /* max-distance of exact or map */ + RegexExt* extp; }; @@ -348,12 +311,13 @@ enum StackPopLevel { /* optimize flags */ enum OptimizeType { - OPTIMIZE_NONE = 0, - OPTIMIZE_EXACT = 1, /* Slow Search */ - OPTIMIZE_EXACT_BM = 2, /* Boyer Moore Search */ - OPTIMIZE_EXACT_BM_NO_REV = 3, /* BM (but not simple match) */ - OPTIMIZE_EXACT_IC = 4, /* Slow Search (ignore case) */ - OPTIMIZE_MAP = 5 /* char map */ + OPTIMIZE_NONE = 0, + OPTIMIZE_STR, /* Slow Search */ + OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */ + OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */ + OPTIMIZE_STR_CASE_FOLD_FAST, /* Sunday quick search / BMH (ignore case) */ + OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */ + OPTIMIZE_MAP /* char map */ }; /* bit status */ @@ -541,32 +505,32 @@ typedef struct _BBuf { /* has body */ -#define ANCHOR_PREC_READ 
(1<<0) -#define ANCHOR_PREC_READ_NOT (1<<1) -#define ANCHOR_LOOK_BEHIND (1<<2) -#define ANCHOR_LOOK_BEHIND_NOT (1<<3) +#define ANCR_PREC_READ (1<<0) +#define ANCR_PREC_READ_NOT (1<<1) +#define ANCR_LOOK_BEHIND (1<<2) +#define ANCR_LOOK_BEHIND_NOT (1<<3) /* no body */ -#define ANCHOR_BEGIN_BUF (1<<4) -#define ANCHOR_BEGIN_LINE (1<<5) -#define ANCHOR_BEGIN_POSITION (1<<6) -#define ANCHOR_END_BUF (1<<7) -#define ANCHOR_SEMI_END_BUF (1<<8) -#define ANCHOR_END_LINE (1<<9) -#define ANCHOR_WORD_BOUNDARY (1<<10) -#define ANCHOR_NO_WORD_BOUNDARY (1<<11) -#define ANCHOR_WORD_BEGIN (1<<12) -#define ANCHOR_WORD_END (1<<13) -#define ANCHOR_ANYCHAR_INF (1<<14) -#define ANCHOR_ANYCHAR_INF_ML (1<<15) -#define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16) -#define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17) - - -#define ANCHOR_HAS_BODY(a) ((a)->type < ANCHOR_BEGIN_BUF) +#define ANCR_BEGIN_BUF (1<<4) +#define ANCR_BEGIN_LINE (1<<5) +#define ANCR_BEGIN_POSITION (1<<6) +#define ANCR_END_BUF (1<<7) +#define ANCR_SEMI_END_BUF (1<<8) +#define ANCR_END_LINE (1<<9) +#define ANCR_WORD_BOUNDARY (1<<10) +#define ANCR_NO_WORD_BOUNDARY (1<<11) +#define ANCR_WORD_BEGIN (1<<12) +#define ANCR_WORD_END (1<<13) +#define ANCR_ANYCHAR_INF (1<<14) +#define ANCR_ANYCHAR_INF_ML (1<<15) +#define ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16) +#define ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17) + + +#define ANCHOR_HAS_BODY(a) ((a)->type < ANCR_BEGIN_BUF) #define IS_WORD_ANCHOR_TYPE(type) \ - ((type) == ANCHOR_WORD_BOUNDARY || (type) == ANCHOR_NO_WORD_BOUNDARY || \ - (type) == ANCHOR_WORD_BEGIN || (type) == ANCHOR_WORD_END) + ((type) == ANCR_WORD_BOUNDARY || (type) == ANCR_NO_WORD_BOUNDARY || \ + (type) == ANCR_WORD_BEGIN || (type) == ANCR_WORD_END) /* operation code */ enum OpCode { @@ -851,6 +815,7 @@ extern void onig_transfer P_((regex_t* to, regex_t* from)); extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); extern RegexExt* 
onig_get_regex_ext(regex_t* reg); extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); +extern int onig_positive_int_multiply(int x, int y); #ifdef USE_CALLOUT diff --git a/src/regparse.c b/src/regparse.c index fcc05cf..9e42e71 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -71,7 +71,7 @@ OnigSyntaxType OnigSyntaxOniguruma = { ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 ) - , ( SYN_GNU_REGEX_BV | + , ( SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | @@ -113,7 +113,7 @@ OnigSyntaxType OnigSyntaxRuby = { ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 ) - , ( SYN_GNU_REGEX_BV | + , ( SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | @@ -198,17 +198,6 @@ onig_set_parse_depth_limit(unsigned int depth) return 0; } -static int -positive_int_multiply(int x, int y) -{ - if (x == 0 || y == 0) return 0; - - if (x < INT_MAX / y) - return x * y; - else - return -1; -} - static void bbuf_free(BBuf* bbuf) { @@ -966,6 +955,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) #ifdef USE_ST_LIBRARY if (IS_NULL(t)) { t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM); + CHECK_NULL_RETURN_MEMERR(t); reg->name_table = (void* )t; } e = (NameEntry* )xmalloc(sizeof(NameEntry)); @@ -1372,6 +1362,7 @@ callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc, #ifdef USE_ST_LIBRARY if (IS_NULL(t)) { t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM); + CHECK_NULL_RETURN_MEMERR(t); GlobalCalloutNameTable = t; } e = (CalloutNameEntry* 
)xmalloc(sizeof(CalloutNameEntry)); @@ -1571,6 +1562,7 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type, } for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) { if (fe->arg_types[i] == ONIG_TYPE_STRING) { + if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT; OnigValue* val = opt_defaults + j; UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end); CHECK_NULL_RETURN_MEMERR(ds); @@ -1616,6 +1608,7 @@ onig_get_callout_start_func(regex_t* reg, int callout_num) CalloutListEntry* e; e = onig_reg_callout_list_at(reg, callout_num); + CHECK_NULL_RETURN(e); return e->start_func; } @@ -1623,6 +1616,7 @@ extern const UChar* onig_get_callout_tag_start(regex_t* reg, int callout_num) { CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); + CHECK_NULL_RETURN(e); return e->tag_start; } @@ -1630,6 +1624,7 @@ extern const UChar* onig_get_callout_tag_end(regex_t* reg, int callout_num) { CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); + CHECK_NULL_RETURN(e); return e->tag_end; } @@ -1736,7 +1731,7 @@ setup_ext_callout_list_values(regex_t* reg) int i, j; RegexExt* ext; - ext = REG_EXTP(reg); + ext = reg->extp; if (IS_NOT_NULL(ext->tag_table)) { onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set, (st_data_t )ext); @@ -1766,13 +1761,13 @@ setup_ext_callout_list_values(regex_t* reg) extern int onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num) { - RegexExt* ext = REG_EXTP(reg); + RegexExt* ext = reg->extp; if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0; if (callout_num > ext->callout_num) return 0; return (ext->callout_list[callout_num].flag & - CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 
1 : 0; + CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0; } static int @@ -1814,7 +1809,7 @@ onig_get_callout_num_by_tag(regex_t* reg, RegexExt* ext; CalloutTagVal e; - ext = REG_EXTP(reg); + ext = reg->extp; if (IS_NULL(ext) || IS_NULL(ext->tag_table)) return ONIGERR_INVALID_CALLOUT_TAG_NAME; @@ -1901,9 +1896,11 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end, if (r != ONIG_NORMAL) return r; ext = onig_get_regex_ext(reg); + CHECK_NULL_RETURN_MEMERR(ext); r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val); e = onig_reg_callout_list_at(reg, (int )entry_val); + CHECK_NULL_RETURN_MEMERR(e); e->tag_start = name; e->tag_end = name_end; @@ -2008,7 +2005,7 @@ onig_node_free(Node* node) switch (NODE_TYPE(node)) { case NODE_STRING: - if (STR_(node)->capa != 0 && + if (STR_(node)->capacity != 0 && IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { xfree(STR_(node)->s); } @@ -2040,13 +2037,13 @@ onig_node_free(Node* node) xfree(BACKREF_(node)->back_dynamic); break; - case NODE_ENCLOSURE: + case NODE_BAG: if (NODE_BODY(node)) onig_node_free(NODE_BODY(node)); { - EnclosureNode* en = ENCLOSURE_(node); - if (en->type == ENCLOSURE_IF_ELSE) { + BagNode* en = BAG_(node); + if (en->type == BAG_IF_ELSE) { onig_node_free(en->te.Then); onig_node_free(en->te.Else); } @@ -2082,6 +2079,7 @@ node_new(void) Node* node; node = (Node* )xmalloc(sizeof(Node)); + CHECK_NULL_RETURN(node); xmemset(node, 0, sizeof(*node)); #ifdef DEBUG_NODE_FREE @@ -2138,6 +2136,8 @@ node_new_anychar_with_fixed_option(OnigOptionType option) Node* node; node = node_new_anychar(); + CHECK_NULL_RETURN(node); + ct = CTYPE_(node); ct->options = option; NODE_STATUS_ADD(node, FIXED_OPTION); @@ -2381,62 +2381,62 @@ node_new_quantifier(int lower, int upper, int by_number) } static Node* -node_new_enclosure(enum EnclosureType type) +node_new_bag(enum BagType type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - NODE_SET_TYPE(node, NODE_ENCLOSURE); - ENCLOSURE_(node)->type = type; + 
NODE_SET_TYPE(node, NODE_BAG); + BAG_(node)->type = type; switch (type) { - case ENCLOSURE_MEMORY: - ENCLOSURE_(node)->m.regnum = 0; - ENCLOSURE_(node)->m.called_addr = -1; - ENCLOSURE_(node)->m.entry_count = 1; - ENCLOSURE_(node)->m.called_state = 0; + case BAG_MEMORY: + BAG_(node)->m.regnum = 0; + BAG_(node)->m.called_addr = -1; + BAG_(node)->m.entry_count = 1; + BAG_(node)->m.called_state = 0; break; - case ENCLOSURE_OPTION: - ENCLOSURE_(node)->o.options = 0; + case BAG_OPTION: + BAG_(node)->o.options = 0; break; - case ENCLOSURE_STOP_BACKTRACK: + case BAG_STOP_BACKTRACK: break; - case ENCLOSURE_IF_ELSE: - ENCLOSURE_(node)->te.Then = 0; - ENCLOSURE_(node)->te.Else = 0; + case BAG_IF_ELSE: + BAG_(node)->te.Then = 0; + BAG_(node)->te.Else = 0; break; } - ENCLOSURE_(node)->opt_count = 0; + BAG_(node)->opt_count = 0; return node; } extern Node* -onig_node_new_enclosure(int type) +onig_node_new_bag(enum BagType type) { - return node_new_enclosure(type); + return node_new_bag(type); } static Node* -node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else) +node_new_bag_if_else(Node* cond, Node* Then, Node* Else) { Node* n; - n = node_new_enclosure(ENCLOSURE_IF_ELSE); + n = node_new_bag(BAG_IF_ELSE); CHECK_NULL_RETURN(n); NODE_BODY(n) = cond; - ENCLOSURE_(n)->te.Then = Then; - ENCLOSURE_(n)->te.Else = Else; + BAG_(n)->te.Then = Then; + BAG_(n)->te.Else = Else; return n; } static Node* node_new_memory(int is_named) { - Node* node = node_new_enclosure(ENCLOSURE_MEMORY); + Node* node = node_new_bag(BAG_MEMORY); CHECK_NULL_RETURN(node); if (is_named != 0) NODE_STATUS_ADD(node, NAMED_GROUP); @@ -2447,12 +2447,37 @@ node_new_memory(int is_named) static Node* node_new_option(OnigOptionType option) { - Node* node = node_new_enclosure(ENCLOSURE_OPTION); + Node* node = node_new_bag(BAG_OPTION); + CHECK_NULL_RETURN(node); + BAG_(node)->o.options = option; + return node; +} + +static Node* +node_new_group(Node* content) +{ + Node* node; + + node = node_new(); 
CHECK_NULL_RETURN(node); - ENCLOSURE_(node)->o.options = option; + NODE_SET_TYPE(node, NODE_LIST); + NODE_CAR(node) = content; + NODE_CDR(node) = NULL_NODE; + return node; } +static Node* +node_drop_group(Node* group) +{ + Node* content; + + content = NODE_CAR(group); + NODE_CAR(group) = NULL_NODE; + onig_node_free(group); + return content; +} + static int node_new_fail(Node** node, ScanEnv* env) { @@ -2543,7 +2568,7 @@ onig_free_reg_callout_list(int n, CalloutListEntry* list) extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num) { - RegexExt* ext = REG_EXTP(reg); + RegexExt* ext = reg->extp; CHECK_NULL_RETURN(ext); if (num <= 0 || num > ext->callout_num) @@ -2634,7 +2659,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env) ns[1] = NULL_NODE; r = ONIGERR_MEMORY; - ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0); + ns[0] = onig_node_new_anchor(ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0); if (IS_NULL(ns[0])) goto err; r = node_new_true_anychar(&ns[1], env); @@ -2661,7 +2686,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env) ns[0] = x; ns[1] = NULL_NODE; - x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + x = node_new_bag(BAG_STOP_BACKTRACK); if (IS_NULL(x)) goto err; NODE_BODY(x) = ns[0]; @@ -2721,7 +2746,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent, ns[0] = x; if (possessive != 0) { - x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + x = node_new_bag(BAG_STOP_BACKTRACK); if (IS_NULL(x)) goto err0; NODE_BODY(x) = ns[0]; @@ -2873,11 +2898,11 @@ is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody, quant = node; } else { - if (NODE_TYPE(node) == NODE_ENCLOSURE) { - EnclosureNode* en = ENCLOSURE_(node); - if (en->type == ENCLOSURE_STOP_BACKTRACK) { + if (NODE_TYPE(node) == NODE_BAG) { + BagNode* en = BAG_(node); + if (en->type == BAG_STOP_BACKTRACK) { *is_possessive = 1; - quant = NODE_ENCLOSURE_BODY(en); + quant = NODE_BAG_BODY(en); if 
(NODE_TYPE(quant) != NODE_QUANT) return 0; } @@ -3054,7 +3079,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, else { r = make_absent_tail(&ns[5], &ns[6], id1, env); if (r != 0) goto err; - + x = make_list(7, ns); if (IS_NULL(x)) goto err0; } @@ -3066,7 +3091,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, r = ONIGERR_MEMORY; err: for (i = 0; i < 7; i++) onig_node_free(ns[i]); - return r; + return r; } extern int @@ -3077,11 +3102,11 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) if (addlen > 0) { int len = (int )(STR_(node)->end - STR_(node)->s); - if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) { + if (STR_(node)->capacity > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) { UChar* p; int capa = len + addlen + NODE_STRING_MARGIN; - if (capa <= STR_(node)->capa) { + if (capa <= STR_(node)->capacity) { onig_strcpy(STR_(node)->s + len, s, end); } else { @@ -3092,8 +3117,8 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa); CHECK_NULL_RETURN_MEMERR(p); - STR_(node)->s = p; - STR_(node)->capa = capa; + STR_(node)->s = p; + STR_(node)->capacity = capa; } } else { @@ -3125,24 +3150,24 @@ extern void onig_node_conv_to_str_node(Node* node, int flag) { NODE_SET_TYPE(node, NODE_STRING); - STR_(node)->flag = flag; - STR_(node)->capa = 0; - STR_(node)->s = STR_(node)->buf; - STR_(node)->end = STR_(node)->buf; + STR_(node)->flag = flag; + STR_(node)->capacity = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; } extern void onig_node_str_clear(Node* node) { - if (STR_(node)->capa != 0 && + if (STR_(node)->capacity != 0 && IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { xfree(STR_(node)->s); } - STR_(node)->capa = 0; - STR_(node)->flag = 0; - STR_(node)->s = STR_(node)->buf; - STR_(node)->end = STR_(node)->buf; + STR_(node)->capacity = 0; + 
STR_(node)->flag = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; } static Node* @@ -3152,10 +3177,10 @@ node_new_str(const UChar* s, const UChar* end) CHECK_NULL_RETURN(node); NODE_SET_TYPE(node, NODE_STRING); - STR_(node)->capa = 0; - STR_(node)->flag = 0; - STR_(node)->s = STR_(node)->buf; - STR_(node)->end = STR_(node)->buf; + STR_(node)->capacity = 0; + STR_(node)->flag = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; if (onig_node_str_cat(node, s, end)) { onig_node_free(node); return NULL; @@ -3173,6 +3198,7 @@ static Node* node_new_str_raw(UChar* s, UChar* end) { Node* node = node_new_str(s, end); + CHECK_NULL_RETURN(node); NODE_STRING_SET_RAW(node); return node; } @@ -3205,6 +3231,7 @@ str_node_split_last_char(Node* node, OnigEncoding enc) p = onigenc_get_prev_char_head(enc, sn->s, sn->end); if (p && p > sn->s) { /* can be split. */ rn = node_new_str(p, sn->end); + CHECK_NULL_RETURN(rn); if (NODE_STRING_IS_RAW(node)) NODE_STRING_SET_RAW(rn); @@ -3795,7 +3822,7 @@ is_invalid_quantifier_target(Node* node) return 1; break; - case NODE_ENCLOSURE: + case NODE_BAG: /* allow enclosed elements */ /* return is_invalid_quantifier_target(NODE_BODY(node)); */ break; @@ -3877,7 +3904,7 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) if (pnum < 0 || cnum < 0) { if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) { if ((c->lower == c->upper) && ! 
IS_REPEAT_INFINITE(c->upper)) { - int n = positive_int_multiply(p->lower, c->lower); + int n = onig_positive_int_multiply(p->lower, c->lower); if (n >= 0) { p->lower = p->upper = n; NODE_BODY(pnode) = NODE_BODY(cnode); @@ -3972,7 +3999,7 @@ node_new_general_newline(Node** node, ScanEnv* env) if (r != 0) goto err1; } - x = node_new_enclosure_if_else(crnl, 0, ncc); + x = node_new_bag_if_else(crnl, 0, ncc); if (IS_NULL(x)) goto err1; *node = x; @@ -4552,7 +4579,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, OnigCodePoint x; UChar *q; UChar *p = from; - + while (p < to) { x = ONIGENC_MBC_TO_CODE(enc, p, to); q = p + enclen(enc, p); @@ -4701,12 +4728,12 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { PINC; tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); + tok->u.prop.not = c == 'P'; if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { PFETCH(c2); if (c2 == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + tok->u.prop.not = tok->u.prop.not == 0; } else PUNFETCH; @@ -4986,38 +5013,38 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'b': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BOUNDARY; + tok->u.anchor = ANCR_WORD_BOUNDARY; break; case 'B': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY; + tok->u.anchor = ANCR_NO_WORD_BOUNDARY; break; case 'y': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; + tok->u.anchor = ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; break; case 'Y': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; + tok->u.anchor = ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; break; #ifdef USE_WORD_BEGIN_END case '<': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BEGIN; + tok->u.anchor = ANCR_WORD_BEGIN; break; case '>': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_END; + tok->u.anchor = ANCR_WORD_END; break; #endif @@ -5092,26 +5119,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; begin_buf: tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_BUF; + tok->u.subtype = ANCR_BEGIN_BUF; break; case 'Z': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_SEMI_END_BUF; + tok->u.subtype = ANCR_SEMI_END_BUF; break; case 'z': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; end_buf: tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_END_BUF; + tok->u.subtype = ANCR_END_BUF; break; case 'G': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_POSITION; + tok->u.subtype = ANCR_BEGIN_POSITION; break; case '`': @@ -5214,7 +5241,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) goto skip_backref; } - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && (num <= env->num_mem || num <= 9)) { /* This spec. 
from GNU regex */ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node)) @@ -5382,13 +5409,13 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { PINC; tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); + tok->u.prop.not = c == 'P'; if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { PFETCH(c); if (c == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + tok->u.prop.not = tok->u.prop.not == 0; } else PUNFETCH; @@ -5606,14 +5633,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->options) - ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE); break; case '$': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->options) - ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); + ? 
ANCR_SEMI_END_BUF : ANCR_END_LINE); break; case '[': @@ -6509,7 +6536,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en } static int parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env); + UChar** src, UChar* end, ScanEnv* env, int group_head); #ifdef USE_CALLOUT @@ -6605,6 +6632,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv if (r != 0) return r; ext = onig_get_regex_ext(env->reg); + CHECK_NULL_RETURN_MEMERR(ext); if (IS_NULL(ext->pattern)) { r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); if (r != ONIG_NORMAL) return r; @@ -6625,6 +6653,11 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv } e = onig_reg_callout_list_at(env->reg, num); + if (IS_NULL(e)) { + xfree(contents); + return ONIGERR_MEMORY; + } + e->of = ONIG_CALLOUT_OF_CONTENTS; e->in = in; e->name_id = ONIG_NON_NAME_ID; @@ -6920,6 +6953,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en if (r != 0) return r; ext = onig_get_regex_ext(env->reg); + CHECK_NULL_RETURN_MEMERR(ext); if (IS_NULL(ext->pattern)) { r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); if (r != ONIG_NORMAL) return r; @@ -6934,6 +6968,8 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en if (r != ONIG_NORMAL) return r; e = onig_reg_callout_list_at(env->reg, num); + CHECK_NULL_RETURN_MEMERR(e); + e->of = ONIG_CALLOUT_OF_NAME; e->in = in; e->name_id = name_id; @@ -6957,8 +6993,8 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en #endif static int -parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) +parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) { int r, num; Node *target; @@ -6985,20 +7021,20 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, 
group: r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_subexp(np, tok, term, &p, end, env); + r = parse_subexp(np, tok, term, &p, end, env, 0); if (r < 0) return r; *src = p; return 1; /* group */ break; case '=': - *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0); + *np = onig_node_new_anchor(ANCR_PREC_READ, 0); break; case '!': /* preceding read */ - *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0); + *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0); break; case '>': /* (?>...) stop backtrack */ - *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + *np = node_new_bag(BAG_STOP_BACKTRACK); break; case '\'': @@ -7013,9 +7049,9 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; PFETCH(c); if (c == '=') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0); + *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0); else if (c == '!') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0); + *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0); else { if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { UChar *name; @@ -7043,7 +7079,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (r != 0) return r; *np = node_new_memory(1); CHECK_NULL_RETURN_MEMERR(*np); - ENCLOSURE_(*np)->m.regnum = num; + BAG_(*np)->m.regnum = num; if (list_capture != 0) MEM_STATUS_ON_SIMPLE(env->capture_history, num); env->num_named++; @@ -7080,7 +7116,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_subexp(&absent, tok, term, &p, end, env); + r = parse_subexp(&absent, tok, term, &p, end, env, 1); if (r < 0) { onig_node_free(absent); return r; @@ -7258,7 +7294,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, condition_is_checker = 0; r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = 
parse_subexp(&condition, tok, term, &p, end, env); + r = parse_subexp(&condition, tok, term, &p, end, env, 0); if (r < 0) { onig_node_free(condition); return r; @@ -7299,7 +7335,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, onig_node_free(condition); return r; } - r = parse_subexp(&target, tok, term, &p, end, env); + r = parse_subexp(&target, tok, term, &p, end, env, 1); if (r < 0) { onig_node_free(condition); onig_node_free(target); @@ -7327,7 +7363,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } - *np = node_new_enclosure_if_else(condition, Then, Else); + *np = node_new_bag_if_else(condition, Then, Else); if (IS_NULL(*np)) { onig_node_free(condition); onig_node_free(Then); @@ -7362,7 +7398,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, else if (num >= (int )MEM_STATUS_BITS_NUM) { return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } - ENCLOSURE_(*np)->m.regnum = num; + BAG_(*np)->m.regnum = num; MEM_STATUS_ON_SIMPLE(env->capture_history, num); } else { @@ -7431,7 +7467,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, env->options = option; r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); + r = parse_subexp(&target, tok, term, &p, end, env, 0); env->options = prev; if (r < 0) { onig_node_free(target); @@ -7472,13 +7508,13 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) return num; - ENCLOSURE_(*np)->m.regnum = num; + BAG_(*np)->m.regnum = num; } CHECK_NULL_RETURN_MEMERR(*np); r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); + r = parse_subexp(&target, tok, term, &p, end, env, 0); if (r < 0) { onig_node_free(target); return r; @@ -7486,10 +7522,10 @@ parse_enclosure(Node** np, OnigToken* 
tok, int term, UChar** src, UChar* end, NODE_BODY(*np) = target; - if (NODE_TYPE(*np) == NODE_ENCLOSURE) { - if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) { + if (NODE_TYPE(*np) == NODE_BAG) { + if (BAG_(*np)->type == BAG_MEMORY) { /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np); + r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np); if (r != 0) return r; } } @@ -7518,7 +7554,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) switch (NODE_TYPE(target)) { case NODE_STRING: - if (! group) { + if (group == 0) { if (str_node_can_be_split(target, env->enc)) { Node* n = str_node_split_last_char(target, env->enc); if (IS_NOT_NULL(n)) { @@ -7710,7 +7746,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) static int parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env, int group_head) { int r, len, group = 0; Node* qn; @@ -7724,22 +7760,35 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, case TK_ALT: case TK_EOT: end_of_token: - *np = node_new_empty(); - return tok->type; + *np = node_new_empty(); + CHECK_NULL_RETURN_MEMERR(*np); + return tok->type; break; case TK_SUBEXP_OPEN: - r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env); + r = parse_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env); if (r < 0) return r; - if (r == 1) group = 1; + if (r == 1) { /* group */ + if (group_head == 0) + group = 1; + else { + Node* target = *np; + *np = node_new_group(target); + if (IS_NULL(*np)) { + onig_node_free(target); + return ONIGERR_MEMORY; + } + group = 2; + } + } else if (r == 2) { /* option only */ Node* target; OnigOptionType prev = env->options; - env->options = ENCLOSURE_(*np)->o.options; + env->options = BAG_(*np)->o.options; r = fetch_token(tok, src, end, env); if (r < 0) return r; - r = parse_subexp(&target, tok, term, src, end, env); + r = parse_subexp(&target, 
tok, term, src, end, env, 0); env->options = prev; if (r < 0) { onig_node_free(target); @@ -7968,6 +8017,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, int ascii_mode = IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0; *np = onig_node_new_anchor(tok->u.anchor, ascii_mode); + CHECK_NULL_RETURN_MEMERR(*np); } break; @@ -7976,8 +8026,10 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; - else + else { *np = node_new_empty(); + CHECK_NULL_RETURN_MEMERR(*np); + } } else { goto tk_byte; @@ -8023,14 +8075,23 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, repeat: if (r == TK_OP_REPEAT || r == TK_INTERVAL) { + Node* target; + if (is_invalid_quantifier_target(*targetp)) return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 
1 : 0)); + r == TK_INTERVAL); CHECK_NULL_RETURN_MEMERR(qn); QUANT_(qn)->greedy = tok->u.repeat.greedy; - r = set_quantifier(qn, *targetp, group, env); + if (group == 2) { + target = node_drop_group(*np); + *np = NULL_NODE; + } + else { + target = *targetp; + } + r = set_quantifier(qn, target, group, env); if (r < 0) { onig_node_free(qn); return r; @@ -8038,7 +8099,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (tok->u.repeat.possessive != 0) { Node* en; - en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + en = node_new_bag(BAG_STOP_BACKTRACK); if (IS_NULL(en)) { onig_node_free(qn); return ONIGERR_MEMORY; @@ -8077,13 +8138,13 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, static int parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env, int group_head) { int r; Node *node, **headp; *top = NULL; - r = parse_exp(&node, tok, term, src, end, env); + r = parse_exp(&node, tok, term, src, end, env, group_head); if (r < 0) { onig_node_free(node); return r; @@ -8094,9 +8155,14 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, } else { *top = node_new_list(node, NULL); + if (IS_NULL(*top)) { + onig_node_free(node); + return ONIGERR_MEMORY; + } + headp = &(NODE_CDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { - r = parse_exp(&node, tok, term, src, end, env); + r = parse_exp(&node, tok, term, src, end, env, 0); if (r < 0) { onig_node_free(node); return r; @@ -8120,7 +8186,7 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env, int group_head) { int r; Node *node, **headp; @@ -8129,7 +8195,8 @@ parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, env->parse_depth++; if (env->parse_depth > ParseDepthLimit) return 
ONIGERR_PARSE_DEPTH_LIMIT_OVER; - r = parse_branch(&node, tok, term, src, end, env); + + r = parse_branch(&node, tok, term, src, end, env, group_head); if (r < 0) { onig_node_free(node); return r; @@ -8140,16 +8207,27 @@ parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, } else if (r == TK_ALT) { *top = onig_node_new_alt(node, NULL); + if (IS_NULL(*top)) { + onig_node_free(node); + return ONIGERR_MEMORY; + } + headp = &(NODE_CDR(*top)); while (r == TK_ALT) { r = fetch_token(tok, src, end, env); if (r < 0) return r; - r = parse_branch(&node, tok, term, src, end, env); + r = parse_branch(&node, tok, term, src, end, env, 0); if (r < 0) { onig_node_free(node); return r; } *headp = onig_node_new_alt(node, NULL); + if (IS_NULL(*headp)) { + onig_node_free(node); + onig_node_free(*top); + return ONIGERR_MEMORY; + } + headp = &(NODE_CDR(*headp)); } @@ -8177,7 +8255,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) r = fetch_token(&tok, src, end, env); if (r < 0) return r; - r = parse_subexp(top, &tok, TK_EOT, src, end, env); + r = parse_subexp(top, &tok, TK_EOT, src, end, env, 0); if (r < 0) return r; return 0; @@ -8193,7 +8271,7 @@ make_call_zero_body(Node* node, ScanEnv* env, Node** rnode) CHECK_NULL_RETURN_MEMERR(x); NODE_BODY(x) = node; - ENCLOSURE_(x)->m.regnum = 0; + BAG_(x)->m.regnum = 0; r = scan_env_set_mem_node(env, 0, x); if (r != 0) { onig_node_free(x); @@ -8249,7 +8327,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, reg->num_mem = env->num_mem; #ifdef USE_CALLOUT - ext = REG_EXTP(reg); + ext = reg->extp; if (IS_NOT_NULL(ext) && ext->callout_num > 0) { r = setup_ext_callout_list_values(reg); } diff --git a/src/regparse.h b/src/regparse.h index ff24eeb..ede9bb8 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -31,6 +31,10 @@ #include "regint.h" +#define NODE_STRING_MARGIN 16 +#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_BACKREFS_SIZE 6 + /* node type 
*/ typedef enum { NODE_STRING = 0, @@ -38,7 +42,7 @@ typedef enum { NODE_CTYPE = 2, NODE_BACKREF = 3, NODE_QUANT = 4, - NODE_ENCLOSURE = 5, + NODE_BAG = 5, NODE_ANCHOR = 6, NODE_LIST = 7, NODE_ALT = 8, @@ -46,95 +50,23 @@ typedef enum { NODE_GIMMICK = 10 } NodeType; +enum BagType { + BAG_MEMORY = 0, + BAG_OPTION = 1, + BAG_STOP_BACKTRACK = 2, + BAG_IF_ELSE = 3, +}; + enum GimmickType { - GIMMICK_FAIL = 0, - GIMMICK_KEEP = 1, - GIMMICK_SAVE = 2, + GIMMICK_FAIL = 0, + GIMMICK_KEEP = 1, + GIMMICK_SAVE = 2, GIMMICK_UPDATE_VAR = 3, #ifdef USE_CALLOUT - GIMMICK_CALLOUT = 4, + GIMMICK_CALLOUT = 4, #endif }; - -/* node type bit */ -#define NODE_TYPE2BIT(type) (1<<(type)) - -#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING) -#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS) -#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE) -#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF) -#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT) -#define NODE_BIT_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE) -#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR) -#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST) -#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT) -#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL) -#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK) - -#define NODE_IS_SIMPLE_TYPE(node) \ - ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ - (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0) - -#define NODE_TYPE(node) ((node)->u.base.node_type) -#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype) - -#define STR_(node) (&((node)->u.str)) -#define CCLASS_(node) (&((node)->u.cclass)) -#define CTYPE_(node) (&((node)->u.ctype)) -#define BACKREF_(node) (&((node)->u.backref)) -#define QUANT_(node) (&((node)->u.quant)) -#define ENCLOSURE_(node) (&((node)->u.enclosure)) -#define ANCHOR_(node) (&((node)->u.anchor)) -#define CONS_(node) (&((node)->u.cons)) -#define CALL_(node) (&((node)->u.call)) -#define GIMMICK_(node) (&((node)->u.gimmick)) - -#define NODE_CAR(node) 
(CONS_(node)->car) -#define NODE_CDR(node) (CONS_(node)->cdr) - -#define CTYPE_ANYCHAR -1 -#define NODE_IS_ANYCHAR(node) \ - (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) - -#define CTYPE_OPTION(node, reg) \ - (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) - - -#define ANCHOR_ANYCHAR_INF_MASK (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML) -#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) - -enum EnclosureType { - ENCLOSURE_MEMORY = 0, - ENCLOSURE_OPTION = 1, - ENCLOSURE_STOP_BACKTRACK = 2, - ENCLOSURE_IF_ELSE = 3, -}; - -#define NODE_STRING_MARGIN 16 -#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE 6 - -#define NODE_STRING_RAW (1<<0) /* by backslashed number */ -#define NODE_STRING_AMBIG (1<<1) -#define NODE_STRING_DONT_GET_OPT_INFO (1<<2) - -#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s) -#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW -#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW -#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG -#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \ - (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO -#define NODE_STRING_IS_RAW(node) \ - (((node)->u.str.flag & NODE_STRING_RAW) != 0) -#define NODE_STRING_IS_AMBIG(node) \ - (((node)->u.str.flag & NODE_STRING_AMBIG) != 0) -#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \ - (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0) - -#define BACKREFS_P(br) \ - (IS_NOT_NULL((br)->back_dynamic) ? 
(br)->back_dynamic : (br)->back_static) - enum QuantBodyEmpty { QUANT_BODY_IS_NOT_EMPTY = 0, QUANT_BODY_IS_EMPTY = 1, @@ -142,65 +74,6 @@ enum QuantBodyEmpty { QUANT_BODY_IS_EMPTY_REC = 3 }; -/* node status bits */ -#define NODE_ST_MIN_FIXED (1<<0) -#define NODE_ST_MAX_FIXED (1<<1) -#define NODE_ST_CLEN_FIXED (1<<2) -#define NODE_ST_MARK1 (1<<3) -#define NODE_ST_MARK2 (1<<4) -#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5) -#define NODE_ST_RECURSION (1<<6) -#define NODE_ST_CALLED (1<<7) -#define NODE_ST_ADDR_FIXED (1<<8) -#define NODE_ST_NAMED_GROUP (1<<9) -#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */ -#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */ -#define NODE_ST_IN_MULTI_ENTRY (1<<12) -#define NODE_ST_NEST_LEVEL (1<<13) -#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */ -#define NODE_ST_BY_NAME (1<<15) /* backref by name */ -#define NODE_ST_BACKREF (1<<16) -#define NODE_ST_CHECKER (1<<17) -#define NODE_ST_FIXED_OPTION (1<<18) -#define NODE_ST_PROHIBIT_RECURSION (1<<19) -#define NODE_ST_SUPER (1<<20) - - -#define NODE_STATUS(node) (((Node* )node)->u.base.status) -#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f)) -#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f)) - -#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0) -#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0) -#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0) -#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0) -#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0) -#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0) -#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0) -#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0) -#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & 
NODE_ST_CLEN_FIXED) != 0) -#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0) -#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0) -#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0) -#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0) -#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0) -#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0) -#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0) -#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0) -#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0) -#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0) -#define NODE_IS_PROHIBIT_RECURSION(node) \ - ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0) -#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ - ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0) - -#define NODE_BODY(node) ((node)->u.base.body) -#define NODE_QUANT_BODY(node) ((node)->body) -#define NODE_ENCLOSURE_BODY(node) ((node)->body) -#define NODE_CALL_BODY(node) ((node)->body) -#define NODE_ANCHOR_BODY(node) ((node)->body) - - typedef struct { NodeType node_type; int status; @@ -208,7 +81,7 @@ typedef struct { UChar* s; UChar* end; unsigned int flag; - int capa; /* (allocated size - 1) or 0: use buf[] */ + int capacity; /* (allocated size - 1) or 0: use buf[] */ UChar buf[NODE_STRING_BUF_SIZE]; } StrNode; @@ -240,7 +113,7 @@ typedef struct { int status; struct _Node* body; - enum EnclosureType type; + enum BagType type; union { struct { int regnum; @@ -262,7 +135,7 @@ typedef struct { OnigLen max_len; /* max length (byte) */ int char_len; /* character length */ int opt_count; /* referenced count in optimize_nodes() */ -} EnclosureNode; +} BagNode; #ifdef USE_CALL @@ -280,7 +153,7 @@ typedef struct { typedef struct { NodeType node_type; int status; - struct _Node* 
body; /* to EnclosureNode : ENCLOSURE_MEMORY */ + struct _Node* body; /* to BagNode : BAG_MEMORY */ int by_number; int group_num; @@ -350,7 +223,7 @@ typedef struct _Node { StrNode str; CClassNode cclass; QuantNode quant; - EnclosureNode enclosure; + BagNode bag; BackRefNode backref; AnchorNode anchor; ConsAltNode cons; @@ -362,9 +235,138 @@ typedef struct _Node { } u; } Node; - #define NULL_NODE ((Node* )0) + +/* node type bit */ +#define NODE_TYPE2BIT(type) (1<<(type)) + +#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING) +#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS) +#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE) +#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF) +#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT) +#define NODE_BIT_BAG NODE_TYPE2BIT(NODE_BAG) +#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR) +#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST) +#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT) +#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL) +#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK) + +#define NODE_IS_SIMPLE_TYPE(node) \ + ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ + (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0) + +#define NODE_TYPE(node) ((node)->u.base.node_type) +#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype) + +#define STR_(node) (&((node)->u.str)) +#define CCLASS_(node) (&((node)->u.cclass)) +#define CTYPE_(node) (&((node)->u.ctype)) +#define BACKREF_(node) (&((node)->u.backref)) +#define QUANT_(node) (&((node)->u.quant)) +#define BAG_(node) (&((node)->u.bag)) +#define ANCHOR_(node) (&((node)->u.anchor)) +#define CONS_(node) (&((node)->u.cons)) +#define CALL_(node) (&((node)->u.call)) +#define GIMMICK_(node) (&((node)->u.gimmick)) + +#define NODE_CAR(node) (CONS_(node)->car) +#define NODE_CDR(node) (CONS_(node)->cdr) + +#define CTYPE_ANYCHAR -1 +#define NODE_IS_ANYCHAR(node) \ + (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) + +#define CTYPE_OPTION(node, 
reg) \ + (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) + + +#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML) +#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF) + +#define NODE_STRING_RAW (1<<0) /* by backslashed number */ +#define NODE_STRING_AMBIG (1<<1) +#define NODE_STRING_GOOD_AMBIG (1<<2) +#define NODE_STRING_DONT_GET_OPT_INFO (1<<3) + +#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s) +#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW +#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW +#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG +#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG +#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \ + (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO +#define NODE_STRING_IS_RAW(node) \ + (((node)->u.str.flag & NODE_STRING_RAW) != 0) +#define NODE_STRING_IS_AMBIG(node) \ + (((node)->u.str.flag & NODE_STRING_AMBIG) != 0) +#define NODE_STRING_IS_GOOD_AMBIG(node) \ + (((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0) +#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \ + (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0) + +#define BACKREFS_P(br) \ + (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) + +/* node status bits */ +#define NODE_ST_MIN_FIXED (1<<0) +#define NODE_ST_MAX_FIXED (1<<1) +#define NODE_ST_CLEN_FIXED (1<<2) +#define NODE_ST_MARK1 (1<<3) +#define NODE_ST_MARK2 (1<<4) +#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5) +#define NODE_ST_RECURSION (1<<6) +#define NODE_ST_CALLED (1<<7) +#define NODE_ST_ADDR_FIXED (1<<8) +#define NODE_ST_NAMED_GROUP (1<<9) +#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. 
*/ +#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */ +#define NODE_ST_IN_MULTI_ENTRY (1<<12) +#define NODE_ST_NEST_LEVEL (1<<13) +#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */ +#define NODE_ST_BY_NAME (1<<15) /* backref by name */ +#define NODE_ST_BACKREF (1<<16) +#define NODE_ST_CHECKER (1<<17) +#define NODE_ST_FIXED_OPTION (1<<18) +#define NODE_ST_PROHIBIT_RECURSION (1<<19) +#define NODE_ST_SUPER (1<<20) + + +#define NODE_STATUS(node) (((Node* )node)->u.base.status) +#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f)) +#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f)) + +#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0) +#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0) +#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0) +#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0) +#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0) +#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0) +#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0) +#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0) +#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0) +#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0) +#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0) +#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0) +#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0) +#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0) +#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0) +#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0) +#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0) 
+#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0) +#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0) +#define NODE_IS_PROHIBIT_RECURSION(node) \ + ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0) +#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ + ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0) + +#define NODE_BODY(node) ((node)->u.base.body) +#define NODE_QUANT_BODY(node) ((node)->body) +#define NODE_BAG_BODY(node) ((node)->body) +#define NODE_CALL_BODY(node) ((node)->body) +#define NODE_ANCHOR_BODY(node) ((node)->body) + #define SCANENV_MEMENV_SIZE 8 #define SCANENV_MEMENV(senv) \ (IS_NOT_NULL((senv)->mem_env_dynamic) ? \ @@ -434,7 +436,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw)); extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); extern void onig_node_free P_((Node* node)); -extern Node* onig_node_new_enclosure P_((int type)); +extern Node* onig_node_new_bag P_((enum BagType type)); extern Node* onig_node_new_anchor P_((int type, int ascii_mode)); extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); diff --git a/src/regposerr.c b/src/regposerr.c index 2e2a8e2..c640a81 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -37,11 +37,7 @@ #include "config.h" #include "onigposix.h" -#ifdef HAVE_STRING_H -# include <string.h> -#else -# include <strings.h> -#endif +#include <string.h> #if defined(__GNUC__) # define ARG_UNUSED __attribute__ ((unused)) diff --git a/src/regsyntax.c b/src/regsyntax.c index aa95479..98b815c 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -67,8 +67,8 @@ OnigSyntaxType OnigSyntaxPosixExtended = { ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) , 0 - , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | - 
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | + , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | + ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) @@ -113,10 +113,7 @@ static int code_to_mbclen(OnigCodePoint code) { if (code < 256) { - if (EncLen_SJIS[(int )code] == 1) - return 1; - else - return 0; + return EncLen_SJIS[(int )code] == 1; } else if (code <= 0xffff) { return 2; @@ -188,7 +185,7 @@ is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); - + } #endif @@ -223,7 +220,7 @@ left_adjust_char_head(const UChar* start, const UChar* s) p++; break; } - } + } } len = enclen(ONIG_ENCODING_SJIS, p); if (p + len > s) return (UChar* )p; @@ -338,6 +335,6 @@ OnigEncodingType OnigEncodingSJIS = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0, 0, 0 }; diff --git a/src/unicode.c b/src/unicode.c index 63bc65c..04944b9 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -657,8 +657,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev, #ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER if (! 
ONIGENC_IS_UNICODE_ENCODING(enc)) { - if (from == 0x000d && to == 0x000a) return 0; - else return 1; + return from != 0x000d || to != 0x000a; } btype = unicode_egcb_is_break_2code(from, to); @@ -701,8 +700,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev, return 1; #else - if (from == 0x000d && to == 0x000a) return 0; - else return 1; + return from != 0x000d || to != 0x000a; #endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */ } @@ -729,6 +727,7 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) int len; int c; char* s; + UChar* uname; if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM) return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS; @@ -741,10 +740,11 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) if (s == 0) return ONIGERR_MEMORY; + uname = (UChar* )name; n = 0; for (i = 0; i < len; i++) { - c = name[i]; - if (c <= 0 || c >= 0x80) { + c = uname[i]; + if (c < 0x20 || c >= 0x80) { xfree(s); return ONIGERR_INVALID_CHAR_PROPERTY_NAME; } @@ -758,6 +758,10 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) if (UserDefinedPropertyTable == 0) { UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10); + if (IS_NULL(UserDefinedPropertyTable)) { + xfree(s); + return ONIGERR_MEMORY; + } } e = UserDefinedPropertyRanges + UserDefinedPropertyNum; diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c index 0f4712a..07916b4 100644 --- a/src/unicode_fold1_key.c +++ b/src/unicode_fold1_key.c @@ -2988,5 +2988,3 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[]) } return -1; } - - diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c index 1469a46..3bc4bd6 100644 --- a/src/unicode_fold2_key.c +++ b/src/unicode_fold2_key.c @@ -225,5 +225,3 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[]) } return -1; } - - diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c index 70e70c8..b935db6 100644 --- a/src/unicode_fold3_key.c +++ 
b/src/unicode_fold3_key.c @@ -135,5 +135,3 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[]) } return -1; } - - diff --git a/src/unicode_fold_data.c b/src/unicode_fold_data.c index 7f7e24e..0b2c519 100644 --- a/src/unicode_fold_data.c +++ b/src/unicode_fold_data.c @@ -1513,4 +1513,3 @@ OnigCodePoint OnigUnicodeFolds3[] = { /* ----- LOCALE ----- */ #define FOLDS3_END_INDEX 72 }; - diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c index b4c0318..bf71df0 100644 --- a/src/unicode_unfold_key.c +++ b/src/unicode_unfold_key.c @@ -3283,5 +3283,3 @@ onigenc_unicode_unfold_key(OnigCodePoint code) } return 0; } - - diff --git a/src/utf16_be.c b/src/utf16_be.c index 8f5b8bf..a812a32 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -278,6 +278,6 @@ OnigEncodingType OnigEncodingUTF16_BE = { init, 0, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_UNICODE, + ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_2, 0, 0 }; diff --git a/src/utf16_le.c b/src/utf16_le.c index 92bf318..da9571f 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -286,6 +286,6 @@ OnigEncodingType OnigEncodingUTF16_LE = { init, 0, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_UNICODE, + ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; diff --git a/src/utf32_be.c b/src/utf32_be.c index 92476ec..9339b15 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -191,6 +191,6 @@ OnigEncodingType OnigEncodingUTF32_BE = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_UNICODE, + ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_4, 0, 0 }; diff --git a/src/utf32_le.c b/src/utf32_le.c index dc3fd92..22e007c 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -191,6 +191,6 @@ OnigEncodingType OnigEncodingUTF32_LE = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_UNICODE, + ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1, 0, 0 }; @@ -57,7 +57,7 @@ static const int EncLen_UTF8[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 + 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; static int @@ -280,7 +280,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF8 = { mbc_enc_len, "UTF-8", /* name */ - 6, /* max enc length */ + 4, /* max enc length */ 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, mbc_to_code, @@ -297,6 +297,6 @@ OnigEncodingType OnigEncodingUTF8 = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1_OR_0, 0, 0 }; diff --git a/test/.gitignore b/test/.gitignore deleted file mode 100644 index 04918c1..0000000 --- a/test/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/test_utf8 -/testc -/testcu -/testp diff --git a/test/test_utf8.c b/test/test_utf8.c index d5a966b..02aa06b 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -9,11 +9,7 @@ #include "oniguruma.h" -#ifdef HAVE_STRING_H -# include <string.h> -#else -# include <strings.h> -#endif +#include <string.h> #define SLEN(s) strlen(s) @@ -276,7 +272,33 @@ extern int main(int argc, char* argv[]) x2("(?i:a)", "a", 0, 1); x2("(?i:a)", "A", 0, 1); x2("(?i:A)", "a", 0, 1); + x2("(?i:i)", "I", 0, 1); + x2("(?i:I)", "i", 0, 1); + x2("(?i:[A-Z])", "i", 0, 1); + x2("(?i:[a-z])", "I", 0, 1); n("(?i:A)", "b"); + x2("(?i:ss)", "ss", 0, 2); + x2("(?i:ss)", "Ss", 0, 2); + x2("(?i:ss)", "SS", 0, 2); + /* 0xc5,0xbf == 017F: # LATIN SMALL LETTER LONG S */ + x2("(?i:ss)", "\xc5\xbfS", 0, 3); + x2("(?i:ss)", "s\xc5\xbf", 0, 3); + /* 0xc3,0x9f == 00DF: # LATIN SMALL LETTER SHARP S */ + x2("(?i:ss)", "\xc3\x9f", 0, 2); + /* 0xe1,0xba,0x9e == 1E9E # LATIN CAPITAL LETTER SHARP S */ + x2("(?i:ss)", "\xe1\xba\x9e", 0, 3); + x2("(?i:xssy)", "xssy", 0, 4); + x2("(?i:xssy)", "xSsy", 0, 4); + x2("(?i:xssy)", "xSSy", 0, 4); + x2("(?i:xssy)", "x\xc5\xbfSy", 0, 5); + x2("(?i:xssy)", 
"xs\xc5\xbfy", 0, 5); + x2("(?i:xssy)", "x\xc3\x9fy", 0, 4); + x2("(?i:xssy)", "x\xe1\xba\x9ey", 0, 5); + x2("(?i:\xc3\x9f)", "ss", 0, 2); + x2("(?i:\xc3\x9f)", "SS", 0, 2); + x2("(?i:[\xc3\x9f])", "ss", 0, 2); + x2("(?i:[\xc3\x9f])", "SS", 0, 2); + x2("(?i)(?<!ss)z", "qqz", 2, 3); x2("(?i:[A-Z])", "a", 0, 1); x2("(?i:[f-m])", "H", 0, 1); x2("(?i:[f-m])", "h", 0, 1); @@ -603,6 +625,7 @@ extern int main(int argc, char* argv[]) x2("(?:()|())*\\2", "abc", 0, 0); x2("(?:()|()|())*\\3\\1", "abc", 0, 0); x2("(|(?:a(?:\\g'1')*))b|", "abc", 0, 2); + x2("^(\"|)(.*)\\1$", "XX", 0, 2); x2("(?~)", "", 0, 0); x2("(?~)", "A", 0, 0); diff --git a/test/testc.c b/test/testc.c index e37665a..4aa8807 100644 --- a/test/testc.c +++ b/test/testc.c @@ -10,11 +10,7 @@ #include "oniguruma.h" #endif -#ifdef HAVE_STRING_H -# include <string.h> -#else -# include <strings.h> -#endif +#include <string.h> #define SLEN(s) strlen(s) diff --git a/windows/testc.c b/windows/testc.c index e17842f..2e13350 100644 --- a/windows/testc.c +++ b/windows/testc.c @@ -1,869 +1,865 @@ -/*
- * This program was generated by testconv.rb.
- */
-#ifdef ONIG_ESCAPE_UCHAR_COLLISION
-#undef ONIG_ESCAPE_UCHAR_COLLISION
-#endif
-#include <stdio.h>
-
-#ifdef POSIX_TEST
-#include "onigposix.h"
-#else
-#include "oniguruma.h"
-#endif
-
-#ifdef _WIN32
-# include <string.h>
-#else
-# include <strings.h>
-#endif
-
-#define SLEN(s) strlen(s)
-
-static int nsucc = 0;
-static int nfail = 0;
-static int nerror = 0;
-
-static FILE* err_file;
-
-#ifndef POSIX_TEST
-static OnigRegion* region;
-#endif
-
-static void xx(char* pattern, char* str, int from, int to, int mem, int not)
-{
- int r;
-
-#ifdef POSIX_TEST
- regex_t reg;
- char buf[200];
- regmatch_t pmatch[25];
-
- r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
- if (r) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(err_file, "ERROR: %s\n", buf);
- nerror++;
- return ;
- }
-
- r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
- if (r != 0 && r != REG_NOMATCH) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(err_file, "ERROR: %s\n", buf);
- nerror++;
- return ;
- }
-
- if (r == REG_NOMATCH) {
- if (not) {
- fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- }
- else {
- if (not) {
- fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- else {
- if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
- fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
- from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
- nfail++;
- }
- }
- }
- regfree(&reg);
-
-#else
- regex_t* reg;
- OnigErrorInfo einfo;
-
- r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)),
- ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo);
- if (r) {
- char s[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str((UChar* )s, r, &einfo);
- fprintf(err_file, "ERROR: %s\n", s);
- nerror++;
- return ;
- }
-
- r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
- (UChar* )str, (UChar* )(str + SLEN(str)),
- region, ONIG_OPTION_NONE);
- if (r < ONIG_MISMATCH) {
- char s[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str((UChar* )s, r);
- fprintf(err_file, "ERROR: %s\n", s);
- nerror++;
- return ;
- }
-
- if (r == ONIG_MISMATCH) {
- if (not) {
- fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- }
- else {
- if (not) {
- fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- else {
- if (region->beg[mem] == from && region->end[mem] == to) {
- fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
- from, to, region->beg[mem], region->end[mem]);
- nfail++;
- }
- }
- }
- onig_free(reg);
-#endif
-}
-
-static void x2(char* pattern, char* str, int from, int to)
-{
- xx(pattern, str, from, to, 0, 0);
-}
-
-static void x3(char* pattern, char* str, int from, int to, int mem)
-{
- xx(pattern, str, from, to, mem, 0);
-}
-
-static void n(char* pattern, char* str)
-{
- xx(pattern, str, 0, 0, 0, 1);
-}
-
-extern int main(int argc, char* argv[])
-{
-#ifndef POSIX_TEST
- static OnigEncoding use_encs[1];
-
- use_encs[0] = ONIG_ENCODING_SJIS;
- onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));
-#endif
-
- err_file = stdout;
-
-#ifdef POSIX_TEST
- reg_set_encoding(REG_POSIX_ENCODING_SJIS);
-#else
- region = onig_region_new();
-#endif
-
- x2("", "", 0, 0);
- x2("^", "", 0, 0);
- x2("$", "", 0, 0);
- x2("\\G", "", 0, 0);
- x2("\\A", "", 0, 0);
- x2("\\Z", "", 0, 0);
- x2("\\z", "", 0, 0);
- x2("^$", "", 0, 0);
- x2("\\ca", "\001", 0, 1);
- x2("\\C-b", "\002", 0, 1);
- x2("\\c\\\\", "\034", 0, 1);
- x2("q[\\c\\\\]", "q\034", 0, 2);
- x2("", "a", 0, 0);
- x2("a", "a", 0, 1);
- x2("\\x61", "a", 0, 1);
- x2("aa", "aa", 0, 2);
- x2("aaa", "aaa", 0, 3);
- x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
- x2("ab", "ab", 0, 2);
- x2("b", "ab", 1, 2);
- x2("bc", "abc", 1, 3);
- x2("(?i:#RET#)", "#INS##RET#", 5, 10);
- x2("\\17", "\017", 0, 1);
- x2("\\x1f", "\x1f", 0, 1);
- x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
- x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
- x2(".", "a", 0, 1);
- n(".", "");
- x2("..", "ab", 0, 2);
- x2("\\w", "e", 0, 1);
- n("\\W", "e");
- x2("\\s", " ", 0, 1);
- x2("\\S", "b", 0, 1);
- x2("\\d", "4", 0, 1);
- n("\\D", "4");
- x2("\\b", "z ", 0, 0);
- x2("\\b", " z", 1, 1);
- x2("\\B", "zz ", 1, 1);
- x2("\\B", "z ", 2, 2);
- x2("\\B", " z", 0, 0);
- x2("[ab]", "b", 0, 1);
- n("[ab]", "c");
- x2("[a-z]", "t", 0, 1);
- n("[^a]", "a");
- x2("[^a]", "\n", 0, 1);
- x2("[]]", "]", 0, 1);
- n("[^]]", "]");
- x2("[\\^]+", "0^^1", 1, 3);
- x2("[b-]", "b", 0, 1);
- x2("[b-]", "-", 0, 1);
- x2("[\\w]", "z", 0, 1);
- n("[\\w]", " ");
- x2("[\\W]", "b$", 1, 2);
- x2("[\\d]", "5", 0, 1);
- n("[\\d]", "e");
- x2("[\\D]", "t", 0, 1);
- n("[\\D]", "3");
- x2("[\\s]", " ", 0, 1);
- n("[\\s]", "a");
- x2("[\\S]", "b", 0, 1);
- n("[\\S]", " ");
- x2("[\\w\\d]", "2", 0, 1);
- n("[\\w\\d]", " ");
- x2("[[:upper:]]", "B", 0, 1);
- x2("[*[:xdigit:]+]", "+", 0, 1);
- x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
- x2("[*[:xdigit:]+]", "-@^+", 3, 4);
- n("[[:upper]]", "A");
- x2("[[:upper]]", ":", 0, 1);
- x2("[\\044-\\047]", "\046", 0, 1);
- x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
- x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
- n("[\\x6A-\\x6D]", "\x6E");
- n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
- x2("[\\[]", "[", 0, 1);
- x2("[\\]]", "]", 0, 1);
- x2("[&]", "&", 0, 1);
- x2("[[ab]]", "b", 0, 1);
- x2("[[ab]c]", "c", 0, 1);
- n("[[^a]]", "a");
- n("[^[a]]", "a");
- x2("[[ab]&&bc]", "b", 0, 1);
- n("[[ab]&&bc]", "a");
- n("[[ab]&&bc]", "c");
- x2("[a-z&&b-y&&c-x]", "w", 0, 1);
- n("[^a-z&&b-y&&c-x]", "w");
- x2("[[^a&&a]&&a-z]", "b", 0, 1);
- n("[[^a&&a]&&a-z]", "a");
- x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
- n("[[^a-z&&bcdef]&&[^c-g]]", "c");
- x2("[^[^abc]&&[^cde]]", "c", 0, 1);
- x2("[^[^abc]&&[^cde]]", "e", 0, 1);
- n("[^[^abc]&&[^cde]]", "f");
- x2("[a-&&-a]", "-", 0, 1);
- n("[a\\-&&\\-a]", "&");
- n("\\wabc", " abc");
- x2("a\\Wbc", "a bc", 0, 4);
- x2("a.b.c", "aabbc", 0, 5);
- x2(".\\wb\\W..c", "abb bcc", 0, 7);
- x2("\\s\\wzzz", " zzzz", 0, 5);
- x2("aa.b", "aabb", 0, 4);
- n(".a", "ab");
- x2(".a", "aa", 0, 2);
- x2("^a", "a", 0, 1);
- x2("^a$", "a", 0, 1);
- x2("^\\w$", "a", 0, 1);
- n("^\\w$", " ");
- x2("^\\wab$", "zab", 0, 3);
- x2("^\\wabcdef$", "zabcdef", 0, 7);
- x2("^\\w...def$", "zabcdef", 0, 7);
- x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
- x2("\\A\\Z", "", 0, 0);
- x2("\\Axyz", "xyz", 0, 3);
- x2("xyz\\Z", "xyz", 0, 3);
- x2("xyz\\z", "xyz", 0, 3);
- x2("a\\Z", "a", 0, 1);
- x2("\\Gaz", "az", 0, 2);
- n("\\Gz", "bza");
- n("az\\G", "az");
- n("az\\A", "az");
- n("a\\Az", "az");
- x2("\\^\\$", "^$", 0, 2);
- x2("^x?y", "xy", 0, 2);
- x2("^(x?y)", "xy", 0, 2);
- x2("\\w", "_", 0, 1);
- n("\\W", "_");
- x2("(?=z)z", "z", 0, 1);
- n("(?=z).", "a");
- x2("(?!z)a", "a", 0, 1);
- n("(?!z)a", "z");
- x2("(?i:a)", "a", 0, 1);
- x2("(?i:a)", "A", 0, 1);
- x2("(?i:A)", "a", 0, 1);
- n("(?i:A)", "b");
- x2("(?i:[A-Z])", "a", 0, 1);
- x2("(?i:[f-m])", "H", 0, 1);
- x2("(?i:[f-m])", "h", 0, 1);
- n("(?i:[f-m])", "e");
- x2("(?i:[A-c])", "D", 0, 1);
- n("(?i:[^a-z])", "A");
- n("(?i:[^a-z])", "a");
- x2("(?i:[!-k])", "Z", 0, 1);
- x2("(?i:[!-k])", "7", 0, 1);
- x2("(?i:[T-}])", "b", 0, 1);
- x2("(?i:[T-}])", "{", 0, 1);
- x2("(?i:\\?a)", "?A", 0, 2);
- x2("(?i:\\*A)", "*a", 0, 2);
- n(".", "\n");
- x2("(?m:.)", "\n", 0, 1);
- x2("(?m:a.)", "a\n", 0, 2);
- x2("(?m:.b)", "a\nb", 1, 3);
- x2(".*abc", "dddabdd\nddabc", 8, 13);
- x2("(?m:.*abc)", "dddabddabc", 0, 10);
- n("(?i)(?-i)a", "A");
- n("(?i)(?-i:a)", "A");
- x2("a?", "", 0, 0);
- x2("a?", "b", 0, 0);
- x2("a?", "a", 0, 1);
- x2("a*", "", 0, 0);
- x2("a*", "a", 0, 1);
- x2("a*", "aaa", 0, 3);
- x2("a*", "baaaa", 0, 0);
- n("a+", "");
- x2("a+", "a", 0, 1);
- x2("a+", "aaaa", 0, 4);
- x2("a+", "aabbb", 0, 2);
- x2("a+", "baaaa", 1, 5);
- x2(".?", "", 0, 0);
- x2(".?", "f", 0, 1);
- x2(".?", "\n", 0, 0);
- x2(".*", "", 0, 0);
- x2(".*", "abcde", 0, 5);
- x2(".+", "z", 0, 1);
- x2(".+", "zdswer\n", 0, 6);
- x2("(.*)a\\1f", "babfbac", 0, 4);
- x2("(.*)a\\1f", "bacbabf", 3, 7);
- x2("((.*)a\\2f)", "bacbabf", 3, 7);
- x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
- x2("a|b", "a", 0, 1);
- x2("a|b", "b", 0, 1);
- x2("|a", "a", 0, 0);
- x2("(|a)", "a", 0, 0);
- x2("ab|bc", "ab", 0, 2);
- x2("ab|bc", "bc", 0, 2);
- x2("z(?:ab|bc)", "zbc", 0, 3);
- x2("a(?:ab|bc)c", "aabc", 0, 4);
- x2("ab|(?:ac|az)", "az", 0, 2);
- x2("a|b|c", "dc", 1, 2);
- x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
- n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
- x2("a|^z", "ba", 1, 2);
- x2("a|^z", "za", 0, 1);
- x2("a|\\Gz", "bza", 2, 3);
- x2("a|\\Gz", "za", 0, 1);
- x2("a|\\Az", "bza", 2, 3);
- x2("a|\\Az", "za", 0, 1);
- x2("a|b\\Z", "ba", 1, 2);
- x2("a|b\\Z", "b", 0, 1);
- x2("a|b\\z", "ba", 1, 2);
- x2("a|b\\z", "b", 0, 1);
- x2("\\w|\\s", " ", 0, 1);
- n("\\w|\\w", " ");
- x2("\\w|%", "%", 0, 1);
- x2("\\w|[&$]", "&", 0, 1);
- x2("[b-d]|[^e-z]", "a", 0, 1);
- x2("(?:a|[c-f])|bz", "dz", 0, 1);
- x2("(?:a|[c-f])|bz", "bz", 0, 2);
- x2("abc|(?=zz)..f", "zzf", 0, 3);
- x2("abc|(?!zz)..f", "abf", 0, 3);
- x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
- n("(?>a|abd)c", "abdc");
- x2("(?>abd|a)c", "abdc", 0, 4);
- x2("a?|b", "a", 0, 1);
- x2("a?|b", "b", 0, 0);
- x2("a?|b", "", 0, 0);
- x2("a*|b", "aa", 0, 2);
- x2("a*|b*", "ba", 0, 0);
- x2("a*|b*", "ab", 0, 1);
- x2("a+|b*", "", 0, 0);
- x2("a+|b*", "bbb", 0, 3);
- x2("a+|b*", "abbb", 0, 1);
- n("a+|b+", "");
- x2("(a|b)?", "b", 0, 1);
- x2("(a|b)*", "ba", 0, 2);
- x2("(a|b)+", "bab", 0, 3);
- x2("(ab|ca)+", "caabbc", 0, 4);
- x2("(ab|ca)+", "aabca", 1, 5);
- x2("(ab|ca)+", "abzca", 0, 2);
- x2("(a|bab)+", "ababa", 0, 5);
- x2("(a|bab)+", "ba", 1, 2);
- x2("(a|bab)+", "baaaba", 1, 4);
- x2("(?:a|b)(?:a|b)", "ab", 0, 2);
- x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
- x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
- x2("(?:a+|b+){2}", "aaabbb", 0, 6);
- x2("h{0,}", "hhhh", 0, 4);
- x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
- n("ax{2}*a", "0axxxa1");
- n("a.{0,2}a", "0aXXXa0");
- n("a.{0,2}?a", "0aXXXa0");
- n("a.{0,2}?a", "0aXXXXa0");
- x2("^a{2,}?a$", "aaa", 0, 3);
- x2("^[a-z]{2,}?$", "aaa", 0, 3);
- x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
- n("(?:a+|\\Ab*)cc", "abcc");
- x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
- x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
- x2("a|(?i)c", "C", 0, 1);
- x2("(?i)c|a", "C", 0, 1);
- x2("(?i)c|a", "A", 0, 1);
- x2("(?i:c)|a", "C", 0, 1);
- n("(?i:c)|a", "A");
- x2("[abc]?", "abc", 0, 1);
- x2("[abc]*", "abc", 0, 3);
- x2("[^abc]*", "abc", 0, 0);
- n("[^abc]+", "abc");
- x2("a?\?", "aaa", 0, 0);
- x2("ba?\?b", "bab", 0, 3);
- x2("a*?", "aaa", 0, 0);
- x2("ba*?", "baa", 0, 1);
- x2("ba*?b", "baab", 0, 4);
- x2("a+?", "aaa", 0, 1);
- x2("ba+?", "baa", 0, 2);
- x2("ba+?b", "baab", 0, 4);
- x2("(?:a?)?\?", "a", 0, 0);
- x2("(?:a?\?)?", "a", 0, 0);
- x2("(?:a?)+?", "aaa", 0, 1);
- x2("(?:a+)?\?", "aaa", 0, 0);
- x2("(?:a+)?\?b", "aaab", 0, 4);
- x2("(?:ab)?{2}", "", 0, 0);
- x2("(?:ab)?{2}", "ababa", 0, 4);
- x2("(?:ab)*{0}", "ababa", 0, 0);
- x2("(?:ab){3,}", "abababab", 0, 8);
- n("(?:ab){3,}", "abab");
- x2("(?:ab){2,4}", "ababab", 0, 6);
- x2("(?:ab){2,4}", "ababababab", 0, 8);
- x2("(?:ab){2,4}?", "ababababab", 0, 4);
- x2("(?:ab){,}", "ab{,}", 0, 5);
- x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
- x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
- x2("(d+)([^abc]z)", "dddz", 0, 4);
- x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
- x2("(\\w+)(\\wz)", "dddz", 0, 4);
- x3("(a)", "a", 0, 1, 1);
- x3("(ab)", "ab", 0, 2, 1);
- x2("((ab))", "ab", 0, 2);
- x3("((ab))", "ab", 0, 2, 1);
- x3("((ab))", "ab", 0, 2, 2);
- x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
- x3("(ab)(cd)", "abcd", 0, 2, 1);
- x3("(ab)(cd)", "abcd", 2, 4, 2);
- x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
- x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
- x2("(^a)", "a", 0, 1);
- x3("(a)|(a)", "ba", 1, 2, 1);
- x3("(^a)|(a)", "ba", 1, 2, 2);
- x3("(a?)", "aaa", 0, 1, 1);
- x3("(a*)", "aaa", 0, 3, 1);
- x3("(a*)", "", 0, 0, 1);
- x3("(a+)", "aaaaaaa", 0, 7, 1);
- x3("(a+|b*)", "bbbaa", 0, 3, 1);
- x3("(a+|b?)", "bbbaa", 0, 1, 1);
- x3("(abc)?", "abc", 0, 3, 1);
- x3("(abc)*", "abc", 0, 3, 1);
- x3("(abc)+", "abc", 0, 3, 1);
- x3("(xyz|abc)+", "abc", 0, 3, 1);
- x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
- x3("((?i:abc))", "AbC", 0, 3, 1);
- x2("(abc)(?i:\\1)", "abcABC", 0, 6);
- x3("((?m:a.c))", "a\nc", 0, 3, 1);
- x3("((?=az)a)", "azb", 0, 1, 1);
- x3("abc|(.abd)", "zabd", 0, 4, 1);
- x2("(?:abc)|(ABC)", "abc", 0, 3);
- x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
- x3("a*(.)", "aaaaz", 4, 5, 1);
- x3("a*?(.)", "aaaaz", 0, 1, 1);
- x3("a*?(c)", "aaaac", 4, 5, 1);
- x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
- x3("(\\Abb)cc", "bbcc", 0, 2, 1);
- n("(\\Abb)cc", "zbbcc");
- x3("(^bb)cc", "bbcc", 0, 2, 1);
- n("(^bb)cc", "zbbcc");
- x3("cc(bb$)", "ccbb", 2, 4, 1);
- n("cc(bb$)", "ccbbb");
- n("(\\1)", "");
- n("\\1(a)", "aa");
- n("(a(b)\\1)\\2+", "ababb");
- n("(?:(?:\\1|z)(a))+$", "zaa");
- x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
- x2("(a)(?=\\1)", "aa", 0, 1);
- n("(a)$|\\1", "az");
- x2("(a)\\1", "aa", 0, 2);
- n("(a)\\1", "ab");
- x2("(a?)\\1", "aa", 0, 2);
- x2("(a?\?)\\1", "aa", 0, 0);
- x2("(a*)\\1", "aaaaa", 0, 4);
- x3("(a*)\\1", "aaaaa", 0, 2, 1);
- x2("a(b*)\\1", "abbbb", 0, 5);
- x2("a(b*)\\1", "ab", 0, 1);
- x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
- x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
- x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
- x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
- x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
- x2("([a-d])\\1", "cc", 0, 2);
- x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
- n("(\\w\\d\\s)\\1", "f5 f5");
- x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
- x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
- x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
- x2("(^a)\\1", "aa", 0, 2);
- n("(^a)\\1", "baa");
- n("(a$)\\1", "aa");
- n("(ab\\Z)\\1", "ab");
- x2("(a*\\Z)\\1", "a", 1, 1);
- x2(".(a*\\Z)\\1", "ba", 1, 2);
- x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
- x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
- x2("((?i:az))\\1", "AzAz", 0, 4);
- n("((?i:az))\\1", "Azaz");
- x2("(?<=a)b", "ab", 1, 2);
- n("(?<=a)b", "bb");
- x2("(?<=a|b)b", "bb", 1, 2);
- x2("(?<=a|bc)b", "bcb", 2, 3);
- x2("(?<=a|bc)b", "ab", 1, 2);
- x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
- x2("(a)\\g<1>", "aa", 0, 2);
- x2("(?<!a)b", "cb", 1, 2);
- n("(?<!a)b", "ab");
- x2("(?<!a|bc)b", "bbb", 0, 1);
- n("(?<!a|bc)z", "bcz");
- x2("(?<name1>a)", "a", 0, 1);
- x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
- x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
- x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
- x2("(?<n>|a\\g<n>)+", "", 0, 0);
- x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
- x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
- x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
- x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
- x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
- x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
- x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
- x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
- x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
- x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
- x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
- n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
- x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
- x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
- x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
- x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
- x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
- x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
- x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
- x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
- x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
- x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
- x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
- x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
- x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
- x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
- x2("()*\\1", "", 0, 0);
- x2("(?:()|())*\\1\\2", "", 0, 0);
- x3("(?:\\1a|())*", "a", 0, 0, 1);
- x2("x((.)*)*x", "0x1x2x3", 1, 6);
- x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
- x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
- x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
- x2("\\xED\\xF2", "\xed\xf2", 0, 2);
- x2("", "あ", 0, 0);
- x2("あ", "あ", 0, 2);
- n("い", "あ");
- x2("うう", "うう", 0, 4);
- x2("あいう", "あいう", 0, 6);
- x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70);
- x2("あ", "いあ", 2, 4);
- x2("いう", "あいう", 2, 6);
- x2("\\xca\\xb8", "\xca\xb8", 0, 2);
- x2(".", "あ", 0, 2);
- x2("..", "かき", 0, 4);
- x2("\\w", "お", 0, 2);
- n("\\W", "あ");
- x2("[\\W]", "う$", 2, 3);
- x2("\\S", "そ", 0, 2);
- x2("\\S", "漢", 0, 2);
- x2("\\b", "気 ", 0, 0);
- x2("\\b", " ほ", 1, 1);
- x2("\\B", "せそ ", 2, 2);
- x2("\\B", "う ", 3, 3);
- x2("\\B", " い", 0, 0);
- x2("[たち]", "ち", 0, 2);
- n("[なに]", "ぬ");
- x2("[う-お]", "え", 0, 2);
- n("[^け]", "け");
- x2("[\\w]", "ね", 0, 2);
- n("[\\d]", "ふ");
- x2("[\\D]", "は", 0, 2);
- n("[\\s]", "く");
- x2("[\\S]", "へ", 0, 2);
- x2("[\\w\\d]", "よ", 0, 2);
- x2("[\\w\\d]", " よ", 3, 5);
- n("\\w鬼車", " 鬼車");
- x2("鬼\\W車", "鬼 車", 0, 5);
- x2("あ.い.う", "ああいいう", 0, 10);
- x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13);
- x2("\\s\\wこここ", " ここここ", 0, 9);
- x2("ああ.け", "ああけけ", 0, 8);
- n(".い", "いえ");
- x2(".お", "おお", 0, 4);
- x2("^あ", "あ", 0, 2);
- x2("^む$", "む", 0, 2);
- x2("^\\w$", "に", 0, 2);
- x2("^\\wかきくけこ$", "zかきくけこ", 0, 11);
- x2("^\\w...うえお$", "zあいううえお", 0, 13);
- x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12);
- x2("\\Aたちつ", "たちつ", 0, 6);
- x2("むめも\\Z", "むめも", 0, 6);
- x2("かきく\\z", "かきく", 0, 6);
- x2("かきく\\Z", "かきく\n", 0, 6);
- x2("\\Gぽぴ", "ぽぴ", 0, 4);
- n("\\Gえ", "うえお");
- n("とて\\G", "とて");
- n("まみ\\A", "まみ");
- n("ま\\Aみ", "まみ");
- x2("(?=せ)せ", "せ", 0, 2);
- n("(?=う).", "い");
- x2("(?!う)か", "か", 0, 2);
- n("(?!と)あ", "と");
- x2("(?i:あ)", "あ", 0, 2);
- x2("(?i:ぶべ)", "ぶべ", 0, 4);
- n("(?i:い)", "う");
- x2("(?m:よ.)", "よ\n", 0, 3);
- x2("(?m:.め)", "ま\nめ", 2, 5);
- x2("あ?", "", 0, 0);
- x2("変?", "化", 0, 0);
- x2("変?", "変", 0, 2);
- x2("量*", "", 0, 0);
- x2("量*", "量", 0, 2);
- x2("子*", "子子子", 0, 6);
- x2("馬*", "鹿馬馬馬馬", 0, 0);
- n("山+", "");
- x2("河+", "河", 0, 2);
- x2("時+", "時時時時", 0, 8);
- x2("え+", "ええううう", 0, 4);
- x2("う+", "おうううう", 2, 10);
- x2(".?", "た", 0, 2);
- x2(".*", "ぱぴぷぺ", 0, 8);
- x2(".+", "ろ", 0, 2);
- x2(".+", "いうえか\n", 0, 8);
- x2("あ|い", "あ", 0, 2);
- x2("あ|い", "い", 0, 2);
- x2("あい|いう", "あい", 0, 4);
- x2("あい|いう", "いう", 0, 4);
- x2("を(?:かき|きく)", "をかき", 0, 6);
- x2("を(?:かき|きく)け", "をきくけ", 0, 8);
- x2("あい|(?:あう|あを)", "あを", 0, 4);
- x2("あ|い|う", "えう", 2, 4);
- x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6);
- n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
- x2("あ|^わ", "ぶあ", 2, 4);
- x2("あ|^を", "をあ", 0, 2);
- x2("鬼|\\G車", "け車鬼", 4, 6);
- x2("鬼|\\G車", "車鬼", 0, 2);
- x2("鬼|\\A車", "b車鬼", 3, 5);
- x2("鬼|\\A車", "車", 0, 2);
- x2("鬼|車\\Z", "車鬼", 2, 4);
- x2("鬼|車\\Z", "車", 0, 2);
- x2("鬼|車\\Z", "車\n", 0, 2);
- x2("鬼|車\\z", "車鬼", 2, 4);
- x2("鬼|車\\z", "車", 0, 2);
- x2("\\w|\\s", "お", 0, 2);
- x2("\\w|%", "%お", 0, 1);
- x2("\\w|[&$]", "う&", 0, 2);
- x2("[い-け]", "う", 0, 2);
- x2("[い-け]|[^か-こ]", "あ", 0, 2);
- x2("[い-け]|[^か-こ]", "か", 0, 2);
- x2("[^あ]", "\n", 0, 1);
- x2("(?:あ|[う-き])|いを", "うを", 0, 2);
- x2("(?:あ|[う-き])|いを", "いを", 0, 4);
- x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6);
- x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6);
- x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6);
- x2("(?<=あ|いう)い", "いうい", 4, 6);
- n("(?>あ|あいえ)う", "あいえう");
- x2("(?>あいえ|あ)う", "あいえう", 0, 8);
- x2("あ?|い", "あ", 0, 2);
- x2("あ?|い", "い", 0, 0);
- x2("あ?|い", "", 0, 0);
- x2("あ*|い", "ああ", 0, 4);
- x2("あ*|い*", "いあ", 0, 0);
- x2("あ*|い*", "あい", 0, 2);
- x2("[aあ]*|い*", "aあいいい", 0, 3);
- x2("あ+|い*", "", 0, 0);
- x2("あ+|い*", "いいい", 0, 6);
- x2("あ+|い*", "あいいい", 0, 2);
- x2("あ+|い*", "aあいいい", 0, 0);
- n("あ+|い+", "");
- x2("(あ|い)?", "い", 0, 2);
- x2("(あ|い)*", "いあ", 0, 4);
- x2("(あ|い)+", "いあい", 0, 6);
- x2("(あい|うあ)+", "うああいうえ", 0, 8);
- x2("(あい|うえ)+", "うああいうえ", 4, 12);
- x2("(あい|うあ)+", "ああいうあ", 2, 10);
- x2("(あい|うあ)+", "あいをうあ", 0, 4);
- x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10);
- x2("(あ|いあい)+", "あいあいあ", 0, 10);
- x2("(あ|いあい)+", "いあ", 2, 4);
- x2("(あ|いあい)+", "いあああいあ", 2, 8);
- x2("(?:あ|い)(?:あ|い)", "あい", 0, 4);
- x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6);
- x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12);
- x2("(?:あ+|い+){2}", "あああいいい", 0, 12);
- x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12);
- x2("(?:あ+|\\Aい*)うう", "うう", 0, 4);
- n("(?:あ+|\\Aい*)うう", "あいうう");
- x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16);
- x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14);
- x2("う{0,}", "うううう", 0, 8);
- x2("あ|(?i)c", "C", 0, 1);
- x2("(?i)c|あ", "C", 0, 1);
- x2("(?i:あ)|a", "a", 0, 1);
- n("(?i:あ)|a", "A");
- x2("[あいう]?", "あいう", 0, 2);
- x2("[あいう]*", "あいう", 0, 6);
- x2("[^あいう]*", "あいう", 0, 0);
- n("[^あいう]+", "あいう");
- x2("あ?\?", "あああ", 0, 0);
- x2("いあ?\?い", "いあい", 0, 6);
- x2("あ*?", "あああ", 0, 0);
- x2("いあ*?", "いああ", 0, 2);
- x2("いあ*?い", "いああい", 0, 8);
- x2("あ+?", "あああ", 0, 2);
- x2("いあ+?", "いああ", 0, 4);
- x2("いあ+?い", "いああい", 0, 8);
- x2("(?:天?)?\?", "天", 0, 0);
- x2("(?:天?\?)?", "天", 0, 0);
- x2("(?:夢?)+?", "夢夢夢", 0, 2);
- x2("(?:風+)?\?", "風風風", 0, 0);
- x2("(?:雪+)?\?霜", "雪雪雪霜", 0, 8);
- x2("(?:あい)?{2}", "", 0, 0);
- x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8);
- x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
- x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16);
- n("(?:鬼車){3,}", "鬼車鬼車");
- x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12);
- x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16);
- x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
- x2("(?:鬼車){,}", "鬼車{,}", 0, 7);
- x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12);
- x3("(火)", "火", 0, 2, 1);
- x3("(火水)", "火水", 0, 4, 1);
- x2("((時間))", "時間", 0, 4);
- x3("((風水))", "風水", 0, 4, 1);
- x3("((昨日))", "昨日", 0, 4, 2);
- x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20);
- x3("(あい)(うえ)", "あいうえ", 0, 4, 1);
- x3("(あい)(うえ)", "あいうえ", 4, 8, 2);
- x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3);
- x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4);
- x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2);
- x2("(^あ)", "あ", 0, 2);
- x3("(あ)|(あ)", "いあ", 2, 4, 1);
- x3("(^あ)|(あ)", "いあ", 2, 4, 2);
- x3("(あ?)", "あああ", 0, 2, 1);
- x3("(ま*)", "ままま", 0, 6, 1);
- x3("(と*)", "", 0, 0, 1);
- x3("(る+)", "るるるるるるる", 0, 14, 1);
- x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1);
- x3("(あ+|い?)", "いいいああ", 0, 2, 1);
- x3("(あいう)?", "あいう", 0, 6, 1);
- x3("(あいう)*", "あいう", 0, 6, 1);
- x3("(あいう)+", "あいう", 0, 6, 1);
- x3("(さしす|あいう)+", "あいう", 0, 6, 1);
- x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1);
- x3("((?i:あいう))", "あいう", 0, 6, 1);
- x3("((?m:あ.う))", "あ\nう", 0, 5, 1);
- x3("((?=あん)あ)", "あんい", 0, 2, 1);
- x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1);
- x3("あ*(.)", "ああああん", 8, 10, 1);
- x3("あ*?(.)", "ああああん", 0, 2, 1);
- x3("あ*?(ん)", "ああああん", 8, 10, 1);
- x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1);
- x3("(\\Aいい)うう", "いいうう", 0, 4, 1);
- n("(\\Aいい)うう", "んいいうう");
- x3("(^いい)うう", "いいうう", 0, 4, 1);
- n("(^いい)うう", "んいいうう");
- x3("ろろ(るる$)", "ろろるる", 4, 8, 1);
- n("ろろ(るる$)", "ろろるるる");
- x2("(無)\\1", "無無", 0, 4);
- n("(無)\\1", "無武");
- x2("(空?)\\1", "空空", 0, 4);
- x2("(空?\?)\\1", "空空", 0, 0);
- x2("(空*)\\1", "空空空空空", 0, 8);
- x3("(空*)\\1", "空空空空空", 0, 4, 1);
- x2("あ(い*)\\1", "あいいいい", 0, 10);
- x2("あ(い*)\\1", "あい", 0, 2);
- x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20);
- x2("(あ*)(い*)\\2", "あああいいいい", 0, 14);
- x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2);
- x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16);
- x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7);
- x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12);
- x2("([き-け])\\1", "くく", 0, 4);
- x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8);
- n("(\\w\\d\\s)\\1", "あ5 あ5");
- x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8);
- x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 13);
- x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12);
- x2("(^こ)\\1", "ここ", 0, 4);
- n("(^む)\\1", "めむむ");
- n("(あ$)\\1", "ああ");
- n("(あい\\Z)\\1", "あい");
- x2("(あ*\\Z)\\1", "あ", 2, 2);
- x2(".(あ*\\Z)\\1", "いあ", 2, 4);
- x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1);
- x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1);
- x2("((?i:あvず))\\1", "あvずあvず", 0, 10);
- x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14);
- x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26);
- x2("[[ひふ]]", "ふ", 0, 2);
- x2("[[いおう]か]", "か", 0, 2);
- n("[[^あ]]", "あ");
- n("[^[あ]]", "あ");
- x2("[^[^あ]]", "あ", 0, 2);
- x2("[[かきく]&&きく]", "く", 0, 2);
- n("[[かきく]&&きく]", "か");
- n("[[かきく]&&きく]", "け");
- x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 2);
- n("[^あ-ん&&い-を&&う-ゑ]", "ゑ");
- x2("[[^あ&&あ]&&あ-ん]", "い", 0, 2);
- n("[[^あ&&あ]&&あ-ん]", "あ");
- x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 2);
- n("[[^あ-ん&&いうえお]&&[^う-か]]", "い");
- x2("[^[^あいう]&&[^うえお]]", "う", 0, 2);
- x2("[^[^あいう]&&[^うえお]]", "え", 0, 2);
- n("[^[^あいう]&&[^うえお]]", "か");
- x2("[あ-&&-あ]", "-", 0, 1);
- x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 2);
- x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
- x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
- n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
- x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 32);
- x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 32);
- fprintf(stdout,
- "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
- nsucc, nfail, nerror, onig_version());
-
-#ifndef POSIX_TEST
- onig_region_free(region, 1);
- onig_end();
-#endif
-
- return ((nfail == 0 && nerror == 0) ? 0 : -1);
-}
+/* + * This program was generated by testconv.rb. + */ +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#include <stdio.h> + +#ifdef POSIX_TEST +#include "onigposix.h" +#else +#include "oniguruma.h" +#endif + +#include <string.h> + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +#endif + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[25]; + + r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, &reg, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, &reg, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(&reg); + +#else + regex_t* reg; + OnigErrorInfo einfo; + + r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo); + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), +
(UChar* )str, (UChar* )(str + SLEN(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ +#ifndef POSIX_TEST + static OnigEncoding use_encs[1]; + + use_encs[0] = ONIG_ENCODING_SJIS; + onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); +#endif + + err_file = stdout; + +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_SJIS); +#else + region = onig_region_new(); +#endif + + x2("", "", 0, 0); + x2("^", "", 0, 0); + x2("$", "", 0, 0); + x2("\\G", "", 0, 0); + x2("\\A", "", 0, 0); + x2("\\Z", "", 0, 0); + x2("\\z", "", 0, 0); + x2("^$", "", 0, 0); + x2("\\ca", "\001", 0, 1); + x2("\\C-b", "\002", 0, 1); + x2("\\c\\\\", "\034", 0, 1); + x2("q[\\c\\\\]", "q\034", 0, 2); + x2("", "a", 0, 0); + x2("a", "a", 0, 1); + x2("\\x61", "a", 0, 1); + x2("aa", "aa", 0, 2); + x2("aaa", "aaa", 0, 3); + x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 
35); + x2("ab", "ab", 0, 2); + x2("b", "ab", 1, 2); + x2("bc", "abc", 1, 3); + x2("(?i:#RET#)", "#INS##RET#", 5, 10); + x2("\\17", "\017", 0, 1); + x2("\\x1f", "\x1f", 0, 1); + x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); + x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); + x2(".", "a", 0, 1); + n(".", ""); + x2("..", "ab", 0, 2); + x2("\\w", "e", 0, 1); + n("\\W", "e"); + x2("\\s", " ", 0, 1); + x2("\\S", "b", 0, 1); + x2("\\d", "4", 0, 1); + n("\\D", "4"); + x2("\\b", "z ", 0, 0); + x2("\\b", " z", 1, 1); + x2("\\B", "zz ", 1, 1); + x2("\\B", "z ", 2, 2); + x2("\\B", " z", 0, 0); + x2("[ab]", "b", 0, 1); + n("[ab]", "c"); + x2("[a-z]", "t", 0, 1); + n("[^a]", "a"); + x2("[^a]", "\n", 0, 1); + x2("[]]", "]", 0, 1); + n("[^]]", "]"); + x2("[\\^]+", "0^^1", 1, 3); + x2("[b-]", "b", 0, 1); + x2("[b-]", "-", 0, 1); + x2("[\\w]", "z", 0, 1); + n("[\\w]", " "); + x2("[\\W]", "b$", 1, 2); + x2("[\\d]", "5", 0, 1); + n("[\\d]", "e"); + x2("[\\D]", "t", 0, 1); + n("[\\D]", "3"); + x2("[\\s]", " ", 0, 1); + n("[\\s]", "a"); + x2("[\\S]", "b", 0, 1); + n("[\\S]", " "); + x2("[\\w\\d]", "2", 0, 1); + n("[\\w\\d]", " "); + x2("[[:upper:]]", "B", 0, 1); + x2("[*[:xdigit:]+]", "+", 0, 1); + x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); + x2("[*[:xdigit:]+]", "-@^+", 3, 4); + n("[[:upper]]", "A"); + x2("[[:upper]]", ":", 0, 1); + x2("[\\044-\\047]", "\046", 0, 1); + x2("[\\x5a-\\x5c]", "\x5b", 0, 1); + x2("[\\x6A-\\x6D]", "\x6c", 0, 1); + n("[\\x6A-\\x6D]", "\x6E"); + n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); + x2("[\\[]", "[", 0, 1); + x2("[\\]]", "]", 0, 1); + x2("[&]", "&", 0, 1); + x2("[[ab]]", "b", 0, 1); + x2("[[ab]c]", "c", 0, 1); + n("[[^a]]", "a"); + n("[^[a]]", "a"); + x2("[[ab]&&bc]", "b", 0, 1); + n("[[ab]&&bc]", "a"); + n("[[ab]&&bc]", "c"); + x2("[a-z&&b-y&&c-x]", "w", 0, 1); + n("[^a-z&&b-y&&c-x]", "w"); + x2("[[^a&&a]&&a-z]", "b", 0, 1); + n("[[^a&&a]&&a-z]", "a"); + x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); + 
n("[[^a-z&&bcdef]&&[^c-g]]", "c"); + x2("[^[^abc]&&[^cde]]", "c", 0, 1); + x2("[^[^abc]&&[^cde]]", "e", 0, 1); + n("[^[^abc]&&[^cde]]", "f"); + x2("[a-&&-a]", "-", 0, 1); + n("[a\\-&&\\-a]", "&"); + n("\\wabc", " abc"); + x2("a\\Wbc", "a bc", 0, 4); + x2("a.b.c", "aabbc", 0, 5); + x2(".\\wb\\W..c", "abb bcc", 0, 7); + x2("\\s\\wzzz", " zzzz", 0, 5); + x2("aa.b", "aabb", 0, 4); + n(".a", "ab"); + x2(".a", "aa", 0, 2); + x2("^a", "a", 0, 1); + x2("^a$", "a", 0, 1); + x2("^\\w$", "a", 0, 1); + n("^\\w$", " "); + x2("^\\wab$", "zab", 0, 3); + x2("^\\wabcdef$", "zabcdef", 0, 7); + x2("^\\w...def$", "zabcdef", 0, 7); + x2("\\w\\w\\s\\Waaa\\d", "aa  aaa4", 0, 8); + x2("\\A\\Z", "", 0, 0); + x2("\\Axyz", "xyz", 0, 3); + x2("xyz\\Z", "xyz", 0, 3); + x2("xyz\\z", "xyz", 0, 3); + x2("a\\Z", "a", 0, 1); + x2("\\Gaz", "az", 0, 2); + n("\\Gz", "bza"); + n("az\\G", "az"); + n("az\\A", "az"); + n("a\\Az", "az"); + x2("\\^\\$", "^$", 0, 2); + x2("^x?y", "xy", 0, 2); + x2("^(x?y)", "xy", 0, 2); + x2("\\w", "_", 0, 1); + n("\\W", "_"); + x2("(?=z)z", "z", 0, 1); + n("(?=z).", "a"); + x2("(?!z)a", "a", 0, 1); + n("(?!z)a", "z"); + x2("(?i:a)", "a", 0, 1); + x2("(?i:a)", "A", 0, 1); + x2("(?i:A)", "a", 0, 1); + n("(?i:A)", "b"); + x2("(?i:[A-Z])", "a", 0, 1); + x2("(?i:[f-m])", "H", 0, 1); + x2("(?i:[f-m])", "h", 0, 1); + n("(?i:[f-m])", "e"); + x2("(?i:[A-c])", "D", 0, 1); + n("(?i:[^a-z])", "A"); + n("(?i:[^a-z])", "a"); + x2("(?i:[!-k])", "Z", 0, 1); + x2("(?i:[!-k])", "7", 0, 1); + x2("(?i:[T-}])", "b", 0, 1); + x2("(?i:[T-}])", "{", 0, 1); + x2("(?i:\\?a)", "?A", 0, 2); + x2("(?i:\\*A)", "*a", 0, 2); + n(".", "\n"); + x2("(?m:.)", "\n", 0, 1); + x2("(?m:a.)", "a\n", 0, 2); + x2("(?m:.b)", "a\nb", 1, 3); + x2(".*abc", "dddabdd\nddabc", 8, 13); + x2("(?m:.*abc)", "dddabddabc", 0, 10); + n("(?i)(?-i)a", "A"); + n("(?i)(?-i:a)", "A"); + x2("a?", "", 0, 0); + x2("a?", "b", 0, 0); + x2("a?", "a", 0, 1); + x2("a*", "", 0, 0); + x2("a*", "a", 0, 1); + x2("a*", "aaa", 0, 3); + x2("a*", 
"baaaa", 0, 0); + n("a+", ""); + x2("a+", "a", 0, 1); + x2("a+", "aaaa", 0, 4); + x2("a+", "aabbb", 0, 2); + x2("a+", "baaaa", 1, 5); + x2(".?", "", 0, 0); + x2(".?", "f", 0, 1); + x2(".?", "\n", 0, 0); + x2(".*", "", 0, 0); + x2(".*", "abcde", 0, 5); + x2(".+", "z", 0, 1); + x2(".+", "zdswer\n", 0, 6); + x2("(.*)a\\1f", "babfbac", 0, 4); + x2("(.*)a\\1f", "bacbabf", 3, 7); + x2("((.*)a\\2f)", "bacbabf", 3, 7); + x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); + x2("a|b", "a", 0, 1); + x2("a|b", "b", 0, 1); + x2("|a", "a", 0, 0); + x2("(|a)", "a", 0, 0); + x2("ab|bc", "ab", 0, 2); + x2("ab|bc", "bc", 0, 2); + x2("z(?:ab|bc)", "zbc", 0, 3); + x2("a(?:ab|bc)c", "aabc", 0, 4); + x2("ab|(?:ac|az)", "az", 0, 2); + x2("a|b|c", "dc", 1, 2); + x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); + n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); + x2("a|^z", "ba", 1, 2); + x2("a|^z", "za", 0, 1); + x2("a|\\Gz", "bza", 2, 3); + x2("a|\\Gz", "za", 0, 1); + x2("a|\\Az", "bza", 2, 3); + x2("a|\\Az", "za", 0, 1); + x2("a|b\\Z", "ba", 1, 2); + x2("a|b\\Z", "b", 0, 1); + x2("a|b\\z", "ba", 1, 2); + x2("a|b\\z", "b", 0, 1); + x2("\\w|\\s", " ", 0, 1); + n("\\w|\\w", " "); + x2("\\w|%", "%", 0, 1); + x2("\\w|[&$]", "&", 0, 1); + x2("[b-d]|[^e-z]", "a", 0, 1); + x2("(?:a|[c-f])|bz", "dz", 0, 1); + x2("(?:a|[c-f])|bz", "bz", 0, 2); + x2("abc|(?=zz)..f", "zzf", 0, 3); + x2("abc|(?!zz)..f", "abf", 0, 3); + x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); + n("(?>a|abd)c", "abdc"); + x2("(?>abd|a)c", "abdc", 0, 4); + x2("a?|b", "a", 0, 1); + x2("a?|b", "b", 0, 0); + x2("a?|b", "", 0, 0); + x2("a*|b", "aa", 0, 2); + x2("a*|b*", "ba", 0, 0); + x2("a*|b*", "ab", 0, 1); + x2("a+|b*", "", 0, 0); + x2("a+|b*", "bbb", 0, 3); + x2("a+|b*", "abbb", 0, 1); + n("a+|b+", ""); + x2("(a|b)?", "b", 0, 1); + x2("(a|b)*", "ba", 0, 2); + x2("(a|b)+", "bab", 0, 3); + x2("(ab|ca)+", "caabbc", 0, 4); + x2("(ab|ca)+", "aabca", 1, 5); + x2("(ab|ca)+", "abzca", 0, 2); + x2("(a|bab)+", "ababa", 0, 5); + 
x2("(a|bab)+", "ba", 1, 2); + x2("(a|bab)+", "baaaba", 1, 4); + x2("(?:a|b)(?:a|b)", "ab", 0, 2); + x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); + x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); + x2("(?:a+|b+){2}", "aaabbb", 0, 6); + x2("h{0,}", "hhhh", 0, 4); + x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); + n("ax{2}*a", "0axxxa1"); + n("a.{0,2}a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXXa0"); + x2("^a{2,}?a$", "aaa", 0, 3); + x2("^[a-z]{2,}?$", "aaa", 0, 3); + x2("(?:a+|\\Ab*)cc", "cc", 0, 2); + n("(?:a+|\\Ab*)cc", "abcc"); + x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); + x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); + x2("a|(?i)c", "C", 0, 1); + x2("(?i)c|a", "C", 0, 1); + x2("(?i)c|a", "A", 0, 1); + x2("(?i:c)|a", "C", 0, 1); + n("(?i:c)|a", "A"); + x2("[abc]?", "abc", 0, 1); + x2("[abc]*", "abc", 0, 3); + x2("[^abc]*", "abc", 0, 0); + n("[^abc]+", "abc"); + x2("a?\?", "aaa", 0, 0); + x2("ba?\?b", "bab", 0, 3); + x2("a*?", "aaa", 0, 0); + x2("ba*?", "baa", 0, 1); + x2("ba*?b", "baab", 0, 4); + x2("a+?", "aaa", 0, 1); + x2("ba+?", "baa", 0, 2); + x2("ba+?b", "baab", 0, 4); + x2("(?:a?)?\?", "a", 0, 0); + x2("(?:a?\?)?", "a", 0, 0); + x2("(?:a?)+?", "aaa", 0, 1); + x2("(?:a+)?\?", "aaa", 0, 0); + x2("(?:a+)?\?b", "aaab", 0, 4); + x2("(?:ab)?{2}", "", 0, 0); + x2("(?:ab)?{2}", "ababa", 0, 4); + x2("(?:ab)*{0}", "ababa", 0, 0); + x2("(?:ab){3,}", "abababab", 0, 8); + n("(?:ab){3,}", "abab"); + x2("(?:ab){2,4}", "ababab", 0, 6); + x2("(?:ab){2,4}", "ababababab", 0, 8); + x2("(?:ab){2,4}?", "ababababab", 0, 4); + x2("(?:ab){,}", "ab{,}", 0, 5); + x2("(?:abc)+?{2}", "abcabcabc", 0, 6); + x2("(?:X*)(?i:xa)", "XXXa", 0, 4); + x2("(d+)([^abc]z)", "dddz", 0, 4); + x2("([^abc]*)([^abc]z)", "dddz", 0, 4); + x2("(\\w+)(\\wz)", "dddz", 0, 4); + x3("(a)", "a", 0, 1, 1); + x3("(ab)", "ab", 0, 2, 1); + x2("((ab))", "ab", 0, 2); + x3("((ab))", "ab", 0, 2, 1); + x3("((ab))", "ab", 0, 2, 2); + x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); + x3("(ab)(cd)", "abcd", 0, 2, 
1); + x3("(ab)(cd)", "abcd", 2, 4, 2); + x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); + x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); + x2("(^a)", "a", 0, 1); + x3("(a)|(a)", "ba", 1, 2, 1); + x3("(^a)|(a)", "ba", 1, 2, 2); + x3("(a?)", "aaa", 0, 1, 1); + x3("(a*)", "aaa", 0, 3, 1); + x3("(a*)", "", 0, 0, 1); + x3("(a+)", "aaaaaaa", 0, 7, 1); + x3("(a+|b*)", "bbbaa", 0, 3, 1); + x3("(a+|b?)", "bbbaa", 0, 1, 1); + x3("(abc)?", "abc", 0, 3, 1); + x3("(abc)*", "abc", 0, 3, 1); + x3("(abc)+", "abc", 0, 3, 1); + x3("(xyz|abc)+", "abc", 0, 3, 1); + x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); + x3("((?i:abc))", "AbC", 0, 3, 1); + x2("(abc)(?i:\\1)", "abcABC", 0, 6); + x3("((?m:a.c))", "a\nc", 0, 3, 1); + x3("((?=az)a)", "azb", 0, 1, 1); + x3("abc|(.abd)", "zabd", 0, 4, 1); + x2("(?:abc)|(ABC)", "abc", 0, 3); + x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); + x3("a*(.)", "aaaaz", 4, 5, 1); + x3("a*?(.)", "aaaaz", 0, 1, 1); + x3("a*?(c)", "aaaac", 4, 5, 1); + x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); + x3("(\\Abb)cc", "bbcc", 0, 2, 1); + n("(\\Abb)cc", "zbbcc"); + x3("(^bb)cc", "bbcc", 0, 2, 1); + n("(^bb)cc", "zbbcc"); + x3("cc(bb$)", "ccbb", 2, 4, 1); + n("cc(bb$)", "ccbbb"); + n("(\\1)", ""); + n("\\1(a)", "aa"); + n("(a(b)\\1)\\2+", "ababb"); + n("(?:(?:\\1|z)(a))+$", "zaa"); + x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); + x2("(a)(?=\\1)", "aa", 0, 1); + n("(a)$|\\1", "az"); + x2("(a)\\1", "aa", 0, 2); + n("(a)\\1", "ab"); + x2("(a?)\\1", "aa", 0, 2); + x2("(a?\?)\\1", "aa", 0, 0); + x2("(a*)\\1", "aaaaa", 0, 4); + x3("(a*)\\1", "aaaaa", 0, 2, 1); + x2("a(b*)\\1", "abbbb", 0, 5); + x2("a(b*)\\1", "ab", 0, 1); + x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); + x2("(a*)(b*)\\2", "aaabbbb", 0, 7); + x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); + x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); + x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); + x2("([a-d])\\1", "cc", 0, 2); + x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); + n("(\\w\\d\\s)\\1", "f5 f5"); + x2("(who|[a-c]{3})\\1", "whowho", 0, 6); + 
x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); + x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); + x2("(^a)\\1", "aa", 0, 2); + n("(^a)\\1", "baa"); + n("(a$)\\1", "aa"); + n("(ab\\Z)\\1", "ab"); + x2("(a*\\Z)\\1", "a", 1, 1); + x2(".(a*\\Z)\\1", "ba", 1, 2); + x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); + x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); + x2("((?i:az))\\1", "AzAz", 0, 4); + n("((?i:az))\\1", "Azaz"); + x2("(?<=a)b", "ab", 1, 2); + n("(?<=a)b", "bb"); + x2("(?<=a|b)b", "bb", 1, 2); + x2("(?<=a|bc)b", "bcb", 2, 3); + x2("(?<=a|bc)b", "ab", 1, 2); + x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); + x2("(a)\\g<1>", "aa", 0, 2); + x2("(?<!a)b", "cb", 1, 2); + n("(?<!a)b", "ab"); + x2("(?<!a|bc)b", "bbb", 0, 1); + n("(?<!a|bc)z", "bcz"); + x2("(?<name1>a)", "a", 0, 1); + x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4); + x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8); + x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3); + x2("(?<n>|a\\g<n>)+", "", 0, 0); + x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6); + x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1); + x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3); + x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4); + x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8); + x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", "  fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); + x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2); + x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0); + x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9); + n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg"); + x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10); + 
x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); + x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16); + x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1); + x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13); + x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1); + x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9); + x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); + x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7); + x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4); + x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5); + x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10); + x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x3("(?:\\1a|())*", "a", 0, 0, 1); + x2("x((.)*)*x", "0x1x2x3", 1, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("\\xED\\xF2", "\xed\xf2", 0, 2); + x2("", "あ", 0, 0); + x2("あ", "あ", 0, 2); + n("い", "あ"); + x2("うう", "うう", 0, 4); + x2("あいう", "あいう", 0, 6); + x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70); + x2("あ", "いあ", 2, 4); + x2("いう", "あいう", 2, 6); + x2("\\xca\\xb8", "\xca\xb8", 0, 2); + x2(".", "あ", 0, 2); + x2("..", "かき", 0, 4); + x2("\\w", "お", 0, 2); + n("\\W", "あ"); + x2("[\\W]", "う$", 2, 3); + x2("\\S", "そ", 0, 2); + x2("\\S", "漢", 0, 2); + x2("\\b", "気 ", 0, 0); + x2("\\b", " ほ", 1, 1); + x2("\\B", "せそ ", 2, 
2); + x2("\\B", "う ", 3, 3); + x2("\\B", " い", 0, 0); + x2("[たち]", "ち", 0, 2); + n("[なに]", "ぬ"); + x2("[う-お]", "え", 0, 2); + n("[^け]", "け"); + x2("[\\w]", "ね", 0, 2); + n("[\\d]", "ふ"); + x2("[\\D]", "は", 0, 2); + n("[\\s]", "く"); + x2("[\\S]", "へ", 0, 2); + x2("[\\w\\d]", "よ", 0, 2); + x2("[\\w\\d]", "   よ", 3, 5); + n("\\w鬼車", " 鬼車"); + x2("鬼\\W車", "鬼 車", 0, 5); + x2("あ.い.う", "ああいいう", 0, 10); + x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13); + x2("\\s\\wこここ", " ここここ", 0, 9); + x2("ああ.け", "ああけけ", 0, 8); + n(".い", "いえ"); + x2(".お", "おお", 0, 4); + x2("^あ", "あ", 0, 2); + x2("^む$", "む", 0, 2); + x2("^\\w$", "に", 0, 2); + x2("^\\wかきくけこ$", "zかきくけこ", 0, 11); + x2("^\\w...うえお$", "zあいううえお", 0, 13); + x2("\\w\\w\\s\\Wおおお\\d", "aお  おおお4", 0, 12); + x2("\\Aたちつ", "たちつ", 0, 6); + x2("むめも\\Z", "むめも", 0, 6); + x2("かきく\\z", "かきく", 0, 6); + x2("かきく\\Z", "かきく\n", 0, 6); + x2("\\Gぽぴ", "ぽぴ", 0, 4); + n("\\Gえ", "うえお"); + n("とて\\G", "とて"); + n("まみ\\A", "まみ"); + n("ま\\Aみ", "まみ"); + x2("(?=せ)せ", "せ", 0, 2); + n("(?=う).", "い"); + x2("(?!う)か", "か", 0, 2); + n("(?!と)あ", "と"); + x2("(?i:あ)", "あ", 0, 2); + x2("(?i:ぶべ)", "ぶべ", 0, 4); + n("(?i:い)", "う"); + x2("(?m:よ.)", "よ\n", 0, 3); + x2("(?m:.め)", "ま\nめ", 2, 5); + x2("あ?", "", 0, 0); + x2("変?", "化", 0, 0); + x2("変?", "変", 0, 2); + x2("量*", "", 0, 0); + x2("量*", "量", 0, 2); + x2("子*", "子子子", 0, 6); + x2("馬*", "鹿馬馬馬馬", 0, 0); + n("山+", ""); + x2("河+", "河", 0, 2); + x2("時+", "時時時時", 0, 8); + x2("え+", "ええううう", 0, 4); + x2("う+", "おうううう", 2, 10); + x2(".?", "た", 0, 2); + x2(".*", "ぱぴぷぺ", 0, 8); + x2(".+", "ろ", 0, 2); + x2(".+", "いうえか\n", 0, 8); + x2("あ|い", "あ", 0, 2); + x2("あ|い", "い", 0, 2); + x2("あい|いう", "あい", 0, 4); + x2("あい|いう", "いう", 0, 4); + x2("を(?:かき|きく)", "をかき", 0, 6); + x2("を(?:かき|きく)け", "をきくけ", 0, 8); + x2("あい|(?:あう|あを)", "あを", 0, 4); + x2("あ|い|う", "えう", 2, 4); + x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6); + n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ"); + x2("あ|^わ", "ぶあ", 2, 4); + x2("あ|^を", "をあ", 0, 2); + x2("鬼|\\G車", "け車鬼", 4, 6); + 
x2("鬼|\\G車", "車鬼", 0, 2); + x2("鬼|\\A車", "b車鬼", 3, 5); + x2("鬼|\\A車", "車", 0, 2); + x2("鬼|車\\Z", "車鬼", 2, 4); + x2("鬼|車\\Z", "車", 0, 2); + x2("鬼|車\\Z", "車\n", 0, 2); + x2("鬼|車\\z", "車鬼", 2, 4); + x2("鬼|車\\z", "車", 0, 2); + x2("\\w|\\s", "お", 0, 2); + x2("\\w|%", "%お", 0, 1); + x2("\\w|[&$]", "う&", 0, 2); + x2("[い-け]", "う", 0, 2); + x2("[い-け]|[^か-こ]", "あ", 0, 2); + x2("[い-け]|[^か-こ]", "か", 0, 2); + x2("[^あ]", "\n", 0, 1); + x2("(?:あ|[う-き])|いを", "うを", 0, 2); + x2("(?:あ|[う-き])|いを", "いを", 0, 4); + x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6); + x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6); + x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6); + x2("(?<=あ|いう)い", "いうい", 4, 6); + n("(?>あ|あいえ)う", "あいえう"); + x2("(?>あいえ|あ)う", "あいえう", 0, 8); + x2("あ?|い", "あ", 0, 2); + x2("あ?|い", "い", 0, 0); + x2("あ?|い", "", 0, 0); + x2("あ*|い", "ああ", 0, 4); + x2("あ*|い*", "いあ", 0, 0); + x2("あ*|い*", "あい", 0, 2); + x2("[aあ]*|い*", "aあいいい", 0, 3); + x2("あ+|い*", "", 0, 0); + x2("あ+|い*", "いいい", 0, 6); + x2("あ+|い*", "あいいい", 0, 2); + x2("あ+|い*", "aあいいい", 0, 0); + n("あ+|い+", ""); + x2("(あ|い)?", "い", 0, 2); + x2("(あ|い)*", "いあ", 0, 4); + x2("(あ|い)+", "いあい", 0, 6); + x2("(あい|うあ)+", "うああいうえ", 0, 8); + x2("(あい|うえ)+", "うああいうえ", 4, 12); + x2("(あい|うあ)+", "ああいうあ", 2, 10); + x2("(あい|うあ)+", "あいをうあ", 0, 4); + x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10); + x2("(あ|いあい)+", "あいあいあ", 0, 10); + x2("(あ|いあい)+", "いあ", 2, 4); + x2("(あ|いあい)+", "いあああいあ", 2, 8); + x2("(?:あ|い)(?:あ|い)", "あい", 0, 4); + x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6); + x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12); + x2("(?:あ+|い+){2}", "あああいいい", 0, 12); + x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12); + x2("(?:あ+|\\Aい*)うう", "うう", 0, 4); + n("(?:あ+|\\Aい*)うう", "あいうう"); + x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16); + x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14); + x2("う{0,}", "うううう", 0, 8); + x2("あ|(?i)c", "C", 0, 1); + x2("(?i)c|あ", "C", 0, 1); + x2("(?i:あ)|a", "a", 0, 1); + n("(?i:あ)|a", "A"); + x2("[あいう]?", "あいう", 0, 2); + x2("[あいう]*", "あいう", 0, 6); + x2("[^あいう]*", "あいう", 0, 0); + n("[^あいう]+", "あいう"); + x2("あ?\?", "あああ", 
0, 0); + x2("いあ?\?い", "いあい", 0, 6); + x2("あ*?", "あああ", 0, 0); + x2("いあ*?", "いああ", 0, 2); + x2("いあ*?い", "いああい", 0, 8); + x2("あ+?", "あああ", 0, 2); + x2("いあ+?", "いああ", 0, 4); + x2("いあ+?い", "いああい", 0, 8); + x2("(?:天?)?\?", "天", 0, 0); + x2("(?:天?\?)?", "天", 0, 0); + x2("(?:夢?)+?", "夢夢夢", 0, 2); + x2("(?:風+)?\?", "風風風", 0, 0); + x2("(?:雪+)?\?霜", "雪雪雪霜", 0, 8); + x2("(?:あい)?{2}", "", 0, 0); + x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8); + x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0); + x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16); + n("(?:鬼車){3,}", "鬼車鬼車"); + x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12); + x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16); + x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8); + x2("(?:鬼車){,}", "鬼車{,}", 0, 7); + x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12); + x3("(火)", "火", 0, 2, 1); + x3("(火水)", "火水", 0, 4, 1); + x2("((時間))", "時間", 0, 4); + x3("((風水))", "風水", 0, 4, 1); + x3("((昨日))", "昨日", 0, 4, 2); + x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20); + x3("(あい)(うえ)", "あいうえ", 0, 4, 1); + x3("(あい)(うえ)", "あいうえ", 4, 8, 2); + x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3); + x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4); + x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2); + x2("(^あ)", "あ", 0, 2); + x3("(あ)|(あ)", "いあ", 2, 4, 1); + x3("(^あ)|(あ)", "いあ", 2, 4, 2); + x3("(あ?)", "あああ", 0, 2, 1); + x3("(ま*)", "ままま", 0, 6, 1); + x3("(と*)", "", 0, 0, 1); + x3("(る+)", "るるるるるるる", 0, 14, 1); + x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1); + x3("(あ+|い?)", "いいいああ", 0, 2, 1); + x3("(あいう)?", "あいう", 0, 6, 1); + x3("(あいう)*", "あいう", 0, 6, 1); + x3("(あいう)+", "あいう", 0, 6, 1); + x3("(さしす|あいう)+", "あいう", 0, 6, 1); + x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1); + x3("((?i:あいう))", "あいう", 0, 6, 1); + x3("((?m:あ.う))", "あ\nう", 0, 5, 1); + x3("((?=あん)あ)", "あんい", 0, 2, 1); + x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1); + x3("あ*(.)", "ああああん", 8, 10, 1); + x3("あ*?(.)", "ああああん", 0, 2, 1); + x3("あ*?(ん)", "ああああん", 8, 10, 1); + x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1); + x3("(\\Aいい)うう", "いいうう", 0, 4, 1); + n("(\\Aいい)うう", "んいいうう"); + x3("(^いい)うう", "いいうう", 0, 
4, 1); + n("(^いい)うう", "んいいうう"); + x3("ろろ(るる$)", "ろろるる", 4, 8, 1); + n("ろろ(るる$)", "ろろるるる"); + x2("(無)\\1", "無無", 0, 4); + n("(無)\\1", "無武"); + x2("(空?)\\1", "空空", 0, 4); + x2("(空?\?)\\1", "空空", 0, 0); + x2("(空*)\\1", "空空空空空", 0, 8); + x3("(空*)\\1", "空空空空空", 0, 4, 1); + x2("あ(い*)\\1", "あいいいい", 0, 10); + x2("あ(い*)\\1", "あい", 0, 2); + x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20); + x2("(あ*)(い*)\\2", "あああいいいい", 0, 14); + x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2); + x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16); + x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7); + x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12); + x2("([き-け])\\1", "くく", 0, 4); + x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8); + n("(\\w\\d\\s)\\1", "あ5 あ5"); + x2("(誰？|[あ-う]{3})\\1", "誰？誰？", 0, 8); + x2("...(誰？|[あ-う]{3})\\1", "あaあ誰？誰？", 0, 13); + x2("(誰？|[あ-う]{3})\\1", "ういうういう", 0, 12); + x2("(^こ)\\1", "ここ", 0, 4); + n("(^む)\\1", "めむむ"); + n("(あ$)\\1", "ああ"); + n("(あい\\Z)\\1", "あい"); + x2("(あ*\\Z)\\1", "あ", 2, 2); + x2(".(あ*\\Z)\\1", "いあ", 2, 4); + x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1); + x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1); + x2("((?i:あvず))\\1", "あvずあvず", 0, 10); + x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14); + x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26); + x2("[[ひふ]]", "ふ", 0, 2); + x2("[[いおう]か]", "か", 0, 2); + n("[[^あ]]", "あ"); + n("[^[あ]]", "あ"); + x2("[^[^あ]]", "あ", 0, 2); + x2("[[かきく]&&きく]", "く", 0, 2); + n("[[かきく]&&きく]", "か"); + n("[[かきく]&&きく]", "け"); + x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 2); + n("[^あ-ん&&い-を&&う-ゑ]", "ゑ"); + x2("[[^あ&&あ]&&あ-ん]", "い", 0, 2); + n("[[^あ&&あ]&&あ-ん]", "あ"); + x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 2); + n("[[^あ-ん&&いうえお]&&[^う-か]]", "い"); + x2("[^[^あいう]&&[^うえお]]", "う", 0, 2); + x2("[^[^あいう]&&[^うえお]]", "え", 0, 2); + n("[^[^あいう]&&[^うえお]]", "か"); + x2("[あ-&&-あ]", "-", 0, 1); + x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 2); + x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1); + x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1); + 
n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2"); + x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 32); + x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 32); + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} |