summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 30
-rw-r--r-- CMakeLists.txt 9
-rw-r--r-- HISTORY 64
-rw-r--r-- README 2
-rw-r--r-- README.md 12
-rw-r--r-- configure.ac 41
-rw-r--r-- contributed/makefile 3
-rw-r--r-- doc/API 2
-rw-r--r-- doc/RE.ja 2
-rw-r--r-- index.html 3
-rw-r--r-- index_ja.html 3
-rw-r--r-- onig-config.in 2
-rw-r--r-- oniguruma.pc.in 1
-rw-r--r-- sample/.gitignore 14
-rw-r--r-- sample/CMakeLists.txt 2
-rw-r--r-- sample/crnl.c 2
-rw-r--r-- sample/listcap.c 2
-rw-r--r-- src/.gitignore 15
-rw-r--r-- src/Makefile.windows 366
-rw-r--r-- src/ascii.c 2
-rw-r--r-- src/big5.c 4
-rw-r--r-- src/config.h.cmake.in 21
-rw-r--r-- src/config.h.win32 133
-rw-r--r-- src/config.h.win64 133
-rw-r--r-- src/config.h.windows.in 142
-rw-r--r-- src/cp1251.c 2
-rw-r--r-- src/euc_jp.c 4
-rw-r--r-- src/euc_kr.c 6
-rw-r--r-- src/euc_tw.c 2
-rw-r--r-- src/gb18030.c 2
-rwxr-xr-x src/gperf_fold_key_conv.py 8
-rw-r--r-- src/iso8859_1.c 2
-rw-r--r-- src/iso8859_10.c 2
-rw-r--r-- src/iso8859_11.c 2
-rw-r--r-- src/iso8859_13.c 2
-rw-r--r-- src/iso8859_14.c 2
-rw-r--r-- src/iso8859_15.c 2
-rw-r--r-- src/iso8859_16.c 2
-rw-r--r-- src/iso8859_2.c 2
-rw-r--r-- src/iso8859_3.c 2
-rw-r--r-- src/iso8859_4.c 2
-rw-r--r-- src/iso8859_5.c 2
-rw-r--r-- src/iso8859_6.c 2
-rw-r--r-- src/iso8859_7.c 2
-rw-r--r-- src/iso8859_8.c 2
-rw-r--r-- src/iso8859_9.c 2
-rw-r--r-- src/koi8.c 2
-rw-r--r-- src/koi8_r.c 2
-rwxr-xr-x src/make_unicode_fold.sh 7
-rwxr-xr-x src/make_unicode_fold_data.py 1
-rwxr-xr-x src/make_unicode_property_data.py 20
-rw-r--r-- src/oniguruma.h 4
-rw-r--r-- src/regcomp.c 1169
-rw-r--r-- src/regenc.c 4
-rw-r--r-- src/regenc.h 16
-rw-r--r-- src/regerror.c 38
-rw-r--r-- src/regexec.c 1032
-rw-r--r-- src/regint.h 127
-rw-r--r-- src/regparse.c 348
-rw-r--r-- src/regparse.h 302
-rw-r--r-- src/regposerr.c 6
-rw-r--r-- src/regsyntax.c 4
-rw-r--r-- src/sjis.c 11
-rw-r--r-- src/unicode.c 16
-rw-r--r-- src/unicode_fold1_key.c 2
-rw-r--r-- src/unicode_fold2_key.c 2
-rw-r--r-- src/unicode_fold3_key.c 2
-rw-r--r-- src/unicode_fold_data.c 1
-rw-r--r-- src/unicode_unfold_key.c 2
-rw-r--r-- src/utf16_be.c 2
-rw-r--r-- src/utf16_le.c 2
-rw-r--r-- src/utf32_be.c 2
-rw-r--r-- src/utf32_le.c 2
-rw-r--r-- src/utf8.c 6
-rw-r--r-- test/.gitignore 4
-rw-r--r-- test/test_utf8.c 33
-rw-r--r-- test/testc.c 6
-rw-r--r-- windows/testc.c 1734
78 files changed, 3024 insertions, 2949 deletions
diff --git a/.gitignore b/.gitignore
index 3ab6a6b..398488c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,33 @@ Makefile.in
/build
/onig-*.tar.gz
m4/*.m4
+
+# src/
+/src/CaseFolding.txt
+/src/unicode_fold?_key.gperf
+/src/unicode_unfold_key.gperf
+/src/UNICODE_PROPERTIES
+/src/*.txt
+/src/mktable
+
+# test/
+/test/test_utf8
+/test/testc
+/test/testcu
+/test/testp
+
+# sample/
+/sample/crnl
+/sample/encode
+/sample/listcap
+/sample/names
+/sample/posix
+/sample/simple
+/sample/sql
+/sample/syntax
+/sample/user_property
+/sample/callout
+/sample/echo
+/sample/count
+/sample/bug_fix
+/sample/log*
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6aae87b..06068bc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.1)
-project(oniguruma VERSION 6.9.0)
+project(oniguruma VERSION 6.9.1)
set(PACKAGE onig)
set(PACKAGE_VERSION ${PROJECT_VERSION})
@@ -33,13 +33,7 @@ include(TestBigEndian)
check_function_exists(alloca HAVE_ALLOCA)
check_include_files(alloca.h HAVE_ALLOCA_H)
-set(HAVE_PROTOTYPES 1)
-check_include_files(stdarg.h HAVE_STDARG_PROTOTYPES)
check_include_files(stdint.h HAVE_STDINT_H)
-check_include_files(stdlib.h HAVE_STDLIB_H)
-check_include_files(strings.h HAVE_STRINGS_H)
-check_include_files(string.h HAVE_STRING_H)
-check_include_files(limits.h HAVE_LIMITS_H)
check_include_files(sys/times.h HAVE_SYS_TIMES_H)
check_include_files(sys/time.h HAVE_SYS_TIME_H)
check_include_files(sys/types.h HAVE_SYS_TYPES_H)
@@ -48,7 +42,6 @@ check_include_files(inttypes.h HAVE_INTTYPES_H)
check_type_size(int SIZEOF_INT)
check_type_size(long SIZEOF_LONG)
check_type_size(short SIZEOF_SHORT)
-check_include_files("stdlib.h;stdarg.h;string.h;float.h" STDC_HEADERS)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
diff --git a/HISTORY b/HISTORY
index c1d5b0a..8395746 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,5 +1,25 @@
History
+2018/12/11: Version 6.9.1
+
+2018/10/08: use ENC_FLAG_SKIP_OFFSET_XXX values
+2018/10/06: UTF-8 supports code range from 0x0000 to 0x10FFFF
+ (https://tools.ietf.org/html/rfc3629)
+2018/10/05: speed improvement
+2018/10/03: use OPTIMIZE_STR_CASE_FOLD_FAST
+2018/10/01: convert CRLF line endings to LF
+2018/09/27: set SIZEOF_SIZE_T for windows platforms
+2018/09/22: use Sunday quick search algorithm instead of Boyer-Moor-Horspool
+2018/09/20: introduce threaded code into match_at()
+2018/09/17: remove HAVE_STRINGS_H
+2018/09/16: remove HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES
+2018/09/14: add a command line option '-gc' for make_unicode_property_data.py.
+2018/09/08: remove AC_HEADER_STDC
+2018/09/06: remove AC_OUTPUT macro call
+2018/09/06: remove AC_FUNC_MEMCMP, AC_HEADER_TIME, AC_C_CONST, HAVE__SETJMP and
+ HAVE_STRING_H
+2018/09/05: remove HAVE_LIMITS_H, HAVE_FLOAT_H and HAVE_STDLIB_H
+
2018/09/03: Version 6.9.0
2018/08/24: add Unicode Emoji properties
@@ -394,12 +414,12 @@ History
2006/11/07: [dist] remove test.rb, testconv.rb and testconvu.rb.
2006/11/07: [bug] get_case_fold_codes_by_str() should handle 'Ss' and 'sS'
combination for ess-tsett.
-2006/11/07: [impl] apply_all_case_fold() doesn't need to return all
+2006/11/07: [impl] apply_all_case_fold() doesn't need to return all
case character combination for multi-character folding.
(ONIGENC_CASE_FOLD_MULTI_CHAR)
2006/11/07: [bug] (thanks Byte)
add { 0xa3, 0xb3 } to CaseFoldMap[] for KOI8-R.
-2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of
+2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of
the string range.
add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE.
2006/11/02: [impl] re-implement expand_case_fold_string() for
@@ -667,7 +687,7 @@ History
2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
2006/05/11: [bug] (thanks Yuji Kaneda)
- dead-lock in onig_end().
+ dead-lock in onig_end().
2006/05/11: [dist] update index.html.
2006/05/08: Version 4.0.3
@@ -719,7 +739,7 @@ History
use GNU libtool/automake.
change configure.in and add Makefile.am, sample/Makefile.am.
add AUTHORS file.
-2006/01/24: [dist] test programs return exit code -1 when test fails.
+2006/01/24: [dist] test programs return exit code -1 when test fails.
2006/01/24: [bug] (thanks KIMURA Koichi)
invalid syntax definition in ONIG_SYNTAX_GREP.
ONIG_SYN_OP_BRACE_INTERVAL
@@ -737,7 +757,7 @@ History
2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
2005/11/21: [bug] (thanks Allan Odgaard)
- utf-8 character comments in extended mode leads
+ utf-8 character comments in extended mode leads
invalid result.
ex. /(?x)(?<= # <any-utf-8 multibyte char>o\n~) /
fix onigenc_unicode_is_code_ctype() and
@@ -819,7 +839,7 @@ History
add new character encoding ONIG_ENCODING_GB18030.
2005/06/30: [bug] invalid ctype check for multibyte encodings.
("graph", "print")
- fix onigenc_mb2/4_is_code_ctype(),
+ fix onigenc_mb2/4_is_code_ctype(),
eucjp_is_code_ctype() and sjis_is_code_ctype().
2005/06/30: [bug] invalid conversion from code point to mbc in
onigenc_mb4_code_to_mbc().
@@ -894,7 +914,7 @@ History
remove oniggnu.h from make 19.
2005/03/01: [bug] (thanks matz) [ruby-dev:25778]
uninitialized member (OptEnv.backrefed_status)
- was used.
+ was used.
2005/02/19: Version 3.7.0
@@ -945,7 +965,7 @@ History
2005/01/19: [bug] (thanks Isao Sonobe)
callback function argument name_end of onig_foreach_name()
was wrong.
- name key of name table should be null terminated for
+ name key of name table should be null terminated for
character encoding length.
add strdup_with_null(), rename onig_strdup() to k_strdup().
use e->name_len in i_names().
@@ -1217,7 +1237,7 @@ History
RelAddrType, AbsAddrType and LengthType change
from short int to int type for the very long string match.
2004/06/14: [bug] (thanks Greg A. Woods)
- fix nmatch argument of regexec() is smaller than
+ fix nmatch argument of regexec() is smaller than
reg->num_mem + 1 case. (POSIX API)
2004/06/14: [spec] (thanks Greg A. Woods)
set pmatch to NULL if nmatch is 0 in regexec(). (POSIX API)
@@ -1397,7 +1417,7 @@ History
2004/02/23: [new] support ISO-8859-10. (ONIG_ENCODING_ISO_8859_10)
2004/02/20: [bug] fix iso_8859_4_mbc_is_case_ambig().
2004/02/20: [new] support ISO-8859-9. (ONIG_ENCODING_ISO_8859_9)
-2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4,
+2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4,
ISO-8859-6, ISO-8859-7, ISO-8859-8, KOI8_R.
2004/02/18: [bug] wrong replaced name OnigSyntaxGnuOnigex.
2004/02/17: [spec] check capture status for empty infinite loop.
@@ -1570,7 +1590,7 @@ History
2003/11/11: [spec] add syntax op. REG_SYN_OP_VARIABLE_META_CHARS.
2003/11/11: [spec] rename REG_SYN_OP_ESC_CAPITAL_Q_QUOTE to
REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE,
- REG_SYN_OP_QMARK_GROUP_EFFECT to
+ REG_SYN_OP_QMARK_GROUP_EFFECT to
REG_SYN_OP2_QMARK_GROUP_EFFECT.
2003/11/06: [impl] define THREAD_PASS as rb_thread_schedule() in Ruby mode.
2003/11/05: [spec] add syntax behavior REG_SYN_WARN_REDUNDANT_NESTED_REPEAT.
@@ -1587,7 +1607,7 @@ History
2003/10/03: [bug] (thanks nobu) [ruby-dev:21472]
sub-anchor of optimization map info was wrong
in concat_left_node_opt_info().
- ex. /^(x?y)/ = "xy" fail.
+ ex. /^(x?y)/ = "xy" fail.
2003/09/17: Version 1.9.4
@@ -1650,7 +1670,7 @@ History
2003/09/01: [dist] update doc/RE and doc/RE.ja.
2003/08/26: [bug] (thanks Guy Decoux)
should not double free node at the case TK_CC_CC_OPEN
- in parse_char_class().
+ in parse_char_class().
2003/08/19: Version 1.9.3
@@ -1662,8 +1682,8 @@ History
REG_SYN_OP2_ATMARK_CAPTURE_HISTORY.
2003/08/18: [spec] (thanks nobu)
don't use IMPORT in oniguruma.h and onigposix.h.
-2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb.
-2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in.
+2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb.
+2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in.
2003/08/18: [bug] REG_MBLEN_TABLE[SJIS][0xFD-0xFF] should be 1.
2003/08/18: [bug] (thanks nobu) mbctab_sjis[0x80] should be 0.
2003/08/18: [bug] (thanks nobu)
@@ -1692,7 +1712,7 @@ History
2003/07/29: [new] add regex_get_encoding(), regex_get_options() and
regex_get_syntax().
2003/07/25: [spec] (thanks akr)
- change group(...) to shy-group(?:...) if named group is
+ change group(...) to shy-group(?:...) if named group is
used in the pattern.
add REG_SYN_CAPTURE_ONLY_NAMED_GROUP.
2003/07/24: [spec] rename REG_OPTION_CAPTURE_ONLY_NAMED_GROUP to
@@ -1720,7 +1740,7 @@ History
set option status to effect memory in optimize_node_left().
2003/07/07: [impl] add opcode OP_ANYCHAR_ML, OP_ANYCHAR_ML_STAR and
OP_ANYCHAR_ML_START_PEEK_NEXT.
-2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1.
+2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1.
2003/07/07: [spec] rename REG_SYN_OP_QUOTE to REG_SYN_OP_ESC_Q_QUOTE.
2003/07/04: Version 1.9.1
@@ -1783,7 +1803,7 @@ History
2003/06/12: [spec] add syntax behavior REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED.
2003/06/12: [spec] invalid POSIX bracket should be error. ex. [[:upper :]]
2003/06/11: [new] char-class in char-class (as Java(TM)).
-2003/06/11: [spec] change AND operator in char-class from &&[..] to &&.
+2003/06/11: [spec] change AND operator in char-class from &&[..] to &&.
2003/06/04: [spec] {n,m}+ should not be possessive operator.
ex. a{3}+ should be (?:a{3})+
2003/06/03: [bug] should compare strings with min-length in is_not_included().
@@ -1947,7 +1967,7 @@ History
2003/02/26: [impl] add -win option to testconv.rb.
2003/02/25: [spec] allow to assign same name to different group.
add OP_BACKREF_MULTI.
-2003/02/24: [impl] reduce redundant repeat of empty target.
+2003/02/24: [impl] reduce redundant repeat of empty target.
ex. /()*/ ==> /()?/, /()+/ ==> /()/, /(?:)+/ ==> //
2003/02/24: [impl] change condition in regex_is_allow_reverse_match().
2003/02/24: [impl] convert i(/../, ...) functions in testconv.rb.
@@ -2016,7 +2036,7 @@ History
2003/02/04: [bug] typo miss in regex_region_copy().
2003/02/04: [impl] change THREAD_PASS macro. (regint.h)
2003/02/04: [dist] add API document file doc/API.
-2003/02/04: [tune] if sub_anchor has ANCHOR_BEGIN_LINE then
+2003/02/04: [tune] if sub_anchor has ANCHOR_BEGIN_LINE then
set REG_OPTIMIZE_EXACT_BM in set_optimize_exact_info().
2003/02/04: [spec] reimplement regex_clone() and it is obsoleted.
2003/02/04: [bug] add REGERR_OVER_THREAD_PASS_LIMIT_COUNT
@@ -2136,7 +2156,7 @@ History
2002/04/01: [dist] add COPYING.
2002/03/30: [spec] warn redundant nested repeat operator
in Ruby verbose mode. ex. (?:a*)?
-2002/03/30: [spec] nested repeat operator error check should be
+2002/03/30: [spec] nested repeat operator error check should be
same with GNU regex. (thanks Guy Decoux)
2002/03/30: [new] add \x{hexadecimal-wide-char}. (thanks matz)
2002/03/27: [bug] MBCTYPE_XXX symbol values should be same with GNU regex.
@@ -2199,7 +2219,7 @@ History
ex. /(?:abc){10}/
2002/03/06: [new] add a symbol REG_TRANSTABLE_USE_DEFAULT in regex.h.
2002/03/06: [impl] rename RegDefaultCharCode to RegDefaultCharEncoding.
-2002/03/06: [bug] if pattern has NULL(\000) char, infinite loop happens
+2002/03/06: [bug] if pattern has NULL(\000) char, infinite loop happens
in ScanMakeNode(). (beware of strchr(). thanks Nobu)
2002/03/06: [bug] range argument of ForwardSearchRange() is wrong.
ex. /\A.a/, /\G.a/ mismatched with "aa". (thanks Nobu)
diff --git a/README b/README
index 7089df2..90b420d 100644
--- a/README
+++ b/README
@@ -94,7 +94,7 @@ Usage
See doc/API for Oniguruma API.
If you want to disable UChar type (== unsigned char) definition
- in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
+ in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
include oniguruma.h.
If you want to disable regex_t type definition in oniguruma.h,
diff --git a/README.md b/README.md
index cf62b33..5c576cb 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
[![Build Status](https://travis-ci.org/kkos/oniguruma.svg?branch=master)](https://travis-ci.org/kkos/oniguruma)
+[![Code Quality: Cpp](https://img.shields.io/lgtm/grade/cpp/g/kkos/oniguruma.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/kkos/oniguruma/context:cpp)
+[![Total Alerts](https://img.shields.io/lgtm/alerts/g/kkos/oniguruma.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/kkos/oniguruma/alerts)
Oniguruma
=========
@@ -24,6 +26,12 @@ Supported character encodings:
* CP1251: contributed by Byte
+New feature of version 6.9.1
+--------------------------
+
+* Speed improvement (* especially UTF-8)
+
+
New feature of version 6.9.0
--------------------------
@@ -193,7 +201,7 @@ Usage
See doc/API for Oniguruma API.
If you want to disable UChar type (== unsigned char) definition
- in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
+ in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
include oniguruma.h.
If you want to disable regex_t type definition in oniguruma.h,
@@ -294,4 +302,4 @@ Source Files
|utf32_le.c |UTF-32LE encoding |
|unicode.c |common codes of Unicode encoding |
|unicode_fold_data.c|Unicode folding data |
-|windows/testc.c |Test program for Windowns (VC++) |
+|windows/testc.c |Test program for Windows (VC++) |
diff --git a/configure.ac b/configure.ac
index fef00cd..53bd8af 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT(onig, 6.9.0)
+AC_INIT(onig, 6.9.1)
AC_CONFIG_MACRO_DIR([m4])
@@ -10,7 +10,7 @@ AC_CONFIG_HEADERS([src/config.h])
dnl default value for STATISTICS
STATISTICS=""
AC_ARG_WITH(statistics,
- [ --with-statistics take matching time statistical data],
+ [ --with-statistics take matching time statistical data],
[ STATISTICS=-DONIG_DEBUG_STATISTICS ])
AC_SUBST(STATISTICS)
@@ -49,45 +49,16 @@ AC_PROG_MAKE_SET
dnl Checks for libraries.
dnl Checks for header files.
-AC_HEADER_STDC
-AC_CHECK_HEADERS(stdlib.h string.h strings.h limits.h sys/time.h unistd.h sys/times.h)
+AC_CHECK_HEADERS(sys/time.h unistd.h sys/times.h)
dnl Checks for typedefs, structures, and compiler characteristics.
AC_CHECK_SIZEOF(int, 4)
AC_CHECK_SIZEOF(short, 2)
AC_CHECK_SIZEOF(long, 4)
-AC_C_CONST
-AC_HEADER_TIME
dnl Checks for library functions.
AC_FUNC_ALLOCA
-AC_FUNC_MEMCMP
-
-AC_CACHE_CHECK(for prototypes, _cv_have_prototypes,
- [AC_COMPILE_IFELSE(
- [AC_LANG_PROGRAM([[int foo(int x) { return 0; }]],
- [[return foo(10);]])],
- [_cv_have_prototypes=yes],
- [_cv_have_prototypes=no])])
-if test "$_cv_have_prototypes" = yes; then
- AC_DEFINE(HAVE_PROTOTYPES,1,[Define if compilerr supports prototypes])
-fi
-
-AC_CACHE_CHECK(for variable length prototypes and stdarg.h, _cv_stdarg,
- [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
-#include <stdarg.h>
-int foo(int x, ...) {
- va_list va;
- va_start(va, x);
- va_arg(va, int);
- va_arg(va, char *);
- va_arg(va, double);
- return 0;
-}
-]], [[return foo(10, "", 3.14);]])],[_cv_stdarg=yes],[_cv_stdarg=no])])
-if test "$_cv_stdarg" = yes; then
- AC_DEFINE(HAVE_STDARG_PROTOTYPES,1,[Define if compiler supports stdarg prototypes])
-fi
-
-AC_OUTPUT([Makefile src/Makefile test/Makefile sample/Makefile onig-config], [chmod +x onig-config])
+AC_CONFIG_FILES([Makefile src/Makefile test/Makefile sample/Makefile onig-config])
+AC_CONFIG_COMMANDS([default],[chmod +x onig-config],[])
+AC_OUTPUT
diff --git a/contributed/makefile b/contributed/makefile
index c50ab36..f44a3c0 100644
--- a/contributed/makefile
+++ b/contributed/makefile
@@ -1,4 +1,3 @@
-
ONIG_LIB=../src/.libs/libonig.a
LIBS=$(ONIG_LIB) /usr/local/lib/libLLVMFuzzerMain.a
@@ -18,5 +17,5 @@ $(ONIG_LIB):
-clean:
+clean:
rm -f $(TARGETS)
diff --git a/doc/API b/doc/API
index 08f9227..24b531a 100644
--- a/doc/API
+++ b/doc/API
@@ -182,7 +182,7 @@ Oniguruma API Version 6.8.0 2018/03/13
ci->target_enc: target string character encoding.
ci->syntax: address of pattern syntax definition.
ci->option: compile time option.
- ci->case_fold_flag: character matching case fold bit flag for
+ ci->case_fold_flag: character matching case fold bit flag for
ONIG_OPTION_IGNORECASE mode.
ONIGENC_CASE_FOLD_MIN: minimum
diff --git a/doc/RE.ja b/doc/RE.ja
index a83bfb4..b35a51e 100644
--- a/doc/RE.ja
+++ b/doc/RE.ja
@@ -54,7 +54,7 @@
\t, \n, \v, \f, \r, \x20
Unicodeの場合:
- U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL),
+ U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL),
General_Category -- Line_Separator
-- Paragraph_Separator
-- Space_Separator
diff --git a/index.html b/index.html
index f55084e..5ad8231 100644
--- a/index.html
+++ b/index.html
@@ -8,7 +8,7 @@
<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
<p>
-(c) K.Kosako, updated at: 2018/08/31
+(c) K.Kosako, updated at: 2018/12/06
</p>
<dl>
@@ -16,6 +16,7 @@
<dt><b>What's new</b>
</font>
<ul>
+<li>2018/12/11: Version 6.9.1 released.</li>
<li>2018/09/03: Version 6.9.0 released.</li>
<li>2018/04/17: Version 6.8.2 released.</li>
<li>2018/03/19: Version 6.8.1 released.</li>
diff --git a/index_ja.html b/index_ja.html
index 06c1753..0ada788 100644
--- a/index_ja.html
+++ b/index_ja.html
@@ -8,7 +8,7 @@
<h1>鬼車</h1>
<p>
-(c) K.Kosako, 最終更新: 2018/09/03
+(c) K.Kosako, 最終更新: 2018/12/06
</p>
<dl>
@@ -16,6 +16,7 @@
<dt><b>更新情報</b>
</font>
<ul>
+<li>2018/12/11: Version 6.9.1 リリース</li>
<li>2018/09/03: Version 6.9.0 リリース</li>
<li>2018/04/17: Version 6.8.2 リリース</li>
<li>2018/03/19: Version 6.8.1 リリース</li>
diff --git a/onig-config.in b/onig-config.in
index 57fe3ad..788d6ba 100644
--- a/onig-config.in
+++ b/onig-config.in
@@ -35,7 +35,7 @@ while test $# -gt 0; do
case "$1" in
-*=*) val=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'`
;;
- *) val=
+ *) val=
;;
esac
diff --git a/oniguruma.pc.in b/oniguruma.pc.in
index c78155d..ab27016 100644
--- a/oniguruma.pc.in
+++ b/oniguruma.pc.in
@@ -11,4 +11,3 @@ Version: @PACKAGE_VERSION@
Requires:
Libs: -L${libdir} -lonig
Cflags: -I${includedir}
-
diff --git a/sample/.gitignore b/sample/.gitignore
deleted file mode 100644
index b6b591b..0000000
--- a/sample/.gitignore
+++ /dev/null
@@ -1,14 +0,0 @@
-/crnl
-/encode
-/listcap
-/names
-/posix
-/simple
-/sql
-/syntax
-/user_property
-/callout
-/echo
-/count
-/bug_fix
-/log*
diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt
index 58d6a99..c0bd057 100644
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@@ -1,4 +1,3 @@
-
cmake_minimum_required(VERSION 2.8)
project(oniguruma_sample C)
@@ -34,4 +33,3 @@ target_link_libraries(sql onig)
add_executable(syntax syntax.c)
target_link_libraries(syntax onig)
-
diff --git a/sample/crnl.c b/sample/crnl.c
index 3f511e9..ee6a516 100644
--- a/sample/crnl.c
+++ b/sample/crnl.c
@@ -2,7 +2,7 @@
* crnl.c 2007/05/30 K.Kosako
*
* !!! You should enable USE_CRNL_AS_LINE_TERMINATOR. !!!
- *
+ *
* USE_CRNL_AS_LINE_TERMINATOR config test program.
*/
#include <stdio.h>
diff --git a/sample/listcap.c b/sample/listcap.c
index 6f9880c..8598d6a 100644
--- a/sample/listcap.c
+++ b/sample/listcap.c
@@ -98,7 +98,7 @@ extern int main(int argc, char* argv[])
/* enable capture hostory */
onig_copy_syntax(&syn, ONIG_SYNTAX_DEFAULT);
- onig_set_syntax_op2(&syn,
+ onig_set_syntax_op2(&syn,
onig_get_syntax_op2(&syn) | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY);
r = ex(str1, pattern1, &syn);
diff --git a/src/.gitignore b/src/.gitignore
deleted file mode 100644
index 50ae793..0000000
--- a/src/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-Makefile
-config.h
-CaseFolding.txt
-unicode_fold?_key.gperf
-unicode_unfold_key.gperf
-UNICODE_PROPERTIES
-*.o
-*.so
-*.lo
-*.la
-*~
-*.txt
-.libs/
-.deps/
-/mktable
diff --git a/src/Makefile.windows b/src/Makefile.windows
index e98dc2e..762cf07 100644
--- a/src/Makefile.windows
+++ b/src/Makefile.windows
@@ -1,183 +1,183 @@
-# Oniguruma Makefile for Windows
-
-product_name = oniguruma
-
-CPPFLAGS =
-CFLAGS = -O2 -nologo /W3
-LDFLAGS =
-LOADLIBES =
-ARLIB = lib
-ARLIB_FLAGS = -nologo
-ARDLL = cl
-ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
-LINKFLAGS = -link -incremental:no -pdb:none
-
-INSTALL = install -c
-CP = copy
-CC = cl
-DEFS = -DHAVE_CONFIG_H
-
-subdirs =
-
-libbase = onig
-libname = $(libbase)_s.lib
-dllname = $(libbase).dll
-dlllib = $(libbase).lib
-
-!IF defined(ENABLE_POSIX_API) && "$(ENABLE_POSIX_API)" == "NO"
-posixobjs =
-!ELSE
-posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj
-!ENDIF
-
-onigheaders = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h
-posixheaders = $(ONIG_DIR)/onigposix.h
-headers = $(posixheaders) $(onigheaders)
-
-onigobjs = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \
- $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \
- $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj
-libobjs = $(onigobjs) $(posixobjs)
-
-jp_objs = $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj
-iso8859_objs = $(BUILD_DIR)/iso8859_1.obj $(BUILD_DIR)/iso8859_2.obj \
- $(BUILD_DIR)/iso8859_3.obj $(BUILD_DIR)/iso8859_4.obj \
- $(BUILD_DIR)/iso8859_5.obj $(BUILD_DIR)/iso8859_6.obj \
- $(BUILD_DIR)/iso8859_7.obj $(BUILD_DIR)/iso8859_8.obj \
- $(BUILD_DIR)/iso8859_9.obj $(BUILD_DIR)/iso8859_10.obj \
- $(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \
- $(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \
- $(BUILD_DIR)/iso8859_16.obj
-
-encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \
- $(BUILD_DIR)/unicode.obj \
- $(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \
- $(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \
- $(jp_objs) $(iso8859_objs) \
- $(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \
- $(BUILD_DIR)/gb18030.obj \
- $(BUILD_DIR)/koi8_r.obj \
- $(BUILD_DIR)/cp1251.obj \
- $(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \
- $(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \
- $(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj # $(BUILD_DIR)/koi8.obj
-
-onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
- $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c
-posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c
-libsources = $(posixsources) $(onigsources)
-
-patchfiles = re.c.168.patch re.c.181.patch
-distfiles = README COPYING HISTORY \
- Makefile.in configure.in config.h.in configure \
- $(headers) $(libsources) $(patchfiles) \
- test.rb testconv.rb
-testc = testc
-testp = testp
-
-makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
-
-.SUFFIXES:
-.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
-
-{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj:
- $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
-
-# targets
-default: all
-
-setup:
- $(CP) ..\win32\config.h config.h
- $(CP) ..\win32\testc.c testc.c
-
-
-all: $(libname) $(dllname)
-
-$(libname): $(libobjs) $(encobjs)
- $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs)
-
-$(dllname): $(libobjs) $(encobjs)
- $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
-
-$(BUILD_DIR)/regparse.obj: $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regext.obj: $(ONIG_DIR)/regext.c $(onigheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regtrav.obj: $(ONIG_DIR)/regtrav.c $(onigheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regcomp.obj: $(ONIG_DIR)/regcomp.c $(onigheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regexec.obj: $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/reggnu.obj: $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h
-$(BUILD_DIR)/regerror.obj: $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regenc.obj: $(ONIG_DIR)/regenc.c $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regposix.obj: $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h
-$(BUILD_DIR)/st.obj: $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h
-$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h
-
-$(BUILD_DIR)/ascii.obj: $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/unicode.obj: $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf8.obj: $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf16_be.obj: $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf16_le.obj: $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf32_be.obj: $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/utf32_le.obj: $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/euc_jp.obj: $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/euc_tw.obj: $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/euc_kr.obj: $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/sjis.obj: $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_1.obj: $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_2.obj: $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_3.obj: $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_4.obj: $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_5.obj: $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_6.obj: $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_7.obj: $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_8.obj: $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_9.obj: $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/koi8.obj: $(ONIG_DIR)/koi8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/koi8_r.obj: $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/cp1251.obj: $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/big5.obj: $(ONIG_DIR)/big5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/gb18030.obj: $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/euc_jp_prop.obj: $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h
-$(BUILD_DIR)/sjis_prop.obj: $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h
-$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-
-# C library test
-ctest: $(testc)
- .\$(testc)
-
-# POSIX C library test
-ptest: $(testp)
- .\$(testp)
-
-$(testc): $(testc).c $(libname)
- $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
-
-$(testp): $(testc).c $(dlllib)
- $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib)
-
-$(testc)u: $(testc)u.c $(libname)
- $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
-
-clean:
- del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj
-
-
-samples: all
- $(CC) $(CFLAGS) -I. /Fe:simple $(ONIG_DIR)\sample\simple.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:posix $(ONIG_DIR)\sample\posix.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:names $(ONIG_DIR)\sample\names.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:listcap $(ONIG_DIR)\sample\listcap.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:sql $(ONIG_DIR)\sample\sql.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:encode $(ONIG_DIR)\sample\encode.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:syntax $(ONIG_DIR)\sample\syntax.c $(dlllib) \ No newline at end of file
+# Oniguruma Makefile for Windows
+
+product_name = oniguruma
+
+CPPFLAGS =
+CFLAGS = -O2 -nologo /W3
+LDFLAGS =
+LOADLIBES =
+ARLIB = lib
+ARLIB_FLAGS = -nologo
+ARDLL = cl
+ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
+LINKFLAGS = -link -incremental:no -pdb:none
+
+INSTALL = install -c
+CP = copy
+CC = cl
+DEFS = -DHAVE_CONFIG_H
+
+subdirs =
+
+libbase = onig
+libname = $(libbase)_s.lib
+dllname = $(libbase).dll
+dlllib = $(libbase).lib
+
+!IF defined(ENABLE_POSIX_API) && "$(ENABLE_POSIX_API)" == "NO"
+posixobjs =
+!ELSE
+posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj
+!ENDIF
+
+onigheaders = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h
+posixheaders = $(ONIG_DIR)/onigposix.h
+headers = $(posixheaders) $(onigheaders)
+
+onigobjs = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \
+ $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \
+ $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj
+libobjs = $(onigobjs) $(posixobjs)
+
+jp_objs = $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj
+iso8859_objs = $(BUILD_DIR)/iso8859_1.obj $(BUILD_DIR)/iso8859_2.obj \
+ $(BUILD_DIR)/iso8859_3.obj $(BUILD_DIR)/iso8859_4.obj \
+ $(BUILD_DIR)/iso8859_5.obj $(BUILD_DIR)/iso8859_6.obj \
+ $(BUILD_DIR)/iso8859_7.obj $(BUILD_DIR)/iso8859_8.obj \
+ $(BUILD_DIR)/iso8859_9.obj $(BUILD_DIR)/iso8859_10.obj \
+ $(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \
+ $(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \
+ $(BUILD_DIR)/iso8859_16.obj
+
+encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \
+ $(BUILD_DIR)/unicode.obj \
+ $(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \
+ $(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \
+ $(jp_objs) $(iso8859_objs) \
+ $(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \
+ $(BUILD_DIR)/gb18030.obj \
+ $(BUILD_DIR)/koi8_r.obj \
+ $(BUILD_DIR)/cp1251.obj \
+ $(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \
+ $(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \
+ $(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj # $(BUILD_DIR)/koi8.obj
+
+onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
+ $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c
+posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c
+libsources = $(posixsources) $(onigsources)
+
+patchfiles = re.c.168.patch re.c.181.patch
+distfiles = README COPYING HISTORY \
+ Makefile.in configure.in config.h.in configure \
+ $(headers) $(libsources) $(patchfiles) \
+ test.rb testconv.rb
+testc = testc
+testp = testp
+
+makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
+
+.SUFFIXES:
+.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
+
+{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj:
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
+
+# targets
+default: all
+
+setup:
+ $(CP) ..\win32\config.h config.h
+ $(CP) ..\win32\testc.c testc.c
+
+
+all: $(libname) $(dllname)
+
+$(libname): $(libobjs) $(encobjs)
+ $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs)
+
+$(dllname): $(libobjs) $(encobjs)
+ $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
+
+$(BUILD_DIR)/regparse.obj: $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regext.obj: $(ONIG_DIR)/regext.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regtrav.obj: $(ONIG_DIR)/regtrav.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regcomp.obj: $(ONIG_DIR)/regcomp.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regexec.obj: $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/reggnu.obj: $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h
+$(BUILD_DIR)/regerror.obj: $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regenc.obj: $(ONIG_DIR)/regenc.c $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regposix.obj: $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/st.obj: $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h
+$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h
+
+$(BUILD_DIR)/ascii.obj: $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode.obj: $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf8.obj: $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf16_be.obj: $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf16_le.obj: $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf32_be.obj: $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf32_le.obj: $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_jp.obj: $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_tw.obj: $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_kr.obj: $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/sjis.obj: $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_1.obj: $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_2.obj: $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_3.obj: $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_4.obj: $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_5.obj: $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_6.obj: $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_7.obj: $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_8.obj: $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_9.obj: $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/koi8.obj: $(ONIG_DIR)/koi8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/koi8_r.obj: $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/cp1251.obj: $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/big5.obj: $(ONIG_DIR)/big5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/gb18030.obj: $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_jp_prop.obj: $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h
+$(BUILD_DIR)/sjis_prop.obj: $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h
+$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+
+# C library test
+ctest: $(testc)
+ .\$(testc)
+
+# POSIX C library test
+ptest: $(testp)
+ .\$(testp)
+
+$(testc): $(testc).c $(libname)
+ $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
+
+$(testp): $(testc).c $(dlllib)
+ $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib)
+
+$(testc)u: $(testc)u.c $(libname)
+ $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
+
+clean:
+ del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj
+
+
+samples: all
+ $(CC) $(CFLAGS) -I. /Fe:simple $(ONIG_DIR)\sample\simple.c $(dlllib)
+ $(CC) $(CFLAGS) -I. /Fe:posix $(ONIG_DIR)\sample\posix.c $(dlllib)
+ $(CC) $(CFLAGS) -I. /Fe:names $(ONIG_DIR)\sample\names.c $(dlllib)
+ $(CC) $(CFLAGS) -I. /Fe:listcap $(ONIG_DIR)\sample\listcap.c $(dlllib)
+ $(CC) $(CFLAGS) -I. /Fe:sql $(ONIG_DIR)\sample\sql.c $(dlllib)
+ $(CC) $(CFLAGS) -I. /Fe:encode $(ONIG_DIR)\sample\encode.c $(dlllib)
+ $(CC) $(CFLAGS) -I. /Fe:syntax $(ONIG_DIR)\sample\syntax.c $(dlllib)
diff --git a/src/ascii.c b/src/ascii.c
index eb38944..e83e4d6 100644
--- a/src/ascii.c
+++ b/src/ascii.c
@@ -113,6 +113,6 @@ OnigEncodingType OnigEncodingASCII = {
init,
0, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/big5.c b/src/big5.c
index dbc750d..f931ade 100644
--- a/src/big5.c
+++ b/src/big5.c
@@ -151,7 +151,7 @@ big5_left_adjust_char_head(const UChar* start, const UChar* s)
p++;
break;
}
- }
+ }
}
len = enclen(ONIG_ENCODING_BIG5, p);
if (p + len > s) return (UChar* )p;
@@ -187,6 +187,6 @@ OnigEncodingType OnigEncodingBIG5 = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in
index b59cc8d..f49177f 100644
--- a/src/config.h.cmake.in
+++ b/src/config.h.cmake.in
@@ -13,27 +13,9 @@
*/
#cmakedefine HAVE_ALLOCA_H ${HAVE_ALLOCA_H}
-/* Define if compilerr supports prototypes */
-#cmakedefine HAVE_PROTOTYPES ${HAVE_PROTOTYPES}
-
-/* Define if compiler supports stdarg prototypes */
-#cmakedefine HAVE_STDARG_PROTOTYPES ${HAVE_STDARG_PROTOTYPES}
-
/* Define to 1 if you have the <stdint.h> header file. */
#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H}
-/* Define to 1 if you have the <stdlib.h> header file. */
-#cmakedefine HAVE_STDLIB_H ${HAVE_STDLIB_H}
-
-/* Define to 1 if you have the <strings.h> header file. */
-#cmakedefine HAVE_STRINGS_H ${HAVE_STRINGS_H}
-
-/* Define to 1 if you have the <string.h> header file. */
-#cmakedefine HAVE_STRING_H ${HAVE_STRING_H}
-
-/* Define to 1 if you have the <limits.h> header file. */
-#cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H}
-
/* Define to 1 if you have the <sys/times.h> header file. */
#cmakedefine HAVE_SYS_TIMES_H ${HAVE_SYS_TIMES_H}
@@ -64,9 +46,6 @@
/* The size of `short', as computed by sizeof. */
#cmakedefine SIZEOF_SHORT ${SIZEOF_SHORT}
-/* Define to 1 if you have the ANSI C header files. */
-#cmakedefine STDC_HEADERS ${STDC_HEADERS}
-
/* Define if enable CR+NL as line terminator */
#cmakedefine USE_CRNL_AS_LINE_TERMINATOR ${USE_CRNL_AS_LINE_TERMINATOR}
diff --git a/src/config.h.win32 b/src/config.h.win32
index 12609df..a8a8426 100644
--- a/src/config.h.win32
+++ b/src/config.h.win32
@@ -1,81 +1,52 @@
-#define STDC_HEADERS 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_FLOAT_H 1
-#define HAVE_OFF_T 1
-#define SIZEOF_INT 4
-#define SIZEOF_SHORT 2
-#define SIZEOF_LONG 4
-#define SIZEOF_LONG_LONG 8
-#define SIZEOF___INT64 8
-#define SIZEOF_OFF_T 4
-#define SIZEOF_VOIDP 4
-#define SIZEOF_FLOAT 4
-#define SIZEOF_DOUBLE 8
-#define HAVE_PROTOTYPES 1
-#define TOKEN_PASTE(x,y) x##y
-#define HAVE_STDARG_PROTOTYPES 1
-#ifndef NORETURN
-#if _MSC_VER > 1100
-#define NORETURN(x) __declspec(noreturn) x
-#else
-#define NORETURN(x) x
-#endif
-#endif
-#define HAVE_DECL_SYS_NERR 1
-#define STDC_HEADERS 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_UTIME_H 1
-#define HAVE_MEMORY_H 1
-#define uid_t int
-#define gid_t int
-#define GETGROUPS_T int
-#define HAVE_ALLOCA 1
-#define HAVE_DUP2 1
-#define HAVE_MEMCMP 1
-#define HAVE_MEMMOVE 1
-#define HAVE_MKDIR 1
-#define HAVE_STRCASECMP 1
-#define HAVE_STRNCASECMP 1
-#define HAVE_STRERROR 1
-#define HAVE_STRFTIME 1
-#define HAVE_STRCHR 1
-#define HAVE_STRSTR 1
-#define HAVE_STRTOD 1
-#define HAVE_STRTOL 1
-#define HAVE_STRTOUL 1
-#define HAVE_FLOCK 1
-#define HAVE_VSNPRINTF 1
-#define HAVE_FINITE 1
-#define HAVE_FMOD 1
-#define HAVE_FREXP 1
-#define HAVE_HYPOT 1
-#define HAVE_MODF 1
-#define HAVE_WAITPID 1
-#define HAVE_CHSIZE 1
-#define HAVE_TIMES 1
-#define HAVE__SETJMP 1
-#define HAVE_TELLDIR 1
-#define HAVE_SEEKDIR 1
-#define HAVE_MKTIME 1
-#define HAVE_COSH 1
-#define HAVE_SINH 1
-#define HAVE_TANH 1
-#define HAVE_EXECVE 1
-#define HAVE_TZNAME 1
-#define HAVE_DAYLIGHT 1
-#define SETPGRP_VOID 1
-#define inline __inline
-#define NEED_IO_SEEK_BETWEEN_RW 1
-#define RSHIFT(x,y) ((x)>>(int)y)
-#define FILE_COUNT _cnt
-#define FILE_READPTR _ptr
-#define DEFAULT_KCODE KCODE_NONE
-#define DLEXT ".so"
-#define DLEXT2 ".dll"
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_OFF_T 1
+#define SIZEOF_INT 4
+#define SIZEOF_SHORT 2
+#define SIZEOF_LONG 4
+#define SIZEOF_LONG_LONG 8
+#define SIZEOF___INT64 8
+#define SIZEOF_OFF_T 4
+#define SIZEOF_VOIDP 4
+#define SIZEOF_FLOAT 4
+#define SIZEOF_DOUBLE 8
+#define SIZEOF_SIZE_T 4
+#define TOKEN_PASTE(x,y) x##y
+#ifndef NORETURN
+#if _MSC_VER > 1100
+#define NORETURN(x) __declspec(noreturn) x
+#else
+#define NORETURN(x) x
+#endif
+#endif
+#define HAVE_DECL_SYS_NERR 1
+#define HAVE_FCNTL_H 1
+#define HAVE_SYS_UTIME_H 1
+#define HAVE_MEMORY_H 1
+#define uid_t int
+#define gid_t int
+#define GETGROUPS_T int
+#define HAVE_ALLOCA 1
+#define HAVE_DUP2 1
+#define HAVE_MKDIR 1
+#define HAVE_FLOCK 1
+#define HAVE_VSNPRINTF 1
+#define HAVE_FINITE 1
+#define HAVE_HYPOT 1
+#define HAVE_WAITPID 1
+#define HAVE_CHSIZE 1
+#define HAVE_TIMES 1
+#define HAVE_TELLDIR 1
+#define HAVE_SEEKDIR 1
+#define HAVE_EXECVE 1
+#define HAVE_DAYLIGHT 1
+#define SETPGRP_VOID 1
+#define inline __inline
+#define NEED_IO_SEEK_BETWEEN_RW 1
+#define RSHIFT(x,y) ((x)>>(int)y)
+#define FILE_COUNT _cnt
+#define FILE_READPTR _ptr
+#define DEFAULT_KCODE KCODE_NONE
+#define DLEXT ".so"
+#define DLEXT2 ".dll"
diff --git a/src/config.h.win64 b/src/config.h.win64
index e892086..59485fa 100644
--- a/src/config.h.win64
+++ b/src/config.h.win64
@@ -1,81 +1,52 @@
-#define STDC_HEADERS 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_FLOAT_H 1
-#define HAVE_OFF_T 1
-#define SIZEOF_INT 4
-#define SIZEOF_SHORT 2
-#define SIZEOF_LONG 4
-#define SIZEOF_LONG_LONG 8
-#define SIZEOF___INT64 8
-#define SIZEOF_OFF_T 4
-#define SIZEOF_VOIDP 8
-#define SIZEOF_FLOAT 4
-#define SIZEOF_DOUBLE 8
-#define HAVE_PROTOTYPES 1
-#define TOKEN_PASTE(x,y) x##y
-#define HAVE_STDARG_PROTOTYPES 1
-#ifndef NORETURN
-#if _MSC_VER > 1100
-#define NORETURN(x) __declspec(noreturn) x
-#else
-#define NORETURN(x) x
-#endif
-#endif
-#define HAVE_DECL_SYS_NERR 1
-#define STDC_HEADERS 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_UTIME_H 1
-#define HAVE_MEMORY_H 1
-#define uid_t int
-#define gid_t int
-#define GETGROUPS_T int
-#define HAVE_ALLOCA 1
-#define HAVE_DUP2 1
-#define HAVE_MEMCMP 1
-#define HAVE_MEMMOVE 1
-#define HAVE_MKDIR 1
-#define HAVE_STRCASECMP 1
-#define HAVE_STRNCASECMP 1
-#define HAVE_STRERROR 1
-#define HAVE_STRFTIME 1
-#define HAVE_STRCHR 1
-#define HAVE_STRSTR 1
-#define HAVE_STRTOD 1
-#define HAVE_STRTOL 1
-#define HAVE_STRTOUL 1
-#define HAVE_FLOCK 1
-#define HAVE_VSNPRINTF 1
-#define HAVE_FINITE 1
-#define HAVE_FMOD 1
-#define HAVE_FREXP 1
-#define HAVE_HYPOT 1
-#define HAVE_MODF 1
-#define HAVE_WAITPID 1
-#define HAVE_CHSIZE 1
-#define HAVE_TIMES 1
-#define HAVE__SETJMP 1
-#define HAVE_TELLDIR 1
-#define HAVE_SEEKDIR 1
-#define HAVE_MKTIME 1
-#define HAVE_COSH 1
-#define HAVE_SINH 1
-#define HAVE_TANH 1
-#define HAVE_EXECVE 1
-#define HAVE_TZNAME 1
-#define HAVE_DAYLIGHT 1
-#define SETPGRP_VOID 1
-#define inline __inline
-#define NEED_IO_SEEK_BETWEEN_RW 1
-#define RSHIFT(x,y) ((x)>>(int)y)
-#define FILE_COUNT _cnt
-#define FILE_READPTR _ptr
-#define DEFAULT_KCODE KCODE_NONE
-#define DLEXT ".so"
-#define DLEXT2 ".dll"
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_OFF_T 1
+#define SIZEOF_INT 4
+#define SIZEOF_SHORT 2
+#define SIZEOF_LONG 4
+#define SIZEOF_LONG_LONG 8
+#define SIZEOF___INT64 8
+#define SIZEOF_OFF_T 4
+#define SIZEOF_VOIDP 8
+#define SIZEOF_FLOAT 4
+#define SIZEOF_DOUBLE 8
+#define SIZEOF_SIZE_T 8
+#define TOKEN_PASTE(x,y) x##y
+#ifndef NORETURN
+#if _MSC_VER > 1100
+#define NORETURN(x) __declspec(noreturn) x
+#else
+#define NORETURN(x) x
+#endif
+#endif
+#define HAVE_DECL_SYS_NERR 1
+#define HAVE_FCNTL_H 1
+#define HAVE_SYS_UTIME_H 1
+#define HAVE_MEMORY_H 1
+#define uid_t int
+#define gid_t int
+#define GETGROUPS_T int
+#define HAVE_ALLOCA 1
+#define HAVE_DUP2 1
+#define HAVE_MKDIR 1
+#define HAVE_FLOCK 1
+#define HAVE_VSNPRINTF 1
+#define HAVE_FINITE 1
+#define HAVE_HYPOT 1
+#define HAVE_WAITPID 1
+#define HAVE_CHSIZE 1
+#define HAVE_TIMES 1
+#define HAVE_TELLDIR 1
+#define HAVE_SEEKDIR 1
+#define HAVE_EXECVE 1
+#define HAVE_DAYLIGHT 1
+#define SETPGRP_VOID 1
+#define inline __inline
+#define NEED_IO_SEEK_BETWEEN_RW 1
+#define RSHIFT(x,y) ((x)>>(int)y)
+#define FILE_COUNT _cnt
+#define FILE_READPTR _ptr
+#define DEFAULT_KCODE KCODE_NONE
+#define DLEXT ".so"
+#define DLEXT2 ".dll"
diff --git a/src/config.h.windows.in b/src/config.h.windows.in
index 57fb426..0a18db8 100644
--- a/src/config.h.windows.in
+++ b/src/config.h.windows.in
@@ -1,85 +1,57 @@
-#define STDC_HEADERS 1
-#define HAVE_SYS_TYPES_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_FLOAT_H 1
-#define HAVE_OFF_T 1
-#define SIZEOF_INT 4
-#define SIZEOF_SHORT 2
-#define SIZEOF_LONG 4
-#define SIZEOF_LONG_LONG 8
-#define SIZEOF___INT64 8
-#define SIZEOF_OFF_T 4
-#ifdef _WIN64
-#define SIZEOF_VOIDP 8
-#else
-#define SIZEOF_VOIDP 4
-#endif
-#define SIZEOF_FLOAT 4
-#define SIZEOF_DOUBLE 8
-#define HAVE_PROTOTYPES 1
-#define TOKEN_PASTE(x,y) x##y
-#define HAVE_STDARG_PROTOTYPES 1
-#ifndef NORETURN
-#if _MSC_VER > 1100
-#define NORETURN(x) __declspec(noreturn) x
-#else
-#define NORETURN(x) x
-#endif
-#endif
-#define HAVE_DECL_SYS_NERR 1
-#define STDC_HEADERS 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_UTIME_H 1
-#define HAVE_MEMORY_H 1
-#define uid_t int
-#define gid_t int
-#define GETGROUPS_T int
-#define HAVE_ALLOCA 1
-#define HAVE_DUP2 1
-#define HAVE_MEMCMP 1
-#define HAVE_MEMMOVE 1
-#define HAVE_MKDIR 1
-#define HAVE_STRCASECMP 1
-#define HAVE_STRNCASECMP 1
-#define HAVE_STRERROR 1
-#define HAVE_STRFTIME 1
-#define HAVE_STRCHR 1
-#define HAVE_STRSTR 1
-#define HAVE_STRTOD 1
-#define HAVE_STRTOL 1
-#define HAVE_STRTOUL 1
-#define HAVE_FLOCK 1
-#define HAVE_VSNPRINTF 1
-#define HAVE_FINITE 1
-#define HAVE_FMOD 1
-#define HAVE_FREXP 1
-#define HAVE_HYPOT 1
-#define HAVE_MODF 1
-#define HAVE_WAITPID 1
-#define HAVE_CHSIZE 1
-#define HAVE_TIMES 1
-#define HAVE__SETJMP 1
-#define HAVE_TELLDIR 1
-#define HAVE_SEEKDIR 1
-#define HAVE_MKTIME 1
-#define HAVE_COSH 1
-#define HAVE_SINH 1
-#define HAVE_TANH 1
-#define HAVE_EXECVE 1
-#define HAVE_TZNAME 1
-#define HAVE_DAYLIGHT 1
-#define SETPGRP_VOID 1
-#define inline __inline
-#define NEED_IO_SEEK_BETWEEN_RW 1
-#define RSHIFT(x,y) ((x)>>(int)y)
-#define FILE_COUNT _cnt
-#define FILE_READPTR _ptr
-#define DEFAULT_KCODE KCODE_NONE
-#define DLEXT ".so"
-#define DLEXT2 ".dll"
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_OFF_T 1
+#define SIZEOF_INT 4
+#define SIZEOF_SHORT 2
+#define SIZEOF_LONG 4
+#define SIZEOF_LONG_LONG 8
+#define SIZEOF___INT64 8
+#define SIZEOF_OFF_T 4
+#ifdef _WIN64
+#define SIZEOF_VOIDP 8
+#define SIZEOF_SIZE_T 8
+#else
+#define SIZEOF_VOIDP 4
+#define SIZEOF_SIZE_T 4
+#endif
+#define SIZEOF_FLOAT 4
+#define SIZEOF_DOUBLE 8
+#define TOKEN_PASTE(x,y) x##y
+#ifndef NORETURN
+#if _MSC_VER > 1100
+#define NORETURN(x) __declspec(noreturn) x
+#else
+#define NORETURN(x) x
+#endif
+#endif
+#define HAVE_DECL_SYS_NERR 1
+#define HAVE_FCNTL_H 1
+#define HAVE_SYS_UTIME_H 1
+#define HAVE_MEMORY_H 1
+#define uid_t int
+#define gid_t int
+#define GETGROUPS_T int
+#define HAVE_ALLOCA 1
+#define HAVE_DUP2 1
+#define HAVE_MKDIR 1
+#define HAVE_FLOCK 1
+#define HAVE_VSNPRINTF 1
+#define HAVE_FINITE 1
+#define HAVE_HYPOT 1
+#define HAVE_WAITPID 1
+#define HAVE_CHSIZE 1
+#define HAVE_TIMES 1
+#define HAVE_TELLDIR 1
+#define HAVE_SEEKDIR 1
+#define HAVE_EXECVE 1
+#define HAVE_DAYLIGHT 1
+#define SETPGRP_VOID 1
+#define inline __inline
+#define NEED_IO_SEEK_BETWEEN_RW 1
+#define RSHIFT(x,y) ((x)>>(int)y)
+#define FILE_COUNT _cnt
+#define FILE_READPTR _ptr
+#define DEFAULT_KCODE KCODE_NONE
+#define DLEXT ".so"
+#define DLEXT2 ".dll"
diff --git a/src/cp1251.c b/src/cp1251.c
index e217037..7b19855 100644
--- a/src/cp1251.c
+++ b/src/cp1251.c
@@ -200,6 +200,6 @@ OnigEncodingType OnigEncodingCP1251 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/euc_jp.c b/src/euc_jp.c
index 5d3c1f9..c1ab89e 100644
--- a/src/euc_jp.c
+++ b/src/euc_jp.c
@@ -151,7 +151,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf)
#if 1
if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
return ONIGERR_INVALID_CODE_POINT_VALUE;
-#endif
+#endif
return (int )(p - buf);
}
@@ -307,6 +307,6 @@ OnigEncodingType OnigEncodingEUC_JP = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0,
0, 0
};
diff --git a/src/euc_kr.c b/src/euc_kr.c
index def311b..9b62514 100644
--- a/src/euc_kr.c
+++ b/src/euc_kr.c
@@ -161,7 +161,9 @@ OnigEncodingType OnigEncodingEUC_KR = {
euckr_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0,
+ 0, 0
};
/* Same with OnigEncodingEUC_KR except the name */
@@ -185,6 +187,6 @@ OnigEncodingType OnigEncodingEUC_CN = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0,
0, 0
};
diff --git a/src/euc_tw.c b/src/euc_tw.c
index 8738598..7683336 100644
--- a/src/euc_tw.c
+++ b/src/euc_tw.c
@@ -168,6 +168,6 @@ OnigEncodingType OnigEncodingEUC_TW = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/gb18030.c b/src/gb18030.c
index d4a1108..7654432 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -535,6 +535,6 @@ OnigEncodingType OnigEncodingGB18030 = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/gperf_fold_key_conv.py b/src/gperf_fold_key_conv.py
index 376d343..f453186 100755
--- a/src/gperf_fold_key_conv.py
+++ b/src/gperf_fold_key_conv.py
@@ -54,11 +54,13 @@ def parse_line(s, key_len):
def parse_file(f, key_len):
print "/* This file was converted by gperf_fold_key_conv.py\n from gperf output file. */"
- line = f.readline()
- while line:
+ while True:
+ line = f.readline()
+ if not line:
+ break
+
s = parse_line(line, key_len)
print s
- line = f.readline()
# main
diff --git a/src/iso8859_1.c b/src/iso8859_1.c
index ff47b80..0ce70a6 100644
--- a/src/iso8859_1.c
+++ b/src/iso8859_1.c
@@ -272,6 +272,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_10.c b/src/iso8859_10.c
index f9804e2..4a34b38 100644
--- a/src/iso8859_10.c
+++ b/src/iso8859_10.c
@@ -239,6 +239,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_11.c b/src/iso8859_11.c
index 108ee8a..da8fda0 100644
--- a/src/iso8859_11.c
+++ b/src/iso8859_11.c
@@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_13.c b/src/iso8859_13.c
index 9585355..23a0265 100644
--- a/src/iso8859_13.c
+++ b/src/iso8859_13.c
@@ -228,6 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_14.c b/src/iso8859_14.c
index 83fc551..7281e93 100644
--- a/src/iso8859_14.c
+++ b/src/iso8859_14.c
@@ -241,6 +241,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_15.c b/src/iso8859_15.c
index 3a7ad05..3d9f571 100644
--- a/src/iso8859_15.c
+++ b/src/iso8859_15.c
@@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_16.c b/src/iso8859_16.c
index 02022d9..a6977dd 100644
--- a/src/iso8859_16.c
+++ b/src/iso8859_16.c
@@ -237,6 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_2.c b/src/iso8859_2.c
index ecdbb99..4f994c4 100644
--- a/src/iso8859_2.c
+++ b/src/iso8859_2.c
@@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_3.c b/src/iso8859_3.c
index 739f1c9..944a7ae 100644
--- a/src/iso8859_3.c
+++ b/src/iso8859_3.c
@@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_4.c b/src/iso8859_4.c
index 4f2b6a0..3a7c210 100644
--- a/src/iso8859_4.c
+++ b/src/iso8859_4.c
@@ -237,6 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_5.c b/src/iso8859_5.c
index cf41061..0a8b7ec 100644
--- a/src/iso8859_5.c
+++ b/src/iso8859_5.c
@@ -226,6 +226,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_6.c b/src/iso8859_6.c
index 1ffe99f..1c16c79 100644
--- a/src/iso8859_6.c
+++ b/src/iso8859_6.c
@@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_7.c b/src/iso8859_7.c
index 87288c2..0877b6f 100644
--- a/src/iso8859_7.c
+++ b/src/iso8859_7.c
@@ -222,6 +222,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_8.c b/src/iso8859_8.c
index 8f162a4..bd3e94d 100644
--- a/src/iso8859_8.c
+++ b/src/iso8859_8.c
@@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/iso8859_9.c b/src/iso8859_9.c
index 52589cf..8819f4a 100644
--- a/src/iso8859_9.c
+++ b/src/iso8859_9.c
@@ -228,6 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/koi8.c b/src/koi8.c
index 9fb2ee5..7ce965f 100644
--- a/src/koi8.c
+++ b/src/koi8.c
@@ -250,6 +250,6 @@ OnigEncodingType OnigEncodingKOI8 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/koi8_r.c b/src/koi8_r.c
index 8adc399..5994ebe 100644
--- a/src/koi8_r.c
+++ b/src/koi8_r.c
@@ -212,6 +212,6 @@ OnigEncodingType OnigEncodingKOI8_R = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/make_unicode_fold.sh b/src/make_unicode_fold.sh
index 45e9566..d5828e1 100755
--- a/src/make_unicode_fold.sh
+++ b/src/make_unicode_fold.sh
@@ -23,6 +23,13 @@ ${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf
${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf > ${TMP3}
./gperf_fold_key_conv.py 3 < ${TMP3} > unicode_fold3_key.c
+# remove redundant EOLs before EOF
+perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold_data.c
+perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold1_key.c
+perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold2_key.c
+perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold3_key.c
+perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_unfold_key.c
+
rm -f ${TMP0} ${TMP1} ${TMP2} ${TMP3}
rm -f unicode_unfold_key.gperf unicode_fold1_key.gperf unicode_fold2_key.gperf unicode_fold3_key.gperf
diff --git a/src/make_unicode_fold_data.py b/src/make_unicode_fold_data.py
index 5c87d4c..783988c 100755
--- a/src/make_unicode_fold_data.py
+++ b/src/make_unicode_fold_data.py
@@ -5,7 +5,6 @@
import sys
import re
-import codecs
SOURCE_FILE = 'CaseFolding.txt'
GPERF_UNFOLD_KEY_FILE = 'unicode_unfold_key.gperf'
diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py
index 9a48ced..dc3071a 100755
--- a/src/make_unicode_property_data.py
+++ b/src/make_unicode_property_data.py
@@ -6,8 +6,6 @@
import sys
import re
-INCLUDE_GRAPHEME_CLUSTER_DATA = False
-
POSIX_LIST = [
'NEWLINE', 'Alpha', 'Blank', 'Cntrl', 'Digit', 'Graph', 'Lower',
'Print', 'Punct', 'Space', 'Upper', 'XDigit', 'Word', 'Alnum', 'ASCII'
@@ -427,9 +425,17 @@ argv = sys.argv
argc = len(argv)
POSIX_ONLY = False
-if argc >= 2:
- if argv[1] == '-posix':
+INCLUDE_GRAPHEME_CLUSTER_DATA = False
+
+# Scan every command-line argument: '-posix' sets POSIX_ONLY, '-gc' sets
+# INCLUDE_GRAPHEME_CLUSTER_DATA. Any other argument is reported on stderr
+# and otherwise ignored (the script keeps running).
+for i in range(1, argc):
+ arg = argv[i]
+ if arg == '-posix':
POSIX_ONLY = True
+ elif arg == '-gc':
+ INCLUDE_GRAPHEME_CLUSTER_DATA = True
+ else:
+ # sys.stderr.write emits the same text on Python 2 and Python 3, whereas
+ # the "print >> sys.stderr, ..." statement form only works on Python 2.
+ sys.stderr.write("Invalid argument: %s\n" % arg)
+
OUTPUT_LIST_MODE = not(POSIX_ONLY)
@@ -441,11 +447,11 @@ with open('UnicodeData.txt', 'r') as f:
PROPS = DIC.keys()
PROPS = list_sub(PROPS, POSIX_LIST)
-dic, props = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property')
+parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property')
dic, props = parse_and_merge_properties('Scripts.txt', 'Script')
DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic))
-dic, props = parse_and_merge_properties('PropList.txt', 'Binary Property')
-dic, props = parse_and_merge_properties('emoji-data.txt', 'Emoji Property')
+parse_and_merge_properties('PropList.txt', 'Binary Property')
+parse_and_merge_properties('emoji-data.txt', 'Emoji Property')
PROPS.append('Unknown')
KDIC['Unknown'] = 'Script'
diff --git a/src/oniguruma.h b/src/oniguruma.h
index ab917c6..746445a 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -36,9 +36,9 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 9
-#define ONIGURUMA_VERSION_TEENY 0
+#define ONIGURUMA_VERSION_TEENY 1
-#define ONIGURUMA_VERSION_INT 60900
+#define ONIGURUMA_VERSION_INT 60901
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
diff --git a/src/regcomp.c b/src/regcomp.c
index 83b9252..400368d 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -138,6 +138,17 @@ int_multiply_cmp(int x, int y, int v)
return 1;
}
+/*
+ * Multiply x by y with a guard against int overflow.
+ *
+ * Returns 0 when either operand is 0, x * y when x < INT_MAX / y, and -1
+ * otherwise. The callers visible in this file map a negative result to
+ * ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE.
+ *
+ * The comparison is strict, so the exact boundary x == INT_MAX / y is also
+ * reported as -1 even when the product would still fit.
+ * NOTE(review): the guard is only meaningful for non-negative operands, as
+ * the name suggests -- confirm that no caller passes a negative value.
+ */
+extern int
+onig_positive_int_multiply(int x, int y)
+{
+ if (x == 0 || y == 0) return 0;
+
+ if (x < INT_MAX / y)
+ return x * y;
+ else
+ return -1;
+}
+
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
@@ -152,7 +163,7 @@ swap_node(Node* a, Node* b)
if (NODE_TYPE(a) == NODE_STRING) {
StrNode* sn = STR_(a);
- if (sn->capa == 0) {
+ if (sn->capacity == 0) {
int len = (int )(sn->end - sn->s);
sn->s = sn->buf;
sn->end = sn->s + len;
@@ -161,7 +172,7 @@ swap_node(Node* a, Node* b)
if (NODE_TYPE(b) == NODE_STRING) {
StrNode* sn = STR_(b);
- if (sn->capa == 0) {
+ if (sn->capacity == 0) {
int len = (int )(sn->end - sn->s);
sn->s = sn->buf;
sn->end = sn->s + len;
@@ -970,8 +981,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
for (i = 0; i < n; i++) {
- r = add_opcode_rel_addr(reg, OP_PUSH,
- (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
+ int v = onig_positive_int_multiply(n - i, tlen);
+ if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+ r = add_opcode_rel_addr(reg, OP_PUSH, v + (n - i - 1) * SIZE_OP_PUSH);
if (r != 0) return r;
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
if (r != 0) return r;
@@ -991,49 +1003,49 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
+/*
+ * Return the result of compile_length_tree() for the body of an option bag.
+ * reg->options is set to node->o.options for the duration of that call and
+ * restored to its previous value before returning, so the caller's options
+ * are left unchanged.
+ */
static int
-compile_length_option_node(EnclosureNode* node, regex_t* reg)
+compile_length_option_node(BagNode* node, regex_t* reg)
{
int tlen;
OnigOptionType prev = reg->options;
reg->options = node->o.options;
- tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
+ tlen = compile_length_tree(NODE_BAG_BODY(node), reg);
reg->options = prev;
return tlen;
}
+/*
+ * Compile the body of an option bag with compile_tree() and return its
+ * result. reg->options is switched to node->o.options only while the body
+ * is compiled; the previous value is restored before returning, including
+ * when compile_tree() reports an error.
+ */
static int
-compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
+compile_option_node(BagNode* node, regex_t* reg, ScanEnv* env)
{
int r;
OnigOptionType prev = reg->options;
reg->options = node->o.options;
- r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);
reg->options = prev;
return r;
}
static int
-compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
+compile_length_bag_node(BagNode* node, regex_t* reg)
{
int len;
int tlen;
- if (node->type == ENCLOSURE_OPTION)
+ if (node->type == BAG_OPTION)
return compile_length_option_node(node, reg);
- if (NODE_ENCLOSURE_BODY(node)) {
- tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
+ if (NODE_BAG_BODY(node)) {
+ tlen = compile_length_tree(NODE_BAG_BODY(node), reg);
if (tlen < 0) return tlen;
}
else
tlen = 0;
switch (node->type) {
- case ENCLOSURE_MEMORY:
+ case BAG_MEMORY:
#ifdef USE_CALL
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
@@ -1069,23 +1081,27 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
}
break;
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_STOP_BACKTRACK:
if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
- QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
+ int v;
+ QuantNode* qn;
+
+ qn = QUANT_(NODE_BAG_BODY(node));
tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
- len = tlen * qn->lower
- + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
+ v = onig_positive_int_multiply(qn->lower, tlen);
+ if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+ len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
}
else {
len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
}
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
{
- Node* cond = NODE_ENCLOSURE_BODY(node);
+ Node* cond = NODE_BAG_BODY(node);
Node* Then = node->te.Then;
Node* Else = node->te.Else;
@@ -1109,18 +1125,18 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
}
break;
- default:
- return ONIGERR_TYPE_BUG;
+ case BAG_OPTION:
+ len = tlen;
break;
}
return len;
}
-static int get_char_length_tree(Node* node, regex_t* reg, int* len);
+static int get_char_len_node(Node* node, regex_t* reg, int* len);
static int
-compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
+compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
{
int r;
int len;
@@ -1133,12 +1149,12 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
NODE_STATUS_ADD(node, ADDR_FIXED);
r = add_abs_addr(reg, (int )node->m.called_addr);
if (r != 0) return r;
- len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
+ len = compile_length_tree(NODE_BAG_BODY(node), reg);
len += SIZE_OP_RETURN;
r = add_opcode_rel_addr(reg, OP_JUMP, len);
if (r != 0) return r;
- r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);
if (r != 0) return r;
r = add_opcode(reg, OP_RETURN);
return r;
@@ -1151,7 +1167,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
NODE_STATUS_ADD(node, ADDR_FIXED);
r = add_abs_addr(reg, (int )node->m.called_addr);
if (r != 0) return r;
- len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
+ len = compile_length_tree(NODE_BAG_BODY(node), reg);
len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
len += (NODE_IS_RECURSION(node)
@@ -1172,7 +1188,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
r = add_mem_num(reg, node->m.regnum);
if (r != 0) return r;
- r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);
if (r != 0) return r;
#ifdef USE_CALL
@@ -1201,22 +1217,22 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
}
static int
-compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
+compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
{
int r, len;
switch (node->type) {
- case ENCLOSURE_MEMORY:
- r = compile_enclosure_memory_node(node, reg, env);
+ case BAG_MEMORY:
+ r = compile_bag_memory_node(node, reg, env);
break;
- case ENCLOSURE_OPTION:
+ case BAG_OPTION:
r = compile_option_node(node, reg, env);
break;
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_STOP_BACKTRACK:
if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
- QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
+ QuantNode* qn = QUANT_(NODE_BAG_BODY(node));
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
@@ -1235,16 +1251,16 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
else {
r = add_opcode(reg, OP_ATOMIC_START);
if (r != 0) return r;
- r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);
if (r != 0) return r;
r = add_opcode(reg, OP_ATOMIC_END);
}
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
{
int cond_len, then_len, jump_len;
- Node* cond = NODE_ENCLOSURE_BODY(node);
+ Node* cond = NODE_BAG_BODY(node);
Node* Then = node->te.Then;
Node* Else = node->te.Else;
@@ -1283,10 +1299,6 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
}
}
break;
-
- default:
- return ONIGERR_TYPE_BUG;
- break;
}
return r;
@@ -1304,30 +1316,30 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
}
switch (node->type) {
- case ANCHOR_PREC_READ:
+ case ANCR_PREC_READ:
len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
break;
- case ANCHOR_PREC_READ_NOT:
+ case ANCR_PREC_READ_NOT:
len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
break;
- case ANCHOR_LOOK_BEHIND:
+ case ANCR_LOOK_BEHIND:
len = SIZE_OP_LOOK_BEHIND + tlen;
break;
- case ANCHOR_LOOK_BEHIND_NOT:
+ case ANCR_LOOK_BEHIND_NOT:
len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
break;
- case ANCHOR_WORD_BOUNDARY:
- case ANCHOR_NO_WORD_BOUNDARY:
+ case ANCR_WORD_BOUNDARY:
+ case ANCR_NO_WORD_BOUNDARY:
#ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN:
- case ANCHOR_WORD_END:
+ case ANCR_WORD_BEGIN:
+ case ANCR_WORD_END:
#endif
len = SIZE_OP_WORD_BOUNDARY;
break;
- case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
len = SIZE_OPCODE;
break;
@@ -1346,14 +1358,14 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
enum OpCode op;
switch (node->type) {
- case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
- case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
- case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
- case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
- case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
- case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
-
- case ANCHOR_WORD_BOUNDARY:
+ case ANCR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
+ case ANCR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
+ case ANCR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
+ case ANCR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
+ case ANCR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
+ case ANCR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
+
+ case ANCR_WORD_BOUNDARY:
op = OP_WORD_BOUNDARY;
word:
r = add_opcode(reg, op);
@@ -1361,27 +1373,27 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
r = add_mode(reg, (ModeType )node->ascii_mode);
break;
- case ANCHOR_NO_WORD_BOUNDARY:
+ case ANCR_NO_WORD_BOUNDARY:
op = OP_NO_WORD_BOUNDARY; goto word;
break;
#ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN:
+ case ANCR_WORD_BEGIN:
op = OP_WORD_BEGIN; goto word;
break;
- case ANCHOR_WORD_END:
+ case ANCR_WORD_END:
op = OP_WORD_END; goto word;
break;
#endif
- case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
break;
- case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
break;
- case ANCHOR_PREC_READ:
+ case ANCR_PREC_READ:
r = add_opcode(reg, OP_PREC_READ_START);
if (r != 0) return r;
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
@@ -1389,7 +1401,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
r = add_opcode(reg, OP_PREC_READ_END);
break;
- case ANCHOR_PREC_READ_NOT:
+ case ANCR_PREC_READ_NOT:
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
if (len < 0) return len;
r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);
@@ -1399,13 +1411,13 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
r = add_opcode(reg, OP_PREC_READ_NOT_END);
break;
- case ANCHOR_LOOK_BEHIND:
+ case ANCR_LOOK_BEHIND:
{
int n;
r = add_opcode(reg, OP_LOOK_BEHIND);
if (r != 0) return r;
if (node->char_len < 0) {
- r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
+ r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
@@ -1417,7 +1429,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
}
break;
- case ANCHOR_LOOK_BEHIND_NOT:
+ case ANCR_LOOK_BEHIND_NOT:
{
int n;
@@ -1426,7 +1438,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
len + SIZE_OP_LOOK_BEHIND_NOT_END);
if (r != 0) return r;
if (node->char_len < 0) {
- r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
+ r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
@@ -1635,8 +1647,8 @@ compile_length_tree(Node* node, regex_t* reg)
r = compile_length_quantifier_node(QUANT_(node), reg);
break;
- case NODE_ENCLOSURE:
- r = compile_length_enclosure_node(ENCLOSURE_(node), reg);
+ case NODE_BAG:
+ r = compile_length_bag_node(BAG_(node), reg);
break;
case NODE_ANCHOR:
@@ -1826,8 +1838,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
r = compile_quantifier_node(QUANT_(node), reg, env);
break;
- case NODE_ENCLOSURE:
- r = compile_enclosure_node(ENCLOSURE_(node), reg, env);
+ case NODE_BAG:
+ r = compile_bag_node(BAG_(node), reg, env);
break;
case NODE_ANCHOR:
@@ -1873,10 +1885,10 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ BagNode* en = BAG_(node);
+ if (en->type == BAG_MEMORY) {
if (NODE_IS_NAMED_GROUP(node)) {
(*counter)++;
map[en->m.regnum].new_val = *counter;
@@ -1890,8 +1902,8 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
r = noname_disable_map(plink, map, counter);
}
}
- else if (en->type == ENCLOSURE_IF_ELSE) {
- r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);
+ else if (en->type == BAG_IF_ELSE) {
+ r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter);
if (r != 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
r = noname_disable_map(&(en->te.Then), map, counter);
@@ -1964,14 +1976,14 @@ renumber_by_map(Node* node, GroupNumRemap* map)
r = renumber_by_map(NODE_BODY(node), map);
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
r = renumber_by_map(NODE_BODY(node), map);
if (r != 0) return r;
- if (en->type == ENCLOSURE_IF_ELSE) {
+ if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
r = renumber_by_map(en->te.Then, map);
if (r != 0) return r;
@@ -2021,14 +2033,14 @@ numbered_ref_check(Node* node)
r = numbered_ref_check(NODE_BODY(node));
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
r = numbered_ref_check(NODE_BODY(node));
if (r != 0) return r;
- if (en->type == ENCLOSURE_IF_ELSE) {
+ if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
r = numbered_ref_check(en->te.Then);
if (r != 0) return r;
@@ -2099,14 +2111,14 @@ static int
fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
{
int i, offset;
- EnclosureNode* en;
+ BagNode* en;
AbsAddrType addr;
for (i = 0; i < uslist->num; i++) {
if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))
return ONIGERR_PARSER_BUG;
- en = ENCLOSURE_(uslist->us[i].target);
+ en = BAG_(uslist->us[i].target);
addr = en->m.called_addr;
offset = uslist->us[i].offset;
@@ -2122,7 +2134,7 @@ fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
/* fixed size pattern node only */
static int
-get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
+get_char_len_node1(Node* node, regex_t* reg, int* len, int level)
{
int tlen;
int r = 0;
@@ -2132,7 +2144,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
switch (NODE_TYPE(node)) {
case NODE_LIST:
do {
- r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
+ r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);
if (r == 0)
*len = distance_add(*len, tlen);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
@@ -2143,9 +2155,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
int tlen2;
int varlen = 0;
- r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
+ r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);
while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {
- r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);
+ r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level);
if (r == 0) {
if (tlen != tlen2)
varlen = 1;
@@ -2185,7 +2197,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
*len = 0;
}
else {
- r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);
+ r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level);
if (r == 0)
*len = distance_multiply(tlen, qn->lower);
}
@@ -2198,7 +2210,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
#ifdef USE_CALL
case NODE_CALL:
if (! NODE_IS_RECURSION(node))
- r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
+ r = get_char_len_node1(NODE_BODY(node), reg, len, level);
else
r = GET_CHAR_LEN_VARLEN;
break;
@@ -2209,17 +2221,17 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
*len = 1;
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
switch (en->type) {
- case ENCLOSURE_MEMORY:
+ case BAG_MEMORY:
#ifdef USE_CALL
if (NODE_IS_CLEN_FIXED(node))
*len = en->char_len;
else {
- r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
+ r = get_char_len_node1(NODE_BODY(node), reg, len, level);
if (r == 0) {
en->char_len = *len;
NODE_STATUS_ADD(node, CLEN_FIXED);
@@ -2227,23 +2239,23 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
}
break;
#endif
- case ENCLOSURE_OPTION:
- case ENCLOSURE_STOP_BACKTRACK:
- r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
+ case BAG_OPTION:
+ case BAG_STOP_BACKTRACK:
+ r = get_char_len_node1(NODE_BODY(node), reg, len, level);
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
{
int clen, elen;
- r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);
+ r = get_char_len_node1(NODE_BODY(node), reg, &clen, level);
if (r == 0) {
if (IS_NOT_NULL(en->te.Then)) {
- r = get_char_length_tree1(en->te.Then, reg, &tlen, level);
+ r = get_char_len_node1(en->te.Then, reg, &tlen, level);
if (r != 0) break;
}
else tlen = 0;
if (IS_NOT_NULL(en->te.Else)) {
- r = get_char_length_tree1(en->te.Else, reg, &elen, level);
+ r = get_char_len_node1(en->te.Else, reg, &elen, level);
if (r != 0) break;
}
else elen = 0;
@@ -2257,9 +2269,6 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
}
}
break;
-
- default:
- break;
}
}
break;
@@ -2281,9 +2290,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
}
+/*
+ * Entry point for the character-length computation: forwards node, reg and
+ * len unchanged to get_char_len_node1() with an initial level of 0 and
+ * returns its result.
+ */
static int
-get_char_length_tree(Node* node, regex_t* reg, int* len)
+get_char_len_node(Node* node, regex_t* reg, int* len)
{
- return get_char_length_tree1(node, reg, len, 0);
+ return get_char_len_node1(node, reg, len, 0);
}
/* x is not included y ==> 1 : 0 */
@@ -2450,7 +2459,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
- return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
+ return onig_is_code_in_cc(reg->enc, code, cc) == 0;
}
break;
@@ -2520,10 +2529,8 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
if (sn->end <= sn->s)
break;
- if (exact != 0 &&
- !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
- }
- else {
+ if (exact == 0 ||
+ ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) {
n = node;
}
}
@@ -2541,23 +2548,23 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
switch (en->type) {
- case ENCLOSURE_OPTION:
+ case BAG_OPTION:
{
OnigOptionType options = reg->options;
- reg->options = ENCLOSURE_(node)->o.options;
+ reg->options = BAG_(node)->o.options;
n = get_head_value_node(NODE_BODY(node), exact, reg);
reg->options = options;
}
break;
- case ENCLOSURE_MEMORY:
- case ENCLOSURE_STOP_BACKTRACK:
- case ENCLOSURE_IF_ELSE:
+ case BAG_MEMORY:
+ case BAG_STOP_BACKTRACK:
+ case BAG_IF_ELSE:
n = get_head_value_node(NODE_BODY(node), exact, reg);
break;
}
@@ -2565,7 +2572,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
break;
case NODE_ANCHOR:
- if (ANCHOR_(node)->type == ANCHOR_PREC_READ)
+ if (ANCHOR_(node)->type == ANCR_PREC_READ)
n = get_head_value_node(NODE_BODY(node), exact, reg);
break;
@@ -2578,7 +2585,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
}
static int
-check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
+check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask)
{
NodeType type;
int r = 0;
@@ -2591,29 +2598,29 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
case NODE_LIST:
case NODE_ALT:
do {
- r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,
+ r = check_type_tree(NODE_CAR(node), type_mask, bag_mask,
anchor_mask);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
case NODE_QUANT:
- r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
+ r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
- if (((1<<en->type) & enclosure_mask) == 0)
+ BagNode* en = BAG_(node);
+ if (((1<<en->type) & bag_mask) == 0)
return 1;
- r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
- if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {
+ r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
+ if (r == 0 && en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
- r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);
+ r = check_type_tree(en->te.Then, type_mask, bag_mask, anchor_mask);
if (r != 0) break;
}
if (IS_NOT_NULL(en->te.Else)) {
- r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);
+ r = check_type_tree(en->te.Else, type_mask, bag_mask, anchor_mask);
}
}
}
@@ -2625,7 +2632,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
return 1;
if (IS_NOT_NULL(NODE_BODY(node)))
- r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
+ r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
break;
case NODE_GIMMICK:
@@ -2666,7 +2673,7 @@ tree_min_len(Node* node, ScanEnv* env)
Node* t = NODE_BODY(node);
if (NODE_IS_RECURSION(node)) {
if (NODE_IS_MIN_FIXED(t))
- len = ENCLOSURE_(t)->min_len;
+ len = BAG_(t)->min_len;
}
else
len = tree_min_len(t, env);
@@ -2717,11 +2724,11 @@ tree_min_len(Node* node, ScanEnv* env)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
switch (en->type) {
- case ENCLOSURE_MEMORY:
+ case BAG_MEMORY:
if (NODE_IS_MIN_FIXED(node))
len = en->min_len;
else {
@@ -2738,11 +2745,11 @@ tree_min_len(Node* node, ScanEnv* env)
}
break;
- case ENCLOSURE_OPTION:
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_OPTION:
+ case BAG_STOP_BACKTRACK:
len = tree_min_len(NODE_BODY(node), env);
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
{
OnigLen elen;
@@ -2854,11 +2861,11 @@ tree_max_len(Node* node, ScanEnv* env)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
switch (en->type) {
- case ENCLOSURE_MEMORY:
+ case BAG_MEMORY:
if (NODE_IS_MAX_FIXED(node))
len = en->max_len;
else {
@@ -2875,11 +2882,11 @@ tree_max_len(Node* node, ScanEnv* env)
}
break;
- case ENCLOSURE_OPTION:
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_OPTION:
+ case BAG_STOP_BACKTRACK:
len = tree_max_len(NODE_BODY(node), env);
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
{
OnigLen tlen, elen;
@@ -2931,12 +2938,12 @@ check_backrefs(Node* node, ScanEnv* env)
r = check_backrefs(NODE_BODY(node), env);
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
r = check_backrefs(NODE_BODY(node), env);
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
- if (en->type == ENCLOSURE_IF_ELSE) {
+ if (en->type == BAG_IF_ELSE) {
if (r != 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
r = check_backrefs(en->te.Then, env);
@@ -3039,11 +3046,11 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
r = infinite_recursive_call_check(NODE_BODY(node), env, head);
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ if (en->type == BAG_MEMORY) {
if (NODE_IS_MARK2(node))
return 0;
else if (NODE_IS_MARK1(node))
@@ -3055,7 +3062,7 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
NODE_STATUS_REMOVE(node, MARK2);
}
}
- else if (en->type == ENCLOSURE_IF_ELSE) {
+ else if (en->type == BAG_IF_ELSE) {
int eret;
ret = infinite_recursive_call_check(NODE_BODY(node), env, head);
@@ -3116,11 +3123,11 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ if (en->type == BAG_MEMORY) {
if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {
int ret;
@@ -3134,7 +3141,7 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
NODE_STATUS_REMOVE(node, MARK1);
}
}
- else if (en->type == ENCLOSURE_IF_ELSE) {
+ else if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
r = infinite_recursive_call_check_trav(en->te.Then, env);
if (r != 0) return r;
@@ -3189,11 +3196,11 @@ recursive_call_check(Node* node)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ if (en->type == BAG_MEMORY) {
if (NODE_IS_MARK2(node))
return 0;
else if (NODE_IS_MARK1(node))
@@ -3204,7 +3211,7 @@ recursive_call_check(Node* node)
NODE_STATUS_REMOVE(node, MARK2);
}
}
- else if (en->type == ENCLOSURE_IF_ELSE) {
+ else if (en->type == BAG_IF_ELSE) {
r = 0;
if (IS_NOT_NULL(en->te.Then)) {
r |= recursive_call_check(en->te.Then);
@@ -3265,13 +3272,13 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
int ret;
int state1;
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ if (en->type == BAG_MEMORY) {
if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
if (! NODE_IS_RECURSION(node)) {
NODE_STATUS_ADD(node, MARK1);
@@ -3294,7 +3301,7 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
if (ret == FOUND_CALLED_NODE)
r = FOUND_CALLED_NODE;
- if (en->type == ENCLOSURE_IF_ELSE) {
+ if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
ret = recursive_call_check_trav(en->te.Then, env, state1);
if (ret == FOUND_CALLED_NODE)
@@ -3318,6 +3325,15 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
#endif
+/*
+ * Single-bit flags, one bit per flag so they can be OR-ed together.
+ * IN_LOOK_BEHIND is tested against the `state` argument of
+ * expand_case_fold_string().
+ * NOTE(review): the other flags are presumably carried in the same kind of
+ * state word by the tree-setup passes -- confirm at their use sites.
+ */
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REAL_REPEAT (1<<2)
+#define IN_VAR_REPEAT (1<<3)
+#define IN_ZERO_REPEAT (1<<4)
+#define IN_MULTI_ENTRY (1<<5)
+#define IN_LOOK_BEHIND (1<<6)
+
+
/* divide different length alternatives in look-behind.
(?<=A|B) ==> (?<=A)|(?<=B)
(?<!A|B) ==> (?<!A)(?<!B)
@@ -3343,7 +3359,7 @@ divide_look_behind_alternatives(Node* node)
NODE_CAR(np) = insert_node;
}
- if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
+ if (anc_type == ANCR_LOOK_BEHIND_NOT) {
np = node;
do {
NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */
@@ -3358,7 +3374,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
int r, len;
AnchorNode* an = ANCHOR_(node);
- r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);
+ r = get_char_len_node(NODE_ANCHOR_BODY(an), reg, &len);
if (r == 0)
an->char_len = len;
else if (r == GET_CHAR_LEN_VARLEN)
@@ -3398,7 +3414,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
if (IS_NOT_NULL(x)) {
y = get_head_value_node(next_node, 0, reg);
if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
- Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
+ Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
CHECK_NULL_RETURN_MEMERR(en);
NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
swap_node(node, en);
@@ -3409,9 +3425,9 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
}
}
}
- else if (type == NODE_ENCLOSURE) {
- EnclosureNode* en = ENCLOSURE_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ else if (type == NODE_BAG) {
+ BagNode* en = BAG_(node);
+ if (en->type == BAG_MEMORY) {
node = NODE_BODY(node);
goto retry;
}
@@ -3527,7 +3543,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p
for (i = 0; i < item_num; i++) {
snode = onig_node_new_str(NULL, NULL);
if (IS_NULL(snode)) goto mem_err;
-
+
for (j = 0; j < items[i].code_len; j++) {
len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
if (len < 0) {
@@ -3544,7 +3560,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p
goto mem_err2;
}
- if (items[i].byte_len != slen) {
+ if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {
Node *rem;
UChar *q = p + items[i].byte_len;
@@ -3596,37 +3612,69 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p
}
static int
-expand_case_fold_string(Node* node, regex_t* reg)
+is_good_case_fold_items_for_search(OnigEncoding enc, int slen,
+ int n, OnigCaseFoldCodeItem items[])
{
+ /*
+  * Check the first n entries of items[] and return 1 only if every entry
+  * passes all three tests below; return 0 at the first entry that fails.
+  * With n == 0 the loop does not run and the result is 1.
+  */
+ int i, len;
+ UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+
+ /* the item must consist of exactly one code */
+ if (item->code_len != 1) return 0;
+ /* the item's byte_len must equal the caller-supplied slen */
+ if (item->byte_len != slen) return 0;
+ /* the code, converted with ONIGENC_CODE_TO_MBC, must also yield slen;
+    any other return value fails this test as well */
+ len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf);
+ if (len != slen) return 0;
+ }
+
+ return 1;
+}
+
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
+static int
+expand_case_fold_string(Node* node, regex_t* reg, int state)
+{
int r, n, len, alt_num;
+ int fold_len;
+ int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;
UChar *start, *end, *p;
+ UChar* foldp;
Node *top_root, *root, *snode, *prev_node;
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- StrNode* sn = STR_(node);
+ UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ StrNode* sn;
if (NODE_STRING_IS_AMBIG(node)) return 0;
+ sn = STR_(node);
+
start = sn->s;
end = sn->end;
if (start >= end) return 0;
+ is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+
r = 0;
top_root = root = prev_node = snode = NULL_NODE;
alt_num = 1;
p = start;
while (p < end) {
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end,
- items);
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
+ p, end, items);
if (n < 0) {
r = n;
goto err;
}
len = enclen(reg->enc, p);
+ is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items);
- if (n == 0) {
+ if (is_in_look_behind ||
+ (IS_NOT_NULL(snode) ||
+ (is_good
+ /* expand single char case: ex. /(?i:a)/ */
+ && !(p == start && p + len >= end)))) {
if (IS_NULL(snode)) {
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
@@ -3644,10 +3692,49 @@ expand_case_fold_string(Node* node, regex_t* reg)
goto mem_err;
}
}
+
+ prev_is_ambig = -1; /* -1: new */
+ prev_is_good = 0; /* escape compiler warning */
+ }
+ else {
+ prev_is_ambig = NODE_STRING_IS_AMBIG(snode);
+ prev_is_good = NODE_STRING_IS_GOOD_AMBIG(snode);
}
- r = onig_node_str_cat(snode, p, p + len);
- if (r != 0) goto err;
+ if (n != 0) {
+ foldp = p;
+ fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag,
+ &foldp, end, buf);
+ foldp = buf;
+ }
+ else {
+ foldp = p; fold_len = len;
+ }
+
+ if ((prev_is_ambig == 0 && n != 0) ||
+ (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) {
+ if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) {
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ prev_node = snode = onig_node_new_str(foldp, foldp + fold_len);
+ if (IS_NULL(snode)) goto mem_err;
+ if (IS_NULL(onig_node_list_add(root, snode))) {
+ onig_node_free(snode);
+ goto mem_err;
+ }
+ }
+ else {
+ r = onig_node_str_cat(snode, foldp, foldp + fold_len);
+ if (r != 0) goto err;
+ }
+
+ if (n != 0) NODE_STRING_SET_AMBIG(snode);
+ if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode);
}
else {
alt_num *= (n + 1);
@@ -3768,22 +3855,22 @@ quantifiers_memory_node_info(Node* node)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
switch (en->type) {
- case ENCLOSURE_MEMORY:
+ case BAG_MEMORY:
if (NODE_IS_RECURSION(node)) {
return QUANT_BODY_IS_EMPTY_REC;
}
return QUANT_BODY_IS_EMPTY_MEM;
break;
- case ENCLOSURE_OPTION:
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_OPTION:
+ case BAG_STOP_BACKTRACK:
r = quantifiers_memory_node_info(NODE_BODY(node));
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
{
int v;
r = quantifiers_memory_node_info(NODE_BODY(node));
@@ -3797,8 +3884,6 @@ quantifiers_memory_node_info(Node* node)
}
}
break;
- default:
- break;
}
}
break;
@@ -3818,13 +3903,6 @@ quantifiers_memory_node_info(Node* node)
#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REAL_REPEAT (1<<2)
-#define IN_VAR_REPEAT (1<<3)
-#define IN_ZERO_REPEAT (1<<4)
-#define IN_MULTI_ENTRY (1<<5)
-
#ifdef USE_CALL
#ifdef __GNUC__
@@ -3901,18 +3979,18 @@ setup_call2_call(Node* node)
setup_call2_call(NODE_BODY(node));
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ if (en->type == BAG_MEMORY) {
if (! NODE_IS_MARK1(node)) {
NODE_STATUS_ADD(node, MARK1);
setup_call2_call(NODE_BODY(node));
NODE_STATUS_REMOVE(node, MARK1);
}
}
- else if (en->type == ENCLOSURE_IF_ELSE) {
+ else if (en->type == BAG_IF_ELSE) {
setup_call2_call(NODE_BODY(node));
if (IS_NOT_NULL(en->te.Then))
setup_call2_call(en->te.Then);
@@ -3935,7 +4013,7 @@ setup_call2_call(Node* node)
cn->entry_count++;
NODE_STATUS_ADD(called, CALLED);
- ENCLOSURE_(called)->m.entry_count++;
+ BAG_(called)->m.entry_count++;
setup_call2_call(called);
}
NODE_STATUS_REMOVE(node, MARK1);
@@ -3974,18 +4052,18 @@ setup_call(Node* node, ScanEnv* env, int state)
r = 0;
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ if (en->type == BAG_MEMORY) {
if ((state & IN_ZERO_REPEAT) != 0) {
NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
- ENCLOSURE_(node)->m.entry_count--;
+ BAG_(node)->m.entry_count--;
}
r = setup_call(NODE_BODY(node), env, state);
}
- else if (en->type == ENCLOSURE_IF_ELSE) {
+ else if (en->type == BAG_IF_ELSE) {
r = setup_call(NODE_BODY(node), env, state);
if (r != 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
@@ -4040,15 +4118,15 @@ setup_call2(Node* node)
r = setup_call2(NODE_BODY(node));
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
if (! NODE_IS_IN_ZERO_REPEAT(node))
r = setup_call2(NODE_BODY(node));
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
if (r != 0) return r;
- if (en->type == ENCLOSURE_IF_ELSE) {
+ if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
r = setup_call2(en->te.Then);
if (r != 0) return r;
@@ -4104,12 +4182,12 @@ setup_called_state_call(Node* node, int state)
AnchorNode* an = ANCHOR_(node);
switch (an->type) {
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND_NOT:
+ case ANCR_PREC_READ_NOT:
+ case ANCR_LOOK_BEHIND_NOT:
state |= IN_NOT;
/* fall */
- case ANCHOR_PREC_READ:
- case ANCHOR_LOOK_BEHIND:
+ case ANCR_PREC_READ:
+ case ANCR_LOOK_BEHIND:
setup_called_state_call(NODE_ANCHOR_BODY(an), state);
break;
default:
@@ -4118,11 +4196,11 @@ setup_called_state_call(Node* node, int state)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
- if (en->type == ENCLOSURE_MEMORY) {
+ if (en->type == BAG_MEMORY) {
if (NODE_IS_MARK1(node)) {
if ((~en->m.called_state & state) != 0) {
en->m.called_state |= state;
@@ -4136,7 +4214,7 @@ setup_called_state_call(Node* node, int state)
NODE_STATUS_REMOVE(node, MARK1);
}
}
- else if (en->type == ENCLOSURE_IF_ELSE) {
+ else if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
setup_called_state_call(en->te.Then, state);
}
@@ -4177,22 +4255,22 @@ setup_called_state(Node* node, int state)
break;
#endif
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
switch (en->type) {
- case ENCLOSURE_MEMORY:
+ case BAG_MEMORY:
if (en->m.entry_count > 1)
state |= IN_MULTI_ENTRY;
en->m.called_state |= state;
/* fall */
- case ENCLOSURE_OPTION:
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_OPTION:
+ case BAG_STOP_BACKTRACK:
setup_called_state(NODE_BODY(node), state);
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
setup_called_state(NODE_BODY(node), state);
if (IS_NOT_NULL(en->te.Then))
setup_called_state(en->te.Then, state);
@@ -4221,12 +4299,12 @@ setup_called_state(Node* node, int state)
AnchorNode* an = ANCHOR_(node);
switch (an->type) {
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND_NOT:
+ case ANCR_PREC_READ_NOT:
+ case ANCR_LOOK_BEHIND_NOT:
state |= IN_NOT;
/* fall */
- case ANCHOR_PREC_READ:
- case ANCHOR_LOOK_BEHIND:
+ case ANCR_PREC_READ:
+ case ANCR_LOOK_BEHIND:
setup_called_state(NODE_ANCHOR_BODY(an), state);
break;
default:
@@ -4259,56 +4337,57 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
/* allowed node types in look-behind */
#define ALLOWED_TYPE_IN_LB \
( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \
- | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \
+ | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \
| NODE_BIT_CALL | NODE_BIT_GIMMICK)
-#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )
-#define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)
+#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION )
+#define ALLOWED_BAG_IN_LB_NOT (1<<BAG_OPTION)
#define ALLOWED_ANCHOR_IN_LB \
- ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \
- | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \
- | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
- | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
- | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
+ ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \
+ | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \
+ | ANCR_WORD_BEGIN | ANCR_WORD_END \
+ | ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
+ | ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
#define ALLOWED_ANCHOR_IN_LB_NOT \
- ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \
- | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \
- | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
- | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
- | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
+ ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \
+ | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \
+ | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \
+ | ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
+ | ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
int r;
AnchorNode* an = ANCHOR_(node);
switch (an->type) {
- case ANCHOR_PREC_READ:
+ case ANCR_PREC_READ:
r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
break;
- case ANCHOR_PREC_READ_NOT:
+ case ANCR_PREC_READ_NOT:
r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
break;
- case ANCHOR_LOOK_BEHIND:
+ case ANCR_LOOK_BEHIND:
{
r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
- ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);
+ ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
+ r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);
if (r != 0) return r;
r = setup_look_behind(node, reg, env);
}
break;
- case ANCHOR_LOOK_BEHIND_NOT:
+ case ANCR_LOOK_BEHIND_NOT:
{
r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
- ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
+ ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
+ r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),
+ env);
if (r != 0) return r;
r = setup_look_behind(node, reg, env);
}
@@ -4346,9 +4425,9 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
qn->body_empty_info = quantifiers_memory_node_info(body);
if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {
- if (NODE_TYPE(body) == NODE_ENCLOSURE &&
- ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {
- MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);
+ if (NODE_TYPE(body) == NODE_BAG &&
+ BAG_(body)->type == BAG_MEMORY) {
+ MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);
}
}
#else
@@ -4439,7 +4518,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
case NODE_STRING:
if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg);
+ r = expand_case_fold_string(node, reg, state);
}
break;
@@ -4462,21 +4541,21 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
switch (en->type) {
- case ENCLOSURE_OPTION:
+ case BAG_OPTION:
{
OnigOptionType options = reg->options;
- reg->options = ENCLOSURE_(node)->o.options;
+ reg->options = BAG_(node)->o.options;
r = setup_tree(NODE_BODY(node), reg, state, env);
reg->options = options;
}
break;
- case ENCLOSURE_MEMORY:
+ case BAG_MEMORY:
#ifdef USE_CALL
state |= en->m.called_state;
#endif
@@ -4488,7 +4567,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
r = setup_tree(NODE_BODY(node), reg, state, env);
break;
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_STOP_BACKTRACK:
{
Node* target = NODE_BODY(node);
r = setup_tree(target, reg, state, env);
@@ -4503,7 +4582,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
}
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
if (r != 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
@@ -4538,35 +4617,83 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
return r;
}
-/* set skip map for Boyer-Moore search */
/*
 * Diff note: this hunk replaces set_bm_skip() ('-' lines) with
 * set_sunday_quick_search_or_bmh_skip_table() ('+' lines).  The comments
 * below describe the new ('+') version only.
 *
 * Build the byte-indexed skip table for the literal [s, end).
 *
 *   reg         - supplies the encoding (reg->enc) and reg->case_fold_flag
 *   case_expand - non-zero: also lower the table entries for the bytes of
 *                 each case-fold alternative of every character
 *   s, end      - the literal to search for
 *   skip        - output table; all CHAR_MAP_SIZE entries are written
 *   roffset     - output; receives the offset that was added to the entries
 *
 * Returns 0 on success, ONIGERR_PARSER_BUG when (length + offset) does not
 * fit below UCHAR_MAX (the table entries are UChar).
 */
static int
-set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
- UChar skip[], int** int_skip)
+set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand,
+ UChar* s, UChar* end,
+ UChar skip[], int* roffset)
{
- int i, len;
+ int i, j, k, len, offset;
+ int n, clen;
+ UChar* p;
+ OnigEncoding enc;
+ OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+
+ enc = reg->enc;
+ offset = ENC_GET_SKIP_OFFSET(enc);
/* "1 or 0" is resolved by walking to the last character of the literal:
   offset becomes 1 if that character is a single byte, otherwise 0.
   Note that this inner p shadows the function-level p. */
+ if (offset == ENC_SKIP_OFFSET_1_OR_0) {
+ UChar* p = s;
+ while (1) {
+ len = enclen(enc, p);
+ if (p + len >= end) {
+ if (len == 1) offset = 1;
+ else offset = 0;
+ break;
+ }
+ p += len;
+ }
+ }
len = (int )(end - s);
- if (len < ONIG_CHAR_TABLE_SIZE) {
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
/* every value stored below is at most len + offset, so this one check
   guarantees the values fit in a UChar entry */
+ if (len + offset >= UCHAR_MAX)
+ return ONIGERR_PARSER_BUG;
- for (i = 0; i < len - 1; i++)
- skip[s[i]] = len - 1 - i;
+ *roffset = offset;
+
/* default entry for bytes not overwritten by the loop below */
+ for (i = 0; i < CHAR_MAP_SIZE; i++) {
+ skip[i] = (UChar )(len + offset);
}
- else {
- if (IS_NULL(*int_skip)) {
- *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
+
/* walk the literal one character (clen bytes) at a time */
+ for (p = s; p < end; ) {
+ int z;
+
+ clen = enclen(enc, p);
/* clamp a character that would run past the end of the literal */
+ if (p + clen > end) clen = (int )(end - p);
+
/* from here on len is the number of bytes remaining from p to end */
+ len = (int )(end - p);
+ for (j = 0; j < clen; j++) {
/* z = bytes from p[j] to end, plus (offset - 1); later occurrences of the
   same byte value overwrite earlier ones with a smaller z */
+ z = len - j + (offset - 1);
+ if (z <= 0) break;
+ skip[p[j]] = z;
+ }
+
+ if (case_expand != 0) {
/* a negative n (error from the lookup) simply skips the k loop */
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
+ p, end, items);
+ for (k = 0; k < n; k++) {
/* only code[0] is encoded and only its first clen bytes are read --
   NOTE(review): presumably callers pass case_expand only for strings whose
   fold items passed the "good" same-length check; confirm at the call site */
+ ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
+ for (j = 0; j < clen; j++) {
+ z = len - j + (offset - 1);
+ if (z <= 0) break;
/* entries for alternative bytes are only ever lowered, never raised */
+ if (skip[buf[j]] > z)
+ skip[buf[j]] = z;
+ }
+ }
}
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
- for (i = 0; i < len - 1; i++)
- (*int_skip)[s[i]] = len - 1 - i;
+ p += clen;
}
+
return 0;
}
+
/*
 * Capacity, in bytes, of the s[] buffer in the optimizer's exact-string info.
 * The guard added by this patch makes the build fail if the constant is ever
 * raised to UCHAR_MAX or more.  NOTE(review): presumably this is because the
 * skip table built for that string stores its distances in UChar entries --
 * confirm against the skip-table builder.
 */
#define OPT_EXACT_MAXLEN 24
+#if OPT_EXACT_MAXLEN >= UCHAR_MAX
+#error Too big OPT_EXACT_MAXLEN
+#endif
+
typedef struct {
OnigLen min; /* min byte length */
OnigLen max; /* max byte length */
@@ -4589,26 +4716,27 @@ typedef struct {
MinMax mmd; /* position */
OptAnc anc;
int reach_end;
- int ignore_case;
+ int case_fold;
+ int good_case_fold;
int len;
UChar s[OPT_EXACT_MAXLEN];
-} OptExact;
+} OptStr;
/*
 * Optimizer info for a byte map candidate.  This patch only changes the
 * array size macro from ONIG_CHAR_TABLE_SIZE to CHAR_MAP_SIZE.
 */
typedef struct {
MinMax mmd; /* position */
OptAnc anc;
int value; /* weighted value */
- UChar map[ONIG_CHAR_TABLE_SIZE];
+ UChar map[CHAR_MAP_SIZE];
} OptMap;
/*
 * Optimizer info collected for one node.  This patch renames the type
 * NodeOpt -> OptNode, the member type OptExact -> OptStr, and the members
 * exb/exm/expr -> sb/sm/spr; the per-member comments are kept unchanged.
 */
typedef struct {
- MinMax len;
- OptAnc anc;
- OptExact exb; /* boundary */
- OptExact exm; /* middle */
- OptExact expr; /* prec read (?=...) */
- OptMap map; /* boundary */
-} NodeOpt;
+ MinMax len;
+ OptAnc anc;
+ OptStr sb; /* boundary */
+ OptStr sm; /* middle */
+ OptStr spr; /* prec read (?=...) */
+ OptMap map; /* boundary */
+} OptNode;
static int
@@ -4640,15 +4768,15 @@ distance_value(MinMax* mm)
{
/* 1000 / (min-max-dist + 1) */
static const short int dist_vals[] = {
- 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
- 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
- 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
- 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
- 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
- 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
- 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
- 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
- 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
+ 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
+ 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
+ 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
+ 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
+ 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
+ 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
+ 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
+ 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
+ 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
11, 11, 11, 11, 11, 10, 10, 10, 10, 10
};
@@ -4684,7 +4812,7 @@ comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)
/*
 * Return non-zero when a and b have the same min and the same max.
 * The patch only drops the redundant "? 1 : 0"; the result is still 1 or 0.
 */
static int
is_equal_mml(MinMax* a, MinMax* b)
{
- return (a->min == b->min && a->max == b->max) ? 1 : 0;
+ return a->min == b->min && a->max == b->max;
}
static void
@@ -4756,15 +4884,15 @@ concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
to->right |= left->right;
}
else {
- to->right |= (left->right & ANCHOR_PREC_READ_NOT);
+ to->right |= (left->right & ANCR_PREC_READ_NOT);
}
}
static int
is_left(int a)
{
- if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF ||
- a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)
+ if (a == ANCR_END_BUF || a == ANCR_SEMI_END_BUF ||
+ a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)
return 0;
return 1;
@@ -4804,39 +4932,47 @@ alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
}
/*
 * Return non-zero when e already holds OPT_EXACT_MAXLEN bytes or more.
 * The patch changes the parameter type name (OptExact -> OptStr) and drops
 * the redundant "? 1 : 0".
 */
static int
-is_full_opt_exact(OptExact* e)
+is_full_opt_exact(OptStr* e)
{
- return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);
+ return e->len >= OPT_EXACT_MAXLEN;
}
/*
 * Reset e to the empty state: position and anchor info are cleared, all
 * flags and the length are zeroed, and s[] is made an empty string.
 * The patch replaces the single ignore_case flag with the two flags
 * case_fold and good_case_fold, both cleared here.
 */
static void
-clear_opt_exact(OptExact* e)
+clear_opt_exact(OptStr* e)
{
clear_mml(&e->mmd);
clear_opt_anc_info(&e->anc);
- e->reach_end = 0;
- e->ignore_case = 0;
- e->len = 0;
- e->s[0] = '\0';
+ e->reach_end = 0;
+ e->case_fold = 0;
+ e->good_case_fold = 0;
+ e->len = 0;
+ e->s[0] = '\0';
}
/*
 * Copy *from into *to by plain struct assignment (all members, including
 * the s[] byte array).  Only the parameter type name changes in this patch.
 */
static void
-copy_opt_exact(OptExact* to, OptExact* from)
+copy_opt_exact(OptStr* to, OptStr* from)
{
*to = *from;
}
static int
-concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)
+concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
{
int i, j, len, r;
UChar *p, *end;
OptAnc tanc;
- if (! to->ignore_case && add->ignore_case) {
- if (to->len >= add->len) return 0; /* avoid */
+ if (add->case_fold != 0) {
+ if (! to->case_fold) {
+ if (to->len > 1 || to->len >= add->len) return 0; /* avoid */
- to->ignore_case = 1;
+ to->case_fold = 1;
+ }
+ else {
+ if (to->good_case_fold != 0) {
+ if (add->good_case_fold == 0) return 0;
+ }
+ }
}
r = 0;
@@ -4863,7 +4999,7 @@ concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)
}
static void
-concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)
+concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)
{
int i, j, len;
UChar *p;
@@ -4876,10 +5012,13 @@ concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)
}
to->len = i;
+
+ if (p >= end && to->len == (int )(end - s))
+ to->reach_end = 1;
}
static void
-alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)
+alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
{
int i, j, len;
@@ -4908,14 +5047,17 @@ alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)
to->reach_end = 0;
}
to->len = i;
- to->ignore_case |= add->ignore_case;
+ if (add->case_fold != 0)
+ to->case_fold = 1;
+ if (add->good_case_fold == 0)
+ to->good_case_fold = 0;
alt_merge_opt_anc_info(&to->anc, &add->anc);
if (! to->reach_end) to->anc.right = 0;
}
static void
-select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)
+select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)
{
int vn, va;
@@ -4938,8 +5080,11 @@ select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)
if (alt->len > 1) va += 5;
}
- if (now->ignore_case == 0) vn *= 2;
- if (alt->ignore_case == 0) va *= 2;
+ if (now->case_fold == 0) vn *= 2;
+ if (alt->case_fold == 0) va *= 2;
+
+ if (now->good_case_fold != 0) vn *= 4;
+ if (alt->good_case_fold != 0) va *= 4;
if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
copy_opt_exact(now, alt);
@@ -5030,14 +5175,24 @@ select_opt_map(OptMap* now, OptMap* alt)
}
/*
 * Compare the exact-string candidate e with the map candidate m.
 * Returns -1 immediately when m->value <= 0; otherwise returns the result
 * of comp_distance_value() on the two weighted scores.  The caller in
 * set_optimize_info_from_tree() picks the map when the result is > 0.
 *
 * The patch replaces the old two-level weight (ignore_case ? 1 : 2) with a
 * three-level one:
 *   3 - no case folding
 *   2 - case folding with good_case_fold set
 *   1 - case folding without good_case_fold
 */
static int
-comp_opt_exact_or_map(OptExact* e, OptMap* m)
+comp_opt_exact_or_map(OptStr* e, OptMap* m)
{
#define COMP_EM_BASE 20
int ae, am;
+ int case_value;
if (m->value <= 0) return -1;
- ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
+ if (e->case_fold != 0) {
+ if (e->good_case_fold != 0)
+ case_value = 2;
+ else
+ case_value = 1;
+ }
+ else
+ case_value = 3;
+
/* score of the string candidate: grows with its length and case weight */
+ ae = COMP_EM_BASE * e->len * case_value;
/* score of the map candidate: integer division, m->value > 0 is guaranteed
   by the early return above */
am = COMP_EM_BASE * 5 * 2 / m->value;
return comp_distance_value(&e->mmd, &m->mmd, ae, am);
}
@@ -5057,7 +5212,7 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
alt_merge_mml(&to->mmd, &add->mmd);
val = 0;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
+ for (i = 0; i < CHAR_MAP_SIZE; i++) {
if (add->map[i])
to->map[i] = 1;
@@ -5070,42 +5225,42 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
}
/*
 * Copy the position range *plen into the mmd of the boundary string (sb),
 * the prec-read string (spr) and the map.  The middle string (sm) is not
 * touched.  The patch only applies the NodeOpt -> OptNode and
 * exb/expr -> sb/spr renames.
 */
static void
-set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)
+set_bound_node_opt_info(OptNode* opt, MinMax* plen)
{
- copy_mml(&(opt->exb.mmd), plen);
- copy_mml(&(opt->expr.mmd), plen);
- copy_mml(&(opt->map.mmd), plen);
+ copy_mml(&(opt->sb.mmd), plen);
+ copy_mml(&(opt->spr.mmd), plen);
+ copy_mml(&(opt->map.mmd), plen);
}
/*
 * Reset every member of *opt: the length range, the anchor info, the three
 * string candidates (sb, sm, spr) and the map.  The patch only applies the
 * NodeOpt -> OptNode and exb/exm/expr -> sb/sm/spr renames.
 */
static void
-clear_node_opt_info(NodeOpt* opt)
+clear_node_opt_info(OptNode* opt)
{
clear_mml(&opt->len);
clear_opt_anc_info(&opt->anc);
- clear_opt_exact(&opt->exb);
- clear_opt_exact(&opt->exm);
- clear_opt_exact(&opt->expr);
+ clear_opt_exact(&opt->sb);
+ clear_opt_exact(&opt->sm);
+ clear_opt_exact(&opt->spr);
clear_opt_map(&opt->map);
}
/*
 * Copy *from into *to by plain struct assignment.  Only the parameter type
 * name changes in this patch (NodeOpt -> OptNode).
 */
static void
-copy_node_opt_info(NodeOpt* to, NodeOpt* from)
+copy_node_opt_info(OptNode* to, OptNode* from)
{
*to = *from;
}
static void
-concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)
+concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
{
- int exb_reach, exm_reach;
+ int sb_reach, sm_reach;
OptAnc tanc;
concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
copy_opt_anc_info(&to->anc, &tanc);
- if (add->exb.len > 0 && to->len.max == 0) {
- concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max);
- copy_opt_anc_info(&add->exb.anc, &tanc);
+ if (add->sb.len > 0 && to->len.max == 0) {
+ concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max);
+ copy_opt_anc_info(&add->sb.anc, &tanc);
}
if (add->map.value > 0 && to->len.max == 0) {
@@ -5113,38 +5268,38 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)
add->map.anc.left |= to->anc.left;
}
- exb_reach = to->exb.reach_end;
- exm_reach = to->exm.reach_end;
+ sb_reach = to->sb.reach_end;
+ sm_reach = to->sm.reach_end;
if (add->len.max != 0)
- to->exb.reach_end = to->exm.reach_end = 0;
+ to->sb.reach_end = to->sm.reach_end = 0;
- if (add->exb.len > 0) {
- if (exb_reach) {
- concat_opt_exact(&to->exb, &add->exb, enc);
- clear_opt_exact(&add->exb);
+ if (add->sb.len > 0) {
+ if (sb_reach) {
+ concat_opt_exact(&to->sb, &add->sb, enc);
+ clear_opt_exact(&add->sb);
}
- else if (exm_reach) {
- concat_opt_exact(&to->exm, &add->exb, enc);
- clear_opt_exact(&add->exb);
+ else if (sm_reach) {
+ concat_opt_exact(&to->sm, &add->sb, enc);
+ clear_opt_exact(&add->sb);
}
}
- select_opt_exact(enc, &to->exm, &add->exb);
- select_opt_exact(enc, &to->exm, &add->exm);
+ select_opt_exact(enc, &to->sm, &add->sb);
+ select_opt_exact(enc, &to->sm, &add->sm);
- if (to->expr.len > 0) {
+ if (to->spr.len > 0) {
if (add->len.max > 0) {
- if (to->expr.len > (int )add->len.max)
- to->expr.len = add->len.max;
+ if (to->spr.len > (int )add->len.max)
+ to->spr.len = add->len.max;
- if (to->expr.mmd.max == 0)
- select_opt_exact(enc, &to->exb, &to->expr);
+ if (to->spr.mmd.max == 0)
+ select_opt_exact(enc, &to->sb, &to->spr);
else
- select_opt_exact(enc, &to->exm, &to->expr);
+ select_opt_exact(enc, &to->sm, &to->spr);
}
}
- else if (add->expr.len > 0) {
- copy_opt_exact(&to->expr, &add->expr);
+ else if (add->spr.len > 0) {
+ copy_opt_exact(&to->spr, &add->spr);
}
select_opt_map(&to->map, &add->map);
@@ -5152,12 +5307,12 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)
}
static void
-alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)
+alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)
{
alt_merge_opt_anc_info(&to->anc, &add->anc);
- alt_merge_opt_exact(&to->exb, &add->exb, env);
- alt_merge_opt_exact(&to->exm, &add->exm, env);
- alt_merge_opt_exact(&to->expr, &add->expr, env);
+ alt_merge_opt_exact(&to->sb, &add->sb, env);
+ alt_merge_opt_exact(&to->sm, &add->sm, env);
+ alt_merge_opt_exact(&to->spr, &add->spr, env);
alt_merge_opt_map(env->enc, &to->map, &add->map);
alt_merge_mml(&to->len, &add->len);
@@ -5167,11 +5322,11 @@ alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)
#define MAX_NODE_OPT_INFO_REF_COUNT 5
static int
-optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
+optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
{
int i;
int r;
- NodeOpt xo;
+ OptNode xo;
OnigEncoding enc;
r = 0;
@@ -5217,7 +5372,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
/* int is_raw = NODE_STRING_IS_RAW(node); */
if (! NODE_STRING_IS_AMBIG(node)) {
- concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
+ concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
if (slen > 0) {
add_char_opt_map(&opt->map, *(sn->s), enc);
}
@@ -5231,8 +5386,10 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
}
else {
- concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
- opt->exb.ignore_case = 1;
+ concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
+ opt->sb.case_fold = 1;
+ if (NODE_STRING_IS_GOOD_AMBIG(node))
+ opt->sb.good_case_fold = 1;
if (slen > 0) {
r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
@@ -5245,9 +5402,6 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
set_mml(&opt->len, slen, max);
}
-
- if (opt->exb.len == slen)
- opt->exb.reach_end = 1;
}
break;
@@ -5321,27 +5475,27 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
case NODE_ANCHOR:
switch (ANCHOR_(node)->type) {
- case ANCHOR_BEGIN_BUF:
- case ANCHOR_BEGIN_POSITION:
- case ANCHOR_BEGIN_LINE:
- case ANCHOR_END_BUF:
- case ANCHOR_SEMI_END_BUF:
- case ANCHOR_END_LINE:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
+ case ANCR_BEGIN_BUF:
+ case ANCR_BEGIN_POSITION:
+ case ANCR_BEGIN_LINE:
+ case ANCR_END_BUF:
+ case ANCR_SEMI_END_BUF:
+ case ANCR_END_LINE:
+ case ANCR_PREC_READ_NOT:
+ case ANCR_LOOK_BEHIND:
add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
break;
- case ANCHOR_PREC_READ:
+ case ANCR_PREC_READ:
{
r = optimize_nodes(NODE_BODY(node), &xo, env);
if (r == 0) {
- if (xo.exb.len > 0)
- copy_opt_exact(&opt->expr, &xo.exb);
- else if (xo.exm.len > 0)
- copy_opt_exact(&opt->expr, &xo.exm);
+ if (xo.sb.len > 0)
+ copy_opt_exact(&opt->spr, &xo.sb);
+ else if (xo.sm.len > 0)
+ copy_opt_exact(&opt->spr, &xo.sm);
- opt->expr.reach_end = 0;
+ opt->spr.reach_end = 0;
if (xo.map.value > 0)
copy_opt_map(&opt->map, &xo.map);
@@ -5349,7 +5503,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
}
break;
- case ANCHOR_LOOK_BEHIND_NOT:
+ case ANCR_LOOK_BEHIND_NOT:
break;
}
break;
@@ -5384,7 +5538,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
set_mml(&opt->len, 0, INFINITE_LEN);
else {
OnigOptionType save = env->options;
- env->options = ENCLOSURE_(NODE_BODY(node))->o.options;
+ env->options = BAG_(NODE_BODY(node))->o.options;
r = optimize_nodes(NODE_BODY(node), opt, env);
env->options = save;
}
@@ -5401,31 +5555,31 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
if (qn->lower > 0) {
copy_node_opt_info(opt, &xo);
- if (xo.exb.len > 0) {
- if (xo.exb.reach_end) {
- for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {
- int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);
+ if (xo.sb.len > 0) {
+ if (xo.sb.reach_end) {
+ for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) {
+ int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);
if (rc > 0) break;
}
- if (i < qn->lower) opt->exb.reach_end = 0;
+ if (i < qn->lower) opt->sb.reach_end = 0;
}
}
if (qn->lower != qn->upper) {
- opt->exb.reach_end = 0;
- opt->exm.reach_end = 0;
+ opt->sb.reach_end = 0;
+ opt->sm.reach_end = 0;
}
if (qn->lower > 1)
- opt->exm.reach_end = 0;
+ opt->sm.reach_end = 0;
}
if (IS_REPEAT_INFINITE(qn->upper)) {
if (env->mmd.max == 0 &&
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);
+ add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);
else
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);
+ add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);
}
max = (xo.len.max > 0 ? INFINITE_LEN : 0);
@@ -5439,12 +5593,12 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
}
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
{
- EnclosureNode* en = ENCLOSURE_(node);
+ BagNode* en = BAG_(node);
switch (en->type) {
- case ENCLOSURE_OPTION:
+ case BAG_OPTION:
{
OnigOptionType save = env->options;
@@ -5454,7 +5608,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
}
break;
- case ENCLOSURE_MEMORY:
+ case BAG_MEMORY:
#ifdef USE_CALL
en->opt_count++;
if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
@@ -5470,23 +5624,23 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
#endif
{
r = optimize_nodes(NODE_BODY(node), opt, env);
- if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {
+ if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {
if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
- remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);
+ remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);
}
}
break;
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_STOP_BACKTRACK:
r = optimize_nodes(NODE_BODY(node), opt, env);
break;
- case ENCLOSURE_IF_ELSE:
+ case BAG_IF_ELSE:
{
OptEnv nenv;
copy_opt_env(&nenv, env);
- r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);
+ r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);
if (r == 0) {
add_mml(&nenv.mmd, &xo.len);
concat_left_node_opt_info(enc, opt, &xo);
@@ -5524,39 +5678,47 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
}
static int
-set_optimize_exact(regex_t* reg, OptExact* e)
+set_optimize_exact(regex_t* reg, OptStr* e)
{
int r;
if (e->len == 0) return 0;
- if (e->ignore_case) {
- reg->exact = (UChar* )xmalloc(e->len);
- CHECK_NULL_RETURN_MEMERR(reg->exact);
- xmemcpy(reg->exact, e->s, e->len);
- reg->exact_end = reg->exact + e->len;
- reg->optimize = OPTIMIZE_EXACT_IC;
+ reg->exact = (UChar* )xmalloc(e->len);
+ CHECK_NULL_RETURN_MEMERR(reg->exact);
+ xmemcpy(reg->exact, e->s, e->len);
+ reg->exact_end = reg->exact + e->len;
+
+ if (e->case_fold) {
+ reg->optimize = OPTIMIZE_STR_CASE_FOLD;
+ if (e->good_case_fold != 0) {
+ if (e->len >= 2) {
+ r = set_sunday_quick_search_or_bmh_skip_table(reg, 1,
+ reg->exact, reg->exact_end,
+ reg->map, &(reg->map_offset));
+ if (r != 0) return r;
+ reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST;
+ }
+ }
}
else {
int allow_reverse;
- reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);
- CHECK_NULL_RETURN_MEMERR(reg->exact);
- reg->exact_end = reg->exact + e->len;
-
allow_reverse =
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
- if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
- r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
- reg->map, &(reg->int_map));
+ if (e->len >= 2 || (e->len >= 1 && allow_reverse)) {
+ r = set_sunday_quick_search_or_bmh_skip_table(reg, 0,
+ reg->exact, reg->exact_end,
+ reg->map, &(reg->map_offset));
if (r != 0) return r;
reg->optimize = (allow_reverse != 0
- ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);
+ ? OPTIMIZE_STR_FAST
+ : OPTIMIZE_STR_FAST_STEP_FORWARD);
}
else {
- reg->optimize = OPTIMIZE_EXACT;
+ reg->optimize = OPTIMIZE_STR;
}
}
@@ -5575,7 +5737,7 @@ set_optimize_map(regex_t* reg, OptMap* m)
{
int i;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+ for (i = 0; i < CHAR_MAP_SIZE; i++)
reg->map[i] = m->map[i];
reg->optimize = OPTIMIZE_MAP;
@@ -5590,8 +5752,8 @@ set_optimize_map(regex_t* reg, OptMap* m)
/*
 * OR into reg->sub_anchor the begin-line bit taken from anc->left and the
 * end-line bit taken from anc->right; all other bits of anc are ignored and
 * bits already set in reg->sub_anchor are kept.  The patch only applies the
 * ANCHOR_* -> ANCR_* constant rename.
 */
static void
set_sub_anchor(regex_t* reg, OptAnc* anc)
{
- reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE;
- reg->sub_anchor |= anc->right & ANCHOR_END_LINE;
+ reg->sub_anchor |= anc->left & ANCR_BEGIN_LINE;
+ reg->sub_anchor |= anc->right & ANCR_END_LINE;
}
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
@@ -5602,7 +5764,7 @@ static int
set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
{
int r;
- NodeOpt opt;
+ OptNode opt;
OptEnv env;
env.enc = reg->enc;
@@ -5614,29 +5776,29 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
r = optimize_nodes(node, &opt, &env);
if (r != 0) return r;
- reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |
- ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |
- ANCHOR_LOOK_BEHIND);
+ reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF |
+ ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML |
+ ANCR_LOOK_BEHIND);
- if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
- reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;
+ if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0)
+ reg->anchor &= ~ANCR_ANYCHAR_INF_ML;
- reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
- ANCHOR_PREC_READ_NOT);
+ reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF |
+ ANCR_PREC_READ_NOT);
- if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
+ if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {
reg->anchor_dmin = opt.len.min;
reg->anchor_dmax = opt.len.max;
}
- if (opt.exb.len > 0 || opt.exm.len > 0) {
- select_opt_exact(reg->enc, &opt.exb, &opt.exm);
- if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) {
+ if (opt.sb.len > 0 || opt.sm.len > 0) {
+ select_opt_exact(reg->enc, &opt.sb, &opt.sm);
+ if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {
goto set_map;
}
else {
- r = set_optimize_exact(reg, &opt.exb);
- set_sub_anchor(reg, &opt.exb.anc);
+ r = set_optimize_exact(reg, &opt.sb);
+ set_sub_anchor(reg, &opt.sb.anc);
}
}
else if (opt.map.value > 0) {
@@ -5645,9 +5807,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
set_sub_anchor(reg, &opt.map.anc);
}
else {
- reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE;
+ reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;
if (opt.len.max == 0)
- reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE;
+ reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;
}
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
@@ -5665,6 +5827,7 @@ clear_optimize_info(regex_t* reg)
reg->anchor_dmax = 0;
reg->sub_anchor = 0;
reg->exact_end = (UChar* )NULL;
+ reg->map_offset = 0;
reg->threshold_len = 0;
if (IS_NOT_NULL(reg->exact)) {
xfree(reg->exact);
@@ -5733,41 +5896,41 @@ print_anchor(FILE* f, int anchor)
fprintf(f, "[");
- if (anchor & ANCHOR_BEGIN_BUF) {
+ if (anchor & ANCR_BEGIN_BUF) {
fprintf(f, "begin-buf");
q = 1;
}
- if (anchor & ANCHOR_BEGIN_LINE) {
+ if (anchor & ANCR_BEGIN_LINE) {
if (q) fprintf(f, ", ");
q = 1;
fprintf(f, "begin-line");
}
- if (anchor & ANCHOR_BEGIN_POSITION) {
+ if (anchor & ANCR_BEGIN_POSITION) {
if (q) fprintf(f, ", ");
q = 1;
fprintf(f, "begin-pos");
}
- if (anchor & ANCHOR_END_BUF) {
+ if (anchor & ANCR_END_BUF) {
if (q) fprintf(f, ", ");
q = 1;
fprintf(f, "end-buf");
}
- if (anchor & ANCHOR_SEMI_END_BUF) {
+ if (anchor & ANCR_SEMI_END_BUF) {
if (q) fprintf(f, ", ");
q = 1;
fprintf(f, "semi-end-buf");
}
- if (anchor & ANCHOR_END_LINE) {
+ if (anchor & ANCR_END_LINE) {
if (q) fprintf(f, ", ");
q = 1;
fprintf(f, "end-line");
}
- if (anchor & ANCHOR_ANYCHAR_INF) {
+ if (anchor & ANCR_ANYCHAR_INF) {
if (q) fprintf(f, ", ");
q = 1;
fprintf(f, "anychar-inf");
}
- if (anchor & ANCHOR_ANYCHAR_INF_ML) {
+ if (anchor & ANCR_ANYCHAR_INF_ML) {
if (q) fprintf(f, ", ");
fprintf(f, "anychar-inf-ml");
}
@@ -5778,12 +5941,13 @@ print_anchor(FILE* f, int anchor)
static void
print_optimize_info(FILE* f, regex_t* reg)
{
- static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
- "EXACT_IC", "MAP" };
+ static const char* on[] = { "NONE", "STR",
+ "STR_FAST", "STR_FAST_STEP_FORWARD",
+ "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };
fprintf(f, "optimize: %s\n", on[reg->optimize]);
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
- if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
+ if ((reg->anchor & ANCR_END_BUF_MASK) != 0)
print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
fprintf(f, "\n");
@@ -5804,14 +5968,14 @@ print_optimize_info(FILE* f, regex_t* reg)
else if (reg->optimize & OPTIMIZE_MAP) {
int c, i, n = 0;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+ for (i = 0; i < CHAR_MAP_SIZE; i++)
if (reg->map[i]) n++;
fprintf(f, "map: n=%d\n", n);
if (n > 0) {
c = 0;
fputc('[', f);
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
+ for (i = 0; i < CHAR_MAP_SIZE; i++) {
if (reg->map[i] != 0) {
if (c > 0) fputs(", ", f);
c++;
@@ -5832,7 +5996,7 @@ print_optimize_info(FILE* f, regex_t* reg)
extern RegexExt*
onig_get_regex_ext(regex_t* reg)
{
- if (IS_NULL(REG_EXTP(reg))) {
+ if (IS_NULL(reg->extp)) {
RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
if (IS_NULL(ext)) return 0;
@@ -5845,10 +6009,10 @@ onig_get_regex_ext(regex_t* reg)
ext->callout_list = 0;
#endif
- REG_EXTPL(reg) = (void* )ext;
+ reg->extp = ext;
}
- return REG_EXTP(reg);
+ return reg->extp;
}
static void
@@ -5895,12 +6059,10 @@ onig_free_body(regex_t* reg)
if (IS_NOT_NULL(reg)) {
if (IS_NOT_NULL(reg->p)) xfree(reg->p);
if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
- if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
- if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(REG_EXTP(reg))) {
- free_regex_ext(REG_EXTP(reg));
- REG_EXTPL(reg) = 0;
+ if (IS_NOT_NULL(reg->extp)) {
+ free_regex_ext(reg->extp);
+ reg->extp = 0;
}
onig_names_free(reg);
@@ -6060,7 +6222,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
#ifdef USE_CALLOUT
- || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)
+ || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)
#endif
)
reg->stack_pop_level = STACK_POP_LEVEL_ALL;
@@ -6152,9 +6314,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl
(reg)->syntax = syntax;
(reg)->optimize = 0;
(reg)->exact = (UChar* )NULL;
- (reg)->int_map = (int* )NULL;
- (reg)->int_map_backward = (int* )NULL;
- REG_EXTPL(reg) = NULL;
+ (reg)->extp = (RegexExt* )NULL;
(reg)->p = (UChar* )NULL;
(reg)->alloc = 0;
@@ -6309,11 +6469,11 @@ onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_
found = 0;
}
else {
- found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
+ found = onig_is_in_code_range(cc->mbuf->p, code) != 0;
}
}
else {
- found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
+ found = BITSET_AT(cc->bs, code) != 0;
}
if (IS_NCCLASS_NOT(cc))
@@ -6387,12 +6547,35 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
case NODE_STRING:
- fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);
- for (p = STR_(node)->s; p < STR_(node)->end; p++) {
- if (*p >= 0x20 && *p < 0x7f)
- fputc(*p, f);
- else {
- fprintf(f, " 0x%02x", *p);
+ {
+ char* mode;
+ char* dont;
+ char* good;
+
+ if (NODE_STRING_IS_RAW(node))
+ mode = "-raw";
+ else if (NODE_STRING_IS_AMBIG(node))
+ mode = "-ambig";
+ else
+ mode = "";
+
+ if (NODE_STRING_IS_GOOD_AMBIG(node))
+ good = "-good";
+ else
+ good = "";
+
+ if (NODE_STRING_IS_DONT_GET_OPT_INFO(node))
+ dont = " (dont-opt)";
+ else
+ dont = "";
+
+ fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node);
+ for (p = STR_(node)->s; p < STR_(node)->end; p++) {
+ if (*p >= 0x20 && *p < 0x7f)
+ fputc(*p, f);
+ else {
+ fprintf(f, " 0x%02x", *p);
+ }
}
}
break;
@@ -6436,36 +6619,36 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NODE_ANCHOR:
fprintf(f, "<anchor:%p> ", node);
switch (ANCHOR_(node)->type) {
- case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
- case ANCHOR_END_BUF: fputs("end buf", f); break;
- case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
- case ANCHOR_END_LINE: fputs("end line", f); break;
- case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
- case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
-
- case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break;
- case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
+ case ANCR_BEGIN_BUF: fputs("begin buf", f); break;
+ case ANCR_END_BUF: fputs("end buf", f); break;
+ case ANCR_BEGIN_LINE: fputs("begin line", f); break;
+ case ANCR_END_LINE: fputs("end line", f); break;
+ case ANCR_SEMI_END_BUF: fputs("semi end buf", f); break;
+ case ANCR_BEGIN_POSITION: fputs("begin position", f); break;
+
+ case ANCR_WORD_BOUNDARY: fputs("word boundary", f); break;
+ case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
#ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
- case ANCHOR_WORD_END: fputs("word end", f); break;
+ case ANCR_WORD_BEGIN: fputs("word begin", f); break;
+ case ANCR_WORD_END: fputs("word end", f); break;
#endif
- case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
fputs("extended-grapheme-cluster boundary", f); break;
- case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
fputs("no-extended-grapheme-cluster boundary", f); break;
- case ANCHOR_PREC_READ:
+ case ANCR_PREC_READ:
fprintf(f, "prec read\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
break;
- case ANCHOR_PREC_READ_NOT:
+ case ANCR_PREC_READ_NOT:
fprintf(f, "prec read not\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
break;
- case ANCHOR_LOOK_BEHIND:
+ case ANCR_LOOK_BEHIND:
fprintf(f, "look behind\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
break;
- case ANCHOR_LOOK_BEHIND_NOT:
+ case ANCR_LOOK_BEHIND_NOT:
fprintf(f, "look behind not\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
break;
@@ -6506,20 +6689,20 @@ print_indent_tree(FILE* f, Node* node, int indent)
print_indent_tree(f, NODE_BODY(node), indent + add);
break;
- case NODE_ENCLOSURE:
- fprintf(f, "<enclosure:%p> ", node);
- switch (ENCLOSURE_(node)->type) {
- case ENCLOSURE_OPTION:
- fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);
+ case NODE_BAG:
+ fprintf(f, "<bag:%p> ", node);
+ switch (BAG_(node)->type) {
+ case BAG_OPTION:
+ fprintf(f, "option:%d", BAG_(node)->o.options);
break;
- case ENCLOSURE_MEMORY:
- fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);
+ case BAG_MEMORY:
+ fprintf(f, "memory:%d", BAG_(node)->m.regnum);
break;
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_STOP_BACKTRACK:
fprintf(f, "stop-bt");
break;
-
- default:
+ case BAG_IF_ELSE:
+ fprintf(f, "if-else");
break;
}
fprintf(f, "\n");
@@ -6561,7 +6744,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&
- type != NODE_ENCLOSURE)
+ type != NODE_BAG)
fprintf(f, "\n");
fflush(f);
}
diff --git a/src/regenc.c b/src/regenc.c
index 21f3536..d8f5274 100644
--- a/src/regenc.c
+++ b/src/regenc.c
@@ -231,7 +231,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
{
int n = 0;
UChar* q = (UChar* )p;
-
+
while (q < end) {
q += ONIGENC_MBC_ENC_LEN(enc, q);
n++;
@@ -244,7 +244,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)
{
int n = 0;
UChar* p = (UChar* )s;
-
+
while (1) {
if (*p == '\0') {
UChar* q;
diff --git a/src/regenc.h b/src/regenc.h
index ae8d65e..8a3397d 100644
--- a/src/regenc.h
+++ b/src/regenc.h
@@ -121,8 +121,20 @@ struct PropertyNameCtype {
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
+#define ENC_SKIP_OFFSET_1_OR_0 7
+
#define ENC_FLAG_ASCII_COMPATIBLE (1<<0)
#define ENC_FLAG_UNICODE (1<<1)
+#define ENC_FLAG_SKIP_OFFSET_MASK (7<<2)
+#define ENC_FLAG_SKIP_OFFSET_0 0
+#define ENC_FLAG_SKIP_OFFSET_1 (1<<2)
+#define ENC_FLAG_SKIP_OFFSET_2 (2<<2)
+#define ENC_FLAG_SKIP_OFFSET_3 (3<<2)
+#define ENC_FLAG_SKIP_OFFSET_4 (4<<2)
+#define ENC_FLAG_SKIP_OFFSET_1_OR_0 (ENC_SKIP_OFFSET_1_OR_0<<2)
+
+#define ENC_GET_SKIP_OFFSET(enc) \
+ (((enc)->flag & ENC_FLAG_SKIP_OFFSET_MASK)>>2)
/* for encoding system implementation (internal) */
@@ -197,7 +209,7 @@ extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar*
else if ((buk)->fold_len == 3)\
addr = OnigUnicodeFolds3 + (buk)->index;\
else\
- addr = 0;\
+ return ONIGERR_INVALID_CODE_POINT_VALUE;\
} while (0)
extern OnigCodePoint OnigUnicodeFolds1[];
@@ -252,7 +264,7 @@ extern const unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
(ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
-
+
#define ONIGENC_IS_UNICODE_ENCODING(enc) \
(((enc)->flag & ENC_FLAG_UNICODE) != 0)
diff --git a/src/regerror.c b/src/regerror.c
index 70efe9a..3fbcdfe 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -30,13 +30,7 @@
#include "regint.h"
#include <stdio.h> /* for vsnprintf() */
-#ifdef HAVE_STDARG_PROTOTYPES
#include <stdarg.h>
-#define va_init_list(a,b) va_start(a,b)
-#else
-#include <varargs.h>
-#define va_init_list(a,b) va_start(a)
-#endif
extern UChar*
onig_error_code_to_format(int code)
@@ -247,7 +241,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
if (len >= buf_size) break;
}
- *is_over = ((p < end) ? 1 : 0);
+ *is_over = p < end;
}
else {
len = MIN((int )(end - s), buf_size);
@@ -262,15 +256,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
-extern int
-#ifdef HAVE_STDARG_PROTOTYPES
-onig_error_code_to_str(UChar* s, int code, ...)
-#else
-onig_error_code_to_str(s, code, va_alist)
- UChar* s;
- int code;
- va_dcl
-#endif
+extern int onig_error_code_to_str(UChar* s, int code, ...)
{
UChar *p, *q;
OnigErrorInfo* einfo;
@@ -278,7 +264,7 @@ onig_error_code_to_str(s, code, va_alist)
UChar parbuf[MAX_ERROR_PAR_LEN];
va_list vargs;
- va_init_list(vargs, code);
+ va_start(vargs, code);
switch (code) {
case ONIGERR_UNDEFINED_NAME_REFERENCE:
@@ -330,27 +316,15 @@ onig_error_code_to_str(s, code, va_alist)
}
-void
-#ifdef HAVE_STDARG_PROTOTYPES
-onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
- UChar* pat, UChar* pat_end, const UChar *fmt, ...)
-#else
-onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
- UChar buf[];
- int bufsize;
- OnigEncoding enc;
- UChar* pat;
- UChar* pat_end;
- const UChar *fmt;
- va_dcl
-#endif
+void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+ UChar* pat, UChar* pat_end, const UChar *fmt, ...)
{
int n, need, len;
UChar *p, *s, *bp;
UChar bs[6];
va_list args;
- va_init_list(args, fmt);
+ va_start(args, fmt);
n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
va_end(args);
diff --git a/src/regexec.c b/src/regexec.c
index 6c76d85..fa61839 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -782,13 +782,13 @@ static int
onig_region_resize_clear(OnigRegion* region, int n)
{
int r;
-
+
r = onig_region_resize(region, n);
if (r != 0) return r;
onig_region_clear(region);
return 0;
}
-
+
extern int
onig_region_set(OnigRegion* region, int at, int beg, int end)
{
@@ -798,7 +798,7 @@ onig_region_set(OnigRegion* region, int at, int beg, int end)
int r = onig_region_resize(region, at + 1);
if (r < 0) return r;
}
-
+
region->beg[at] = beg;
region->end[at] = end;
return 0;
@@ -1225,7 +1225,7 @@ onig_initialize_match_param(OnigMatchParam* mp)
static int
adjust_match_param(regex_t* reg, OnigMatchParam* mp)
{
- RegexExt* ext = REG_EXTP(reg);
+ RegexExt* ext = reg->extp;
mp->match_at_call_counter = 0;
@@ -2337,6 +2337,79 @@ typedef struct {
regoff_t rm_eo;
} posix_regmatch_t;
+
+#ifdef __GNUC__
+#define USE_THREADED_CODE
+#endif
+
+#ifdef USE_THREADED_CODE
+
+#define BYTECODE_INTERPRETER_START JUMP_OP;
+#define BYTECODE_INTERPRETER_END
+#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(1)
+#define DEFAULT_OP /* L_DEFAULT: */
+#define NEXT_OP sprev = sbegin; JUMP_OP
+#define JUMP_OP goto *opcode_to_label[*p++]
+#define BREAK_OP /* Nothing */
+
+#else
+
+#define BYTECODE_INTERPRETER_START \
+ while (1) {\
+ MATCH_DEBUG_OUT(0)\
+ sbegin = s;\
+ switch (*p++) {
+#define BYTECODE_INTERPRETER_END } sprev = sbegin; }
+#define CASE_OP(x) case OP_##x: SOP_IN(OP_##x);
+#define DEFAULT_OP default:
+#define NEXT_OP break
+#define JUMP_OP continue; break
+#define BREAK_OP break
+
+#endif /* USE_THREADED_CODE */
+
+#define NEXT_OUT SOP_OUT; NEXT_OP
+#define JUMP_OUT SOP_OUT; JUMP_OP
+#define BREAK_OUT SOP_OUT; BREAK_OP
+#define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
+
+
+#ifdef ONIG_DEBUG_MATCH
+#define MATCH_DEBUG_OUT(offset) do {\
+ UChar *xp, *q, *bp, buf[50];\
+ int len, spos;\
+ spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
+ xp = p - (offset);\
+ fprintf(stderr, "%7u: %7ld: %4d> \"",\
+ counter, GET_STACK_INDEX(stk), spos);\
+ counter++;\
+ bp = buf;\
+ if (IS_NOT_NULL(s)) {\
+ for (i = 0, q = s; i < 7 && q < end; i++) {\
+ len = enclen(encode, q);\
+ while (len-- > 0) *bp++ = *q++;\
+ }\
+ if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
+ else { xmemcpy(bp, "\"", 1); bp += 1; }\
+ }\
+ else {\
+ xmemcpy(bp, "\"", 1); bp += 1;\
+ }\
+ *bp = 0;\
+ fputs((char* )buf, stderr);\
+ for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\
+ if (xp == FinishCode)\
+ fprintf(stderr, "----: ");\
+ else\
+ fprintf(stderr, "%4d: ", (int )(xp - reg->p));\
+ onig_print_compiled_byte_code(stderr, xp, NULL, reg->p, encode);\
+ fprintf(stderr, "\n");\
+ } while(0);
+#else
+#define MATCH_DEBUG_OUT(offset)
+#endif
+
+
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
static int
@@ -2346,6 +2419,107 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
{
static UChar FinishCode[] = { OP_FINISH };
+#ifdef USE_THREADED_CODE
+ static const void *opcode_to_label[] = {
+ &&L_FINISH,
+ &&L_END,
+ &&L_EXACT1,
+ &&L_EXACT2,
+ &&L_EXACT3,
+ &&L_EXACT4,
+ &&L_EXACT5,
+ &&L_EXACTN,
+ &&L_EXACTMB2N1,
+ &&L_EXACTMB2N2,
+ &&L_EXACTMB2N3,
+ &&L_EXACTMB2N,
+ &&L_EXACTMB3N,
+ &&L_EXACTMBN,
+ &&L_EXACT1_IC,
+ &&L_EXACTN_IC,
+ &&L_CCLASS,
+ &&L_CCLASS_MB,
+ &&L_CCLASS_MIX,
+ &&L_CCLASS_NOT,
+ &&L_CCLASS_MB_NOT,
+ &&L_CCLASS_MIX_NOT,
+#ifdef USE_OP_CCLASS_NODE
+ &&L_CCLASS_NODE,
+#endif
+ &&L_ANYCHAR,
+ &&L_ANYCHAR_ML,
+ &&L_ANYCHAR_STAR,
+ &&L_ANYCHAR_ML_STAR,
+ &&L_ANYCHAR_STAR_PEEK_NEXT,
+ &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
+ &&L_WORD,
+ &&L_WORD_ASCII,
+ &&L_NO_WORD,
+ &&L_NO_WORD_ASCII,
+ &&L_WORD_BOUNDARY,
+ &&L_NO_WORD_BOUNDARY,
+ &&L_WORD_BEGIN,
+ &&L_WORD_END,
+ &&L_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
+ &&L_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
+ &&L_BEGIN_BUF,
+ &&L_END_BUF,
+ &&L_BEGIN_LINE,
+ &&L_END_LINE,
+ &&L_SEMI_END_BUF,
+ &&L_BEGIN_POSITION,
+ &&L_BACKREF1,
+ &&L_BACKREF2,
+ &&L_BACKREF_N,
+ &&L_BACKREF_N_IC,
+ &&L_BACKREF_MULTI,
+ &&L_BACKREF_MULTI_IC,
+ &&L_BACKREF_WITH_LEVEL,
+ &&L_BACKREF_CHECK,
+ &&L_BACKREF_CHECK_WITH_LEVEL,
+ &&L_MEMORY_START,
+ &&L_MEMORY_START_PUSH,
+ &&L_MEMORY_END_PUSH,
+ &&L_MEMORY_END_PUSH_REC,
+ &&L_MEMORY_END,
+ &&L_MEMORY_END_REC,
+ &&L_FAIL,
+ &&L_JUMP,
+ &&L_PUSH,
+ &&L_PUSH_SUPER,
+ &&L_POP_OUT,
+ &&L_PUSH_OR_JUMP_EXACT1,
+ &&L_PUSH_IF_PEEK_NEXT,
+ &&L_REPEAT,
+ &&L_REPEAT_NG,
+ &&L_REPEAT_INC,
+ &&L_REPEAT_INC_NG,
+ &&L_REPEAT_INC_SG,
+ &&L_REPEAT_INC_NG_SG,
+ &&L_EMPTY_CHECK_START,
+ &&L_EMPTY_CHECK_END,
+ &&L_EMPTY_CHECK_END_MEMST,
+ &&L_EMPTY_CHECK_END_MEMST_PUSH,
+ &&L_PREC_READ_START,
+ &&L_PREC_READ_END,
+ &&L_PREC_READ_NOT_START,
+ &&L_PREC_READ_NOT_END,
+ &&L_ATOMIC_START,
+ &&L_ATOMIC_END,
+ &&L_LOOK_BEHIND,
+ &&L_LOOK_BEHIND_NOT_START,
+ &&L_LOOK_BEHIND_NOT_END,
+ &&L_CALL,
+ &&L_RETURN,
+ &&L_PUSH_SAVE_VAL,
+ &&L_UPDATE_VAR,
+#ifdef USE_CALLOUT
+ &&L_CALLOUT_CONTENTS,
+ &&L_CALLOUT_NAME,
+#endif
+ };
+#endif
+
int i, n, num_mem, best_len, pop_level;
LengthType tlen, tlen2;
MemNumType mem;
@@ -2374,6 +2548,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
OnigEncoding encode = reg->enc;
OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
+#ifdef ONIG_DEBUG_MATCH
+ static unsigned int counter = 1;
+#endif
+
#ifdef USE_CALLOUT
msa->mp->match_at_call_counter++;
#endif
@@ -2406,40 +2584,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
retry_in_match_counter = 0;
#endif
- while (1) {
-#ifdef ONIG_DEBUG_MATCH
- {
- static unsigned int counter = 1;
-
- UChar *q, *bp, buf[50];
- int len;
- fprintf(stderr, "%7u: %7ld: %4d> \"",
- counter, GET_STACK_INDEX(stk), (int )(s - str));
- counter++;
-
- bp = buf;
- for (i = 0, q = s; i < 7 && q < end; i++) {
- len = enclen(encode, q);
- while (len-- > 0) *bp++ = *q++;
- }
- if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
- else { xmemcpy(bp, "\"", 1); bp += 1; }
- *bp = 0;
- fputs((char* )buf, stderr);
-
- for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
- if (p == FinishCode)
- fprintf(stderr, "----: ");
- else
- fprintf(stderr, "%4d: ", (int )(p - reg->p));
- onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode);
- fprintf(stderr, "\n");
- }
-#endif
-
- sbegin = s;
- switch (*p++) {
- case OP_END: SOP_IN(OP_END);
+ BYTECODE_INTERPRETER_START {
+ CASE_OP(END)
n = (int )(s - sstart);
if (n > best_len) {
OnigRegion* region;
@@ -2551,16 +2697,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
/* default behavior: return first-matching result. */
goto finish;
- break;
- case OP_EXACT1: SOP_IN(OP_EXACT1);
+ CASE_OP(EXACT1)
DATA_ENSURE(1);
if (*p != *s) goto fail;
p++; s++;
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC);
+ CASE_OP(EXACT1_IC)
{
int len;
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -2579,21 +2723,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; q++;
}
}
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_EXACT2: SOP_IN(OP_EXACT2);
+ CASE_OP(EXACT2)
DATA_ENSURE(2);
if (*p != *s) goto fail;
p++; s++;
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACT3: SOP_IN(OP_EXACT3);
+ CASE_OP(EXACT3)
DATA_ENSURE(3);
if (*p != *s) goto fail;
p++; s++;
@@ -2602,11 +2743,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACT4: SOP_IN(OP_EXACT4);
+ CASE_OP(EXACT4)
DATA_ENSURE(4);
if (*p != *s) goto fail;
p++; s++;
@@ -2617,11 +2756,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACT5: SOP_IN(OP_EXACT5);
+ CASE_OP(EXACT5)
DATA_ENSURE(5);
if (*p != *s) goto fail;
p++; s++;
@@ -2634,22 +2771,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACTN: SOP_IN(OP_EXACTN);
+ CASE_OP(EXACTN)
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen);
while (tlen-- > 0) {
if (*p++ != *s++) goto fail;
}
sprev = s - 1;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC);
+ CASE_OP(EXACTN_IC)
{
int len;
UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -2673,20 +2806,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1);
+ CASE_OP(EXACTMB2N1)
DATA_ENSURE(2);
if (*p != *s) goto fail;
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2);
+ CASE_OP(EXACTMB2N2)
DATA_ENSURE(4);
if (*p != *s) goto fail;
p++; s++;
@@ -2697,11 +2827,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3);
+ CASE_OP(EXACTMB2N3)
DATA_ENSURE(6);
if (*p != *s) goto fail;
p++; s++;
@@ -2716,11 +2844,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N);
+ CASE_OP(EXACTMB2N)
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 2);
while (tlen-- > 0) {
@@ -2730,11 +2856,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - 2;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N);
+ CASE_OP(EXACTMB3N)
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 3);
while (tlen-- > 0) {
@@ -2746,11 +2870,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - 3;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EXACTMBN: SOP_IN(OP_EXACTMBN);
+ CASE_OP(EXACTMBN)
GET_LENGTH_INC(tlen, p); /* mb-len */
GET_LENGTH_INC(tlen2, p); /* string len */
tlen2 *= tlen;
@@ -2760,19 +2882,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - tlen;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_CCLASS: SOP_IN(OP_CCLASS);
+ CASE_OP(CCLASS)
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
p += SIZE_BITSET;
s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB);
+ CASE_OP(CCLASS_MB)
if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
@@ -2798,10 +2917,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
}
p += tlen;
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX);
+ CASE_OP(CCLASS_MIX)
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
@@ -2816,18 +2934,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
s++;
}
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT);
+ CASE_OP(CCLASS_NOT)
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
p += SIZE_BITSET;
s += enclen(encode, s);
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT);
+ CASE_OP(CCLASS_MB_NOT)
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
s++;
@@ -2865,10 +2981,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
cc_mb_not_success:
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT);
+ CASE_OP(CCLASS_MIX_NOT)
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
@@ -2883,11 +2998,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
s++;
}
- SOP_OUT;
- break;
+ NEXT_OUT;
#ifdef USE_OP_CCLASS_NODE
- case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE);
+ CASE_OP(CCLASS_NODE)
{
OnigCodePoint code;
void *node;
@@ -2903,28 +3017,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
code = ONIGENC_MBC_TO_CODE(encode, ss, s);
if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
}
- SOP_OUT;
- break;
+ NEXT_OUT;
#endif
- case OP_ANYCHAR: SOP_IN(OP_ANYCHAR);
+ CASE_OP(ANYCHAR)
DATA_ENSURE(1);
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
s += n;
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML);
+ CASE_OP(ANYCHAR_ML)
DATA_ENSURE(1);
n = enclen(encode, s);
DATA_ENSURE(n);
s += n;
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR);
+ CASE_OP(ANYCHAR_STAR)
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enclen(encode, s);
@@ -2933,11 +3044,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sprev = s;
s += n;
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR);
+ CASE_OP(ANYCHAR_ML_STAR)
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enclen(encode, s);
@@ -2951,11 +3060,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s++;
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
+ CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
@@ -2967,10 +3074,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s += n;
}
p++;
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
@@ -2987,46 +3093,41 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
p++;
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_WORD: SOP_IN(OP_WORD);
+ CASE_OP(WORD)
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
s += enclen(encode, s);
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII);
+ CASE_OP(WORD_ASCII)
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
goto fail;
s += enclen(encode, s);
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_NO_WORD: SOP_IN(OP_NO_WORD);
+ CASE_OP(NO_WORD)
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
s += enclen(encode, s);
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII);
+ CASE_OP(NO_WORD_ASCII)
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
goto fail;
s += enclen(encode, s);
- SOP_OUT;
- break;
+ NEXT_OUT;
- case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY);
+ CASE_OP(WORD_BOUNDARY)
{
ModeType mode;
GET_MODE_INC(mode, p); /* ascii_mode */
@@ -3046,11 +3147,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY);
+ CASE_OP(NO_WORD_BOUNDARY)
{
ModeType mode;
GET_MODE_INC(mode, p); /* ascii_mode */
@@ -3069,189 +3168,150 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#ifdef USE_WORD_BEGIN_END
- case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN);
+ CASE_OP(WORD_BEGIN)
{
ModeType mode;
GET_MODE_INC(mode, p); /* ascii_mode */
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
}
}
goto fail;
- break;
- case OP_WORD_END: SOP_IN(OP_WORD_END);
+ CASE_OP(WORD_END)
{
ModeType mode;
GET_MODE_INC(mode, p); /* ascii_mode */
if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
}
}
goto fail;
- break;
#endif
- case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ CASE_OP(EXTENDED_GRAPHEME_CLUSTER_BOUNDARY)
if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) {
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
goto fail;
- break;
- case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ CASE_OP(NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY)
if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))
goto fail;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF);
+ CASE_OP(BEGIN_BUF)
if (! ON_STR_BEGIN(s)) goto fail;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_END_BUF: SOP_IN(OP_END_BUF);
+ CASE_OP(END_BUF)
if (! ON_STR_END(s)) goto fail;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE);
+ CASE_OP(BEGIN_LINE)
if (ON_STR_BEGIN(s)) {
if (IS_NOTBOL(msa->options)) goto fail;
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
goto fail;
- break;
- case OP_END_LINE: SOP_IN(OP_END_LINE);
+ CASE_OP(END_LINE)
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
- SOP_OUT;
- continue;
+ JUMP_OUT;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
}
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
#endif
goto fail;
- break;
- case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF);
+ CASE_OP(SEMI_END_BUF)
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
- SOP_OUT;
- continue;
+ JUMP_OUT;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
}
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
ON_STR_END(s + enclen(encode, s))) {
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
UChar* ss = s + enclen(encode, s);
ss += enclen(encode, ss);
if (ON_STR_END(ss)) {
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
}
#endif
goto fail;
- break;
- case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION);
+ CASE_OP(BEGIN_POSITION)
if (s != msa->start)
goto fail;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH);
+ CASE_OP(MEMORY_START_PUSH)
GET_MEMNUM_INC(mem, p);
STACK_PUSH_MEM_START(mem, s);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_MEMORY_START: SOP_IN(OP_MEMORY_START);
+ CASE_OP(MEMORY_START)
GET_MEMNUM_INC(mem, p);
mem_start_stk[mem] = (StackIndex )((void* )s);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH);
+ CASE_OP(MEMORY_END_PUSH)
GET_MEMNUM_INC(mem, p);
STACK_PUSH_MEM_END(mem, s);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_MEMORY_END: SOP_IN(OP_MEMORY_END);
+ CASE_OP(MEMORY_END)
GET_MEMNUM_INC(mem, p);
mem_end_stk[mem] = (StackIndex )((void* )s);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#ifdef USE_CALL
- case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC);
+ CASE_OP(MEMORY_END_PUSH_REC)
GET_MEMNUM_INC(mem, p);
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
STACK_PUSH_MEM_END(mem, s);
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC);
+ CASE_OP(MEMORY_END_REC)
GET_MEMNUM_INC(mem, p);
mem_end_stk[mem] = (StackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
@@ -3262,22 +3322,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
STACK_PUSH_MEM_END_MARK(mem);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#endif
- case OP_BACKREF1: SOP_IN(OP_BACKREF1);
+ CASE_OP(BACKREF1)
mem = 1;
goto backref;
- break;
- case OP_BACKREF2: SOP_IN(OP_BACKREF2);
+ CASE_OP(BACKREF2)
mem = 2;
goto backref;
- break;
- case OP_BACKREF_N: SOP_IN(OP_BACKREF_N);
+ CASE_OP(BACKREF_N)
GET_MEMNUM_INC(mem, p);
backref:
{
@@ -3301,13 +3357,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STRING_CMP(pstart, s, n);
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
-
- SOP_OUT;
- continue;
}
- break;
+ JUMP_OUT;
- case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC);
+ CASE_OP(BACKREF_N_IC)
GET_MEMNUM_INC(mem, p);
{
int len;
@@ -3330,13 +3383,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STRING_CMP_IC(case_fold_flag, pstart, &s, n);
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
-
- SOP_OUT;
- continue;
}
- break;
+ JUMP_OUT;
- case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI);
+ CASE_OP(BACKREF_MULTI)
{
int len, is_fail;
UChar *pstart, *pend, *swork;
@@ -3370,12 +3420,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- SOP_OUT;
- continue;
}
- break;
+ JUMP_OUT;
- case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC);
+ CASE_OP(BACKREF_MULTI_IC)
{
int len, is_fail;
UChar *pstart, *pend, *swork;
@@ -3409,13 +3457,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- SOP_OUT;
- continue;
}
- break;
+ JUMP_OUT;
#ifdef USE_BACKREF_WITH_LEVEL
- case OP_BACKREF_WITH_LEVEL:
+ CASE_OP(BACKREF_WITH_LEVEL)
{
int len;
OnigOptionType ic;
@@ -3436,14 +3482,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else
goto fail;
-
- SOP_OUT;
- continue;
}
- break;
+ JUMP_OUT;
#endif
- case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK);
+ CASE_OP(BACKREF_CHECK)
{
GET_LENGTH_INC(tlen, p);
for (i = 0; i < tlen; i++) {
@@ -3456,13 +3499,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- SOP_OUT;
- continue;
}
- break;
+ JUMP_OUT;
#ifdef USE_BACKREF_WITH_LEVEL
- case OP_BACKREF_CHECK_WITH_LEVEL:
+ CASE_OP(BACKREF_CHECK_WITH_LEVEL)
{
LengthType level;
@@ -3475,21 +3516,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else
goto fail;
-
- SOP_OUT;
- continue;
}
- break;
+ JUMP_OUT;
#endif
- case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START);
+ CASE_OP(EMPTY_CHECK_START)
GET_MEMNUM_INC(mem, p); /* mem: null check id */
STACK_PUSH_EMPTY_CHECK_START(mem, s);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END);
+ CASE_OP(EMPTY_CHECK_END)
{
int is_empty;
@@ -3518,12 +3554,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
- case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST);
+ CASE_OP(EMPTY_CHECK_END_MEMST)
{
int is_empty;
@@ -3537,14 +3571,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto empty_check_found;
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#endif
#ifdef USE_CALL
- case OP_EMPTY_CHECK_END_MEMST_PUSH:
- SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);
+ CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
{
int is_empty;
@@ -3566,68 +3597,51 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_EMPTY_CHECK_END(mem);
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#endif
- case OP_JUMP: SOP_IN(OP_JUMP);
+ CASE_OP(JUMP)
GET_RELADDR_INC(addr, p);
p += addr;
- SOP_OUT;
- CHECK_INTERRUPT_IN_MATCH;
- continue;
- break;
+ CHECK_INTERRUPT_JUMP_OUT;
- case OP_PUSH: SOP_IN(OP_PUSH);
+ CASE_OP(PUSH)
GET_RELADDR_INC(addr, p);
STACK_PUSH_ALT(p + addr, s, sprev);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER);
+ CASE_OP(PUSH_SUPER)
GET_RELADDR_INC(addr, p);
STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_POP_OUT: SOP_IN(OP_POP_OUT);
+ CASE_OP(POP_OUT)
STACK_POP_ONE;
/* for stop backtrack */
/* CHECK_RETRY_LIMIT_IN_MATCH; */
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1);
+ CASE_OP(PUSH_OR_JUMP_EXACT1)
GET_RELADDR_INC(addr, p);
if (*p == *s && DATA_ENSURE_CHECK1) {
p++;
STACK_PUSH_ALT(p + addr, s, sprev);
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
p += (addr + 1);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT);
+ CASE_OP(PUSH_IF_PEEK_NEXT)
GET_RELADDR_INC(addr, p);
if (*p == *s) {
p++;
STACK_PUSH_ALT(p + addr, s, sprev);
- SOP_OUT;
- continue;
+ JUMP_OUT;
}
p++;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_REPEAT: SOP_IN(OP_REPEAT);
+ CASE_OP(REPEAT)
{
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
GET_RELADDR_INC(addr, p);
@@ -3640,11 +3654,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_ALT(p + addr, s, sprev);
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG);
+ CASE_OP(REPEAT_NG)
{
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
GET_RELADDR_INC(addr, p);
@@ -3658,11 +3670,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += addr;
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC);
+ CASE_OP(REPEAT_INC)
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
si = repeat_stk[mem];
stkp = STACK_AT(si);
@@ -3680,19 +3690,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p = stkp->u.repeat.pcode;
}
STACK_PUSH_REPEAT_INC(si);
- SOP_OUT;
- CHECK_INTERRUPT_IN_MATCH;
- continue;
- break;
+ CHECK_INTERRUPT_JUMP_OUT;
- case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG);
+ CASE_OP(REPEAT_INC_SG)
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc;
- break;
- case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG);
+ CASE_OP(REPEAT_INC_NG)
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
si = repeat_stk[mem];
stkp = STACK_AT(si);
@@ -3714,68 +3720,51 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
STACK_PUSH_REPEAT_INC(si);
}
- SOP_OUT;
- CHECK_INTERRUPT_IN_MATCH;
- continue;
- break;
+ CHECK_INTERRUPT_JUMP_OUT;
- case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG);
+ CASE_OP(REPEAT_INC_NG_SG)
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc_ng;
- break;
- case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START);
+ CASE_OP(PREC_READ_START)
STACK_PUSH_POS(s, sprev);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END);
+ CASE_OP(PREC_READ_END)
{
STACK_EXEC_TO_VOID(stkp);
s = stkp->u.state.pstr;
sprev = stkp->u.state.pstr_prev;
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START);
+ CASE_OP(PREC_READ_NOT_START)
GET_RELADDR_INC(addr, p);
STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END);
+ CASE_OP(PREC_READ_NOT_END)
STACK_POP_TIL_ALT_PREC_READ_NOT;
goto fail;
- break;
- case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START);
+ CASE_OP(ATOMIC_START)
STACK_PUSH_TO_VOID_START;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END);
+ CASE_OP(ATOMIC_END)
STACK_EXEC_TO_VOID(stkp);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND);
+ CASE_OP(LOOK_BEHIND)
GET_LENGTH_INC(tlen, p);
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START);
+ CASE_OP(LOOK_BEHIND_NOT_START)
GET_RELADDR_INC(addr, p);
GET_LENGTH_INC(tlen, p);
q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
@@ -3790,33 +3779,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s = q;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END);
+ CASE_OP(LOOK_BEHIND_NOT_END)
STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
goto fail;
- break;
#ifdef USE_CALL
- case OP_CALL: SOP_IN(OP_CALL);
+ CASE_OP(CALL)
GET_ABSADDR_INC(addr, p);
STACK_PUSH_CALL_FRAME(p);
p = reg->p + addr;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_RETURN: SOP_IN(OP_RETURN);
+ CASE_OP(RETURN)
STACK_RETURN(p);
STACK_PUSH_RETURN;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#endif
- case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL);
+ CASE_OP(PUSH_SAVE_VAL)
{
SaveType type;
GET_SAVE_TYPE_INC(type, p);
@@ -3835,11 +3817,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR);
+ CASE_OP(UPDATE_VAR)
{
UpdateVarType type;
enum SaveType save_type;
@@ -3867,20 +3847,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#ifdef USE_CALLOUT
- case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS);
+ CASE_OP(CALLOUT_CONTENTS)
of = ONIG_CALLOUT_OF_CONTENTS;
goto callout_common_entry;
+ BREAK_OUT;
- SOP_OUT;
- continue;
- break;
-
- case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME);
+ CASE_OP(CALLOUT_NAME)
{
int call_result;
int name_id;
@@ -3941,34 +3916,34 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
}
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
#endif
- case OP_FINISH:
+ CASE_OP(FINISH)
goto finish;
- break;
+#ifdef ONIG_DEBUG_STATISTICS
fail:
SOP_OUT;
- /* fall */
- case OP_FAIL: SOP_IN(OP_FAIL);
+ goto fail2;
+#endif
+ CASE_OP(FAIL)
+#ifdef ONIG_DEBUG_STATISTICS
+ fail2:
+#else
+ fail:
+#endif
STACK_POP;
p = stk->u.state.pcode;
s = stk->u.state.pstr;
sprev = stk->u.state.pstr_prev;
CHECK_RETRY_LIMIT_IN_MATCH;
- SOP_OUT;
- continue;
- break;
+ JUMP_OUT;
- default:
+ DEFAULT_OP
goto bytecode_error;
- } /* end of switch */
- sprev = sbegin;
- } /* end of while(1) */
+ } BYTECODE_INTERPRETER_END;
finish:
STACK_SAVE;
@@ -4130,150 +4105,143 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
return (UChar* )NULL;
}
+
static UChar*
-bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
+sunday_quick_search_step_forward(regex_t* reg,
+ const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end,
+ const UChar* text_range)
{
const UChar *s, *se, *t, *p, *end;
const UChar *tail;
int skip, tlen1;
+ int map_offset;
+ OnigEncoding enc;
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n",
- text, text_end, text_range);
+ fprintf(stderr,
+ "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
#endif
+ enc = reg->enc;
+
tail = target_end - 1;
tlen1 = (int )(tail - target);
end = text_range;
if (end + tlen1 > text_end)
end = text_end - tlen1;
+ map_offset = reg->map_offset;
s = text;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->int_map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s);
- } while ((s - t) < skip && s < end);
+ while (s < end) {
+ p = se = s + tlen1;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )s;
+ p--; t--;
}
+ if (se + map_offset >= text_end) break;
+ skip = reg->map[*(se + map_offset)];
+#if 0
+ t = s;
+ do {
+ s += enclen(enc, s);
+ } while ((s - t) < skip && s < end);
+#else
+ s += skip;
+ if (s < end)
+ s = onigenc_get_right_adjust_char_head(enc, text, s);
+#endif
}
return (UChar* )NULL;
}
static UChar*
-bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
+sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end,
+ const UChar* text_range)
{
const UChar *s, *t, *p, *end;
const UChar *tail;
+ int map_offset;
- end = text_range + (target_end - target) - 1;
+ end = text_range + (target_end - target);
if (end > text_end)
end = text_end;
+ map_offset = reg->map_offset;
tail = target_end - 1;
- s = text + (target_end - target) - 1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->map[*s];
- }
- }
- else { /* see int_map[] */
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->int_map[*s];
+ s = text + (tail - target);
+
+ while (s < end) {
+ p = s;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )p;
+ p--; t--;
}
+ if (s + map_offset >= text_end) break;
+ s += reg->map[*(s + map_offset)];
}
+
return (UChar* )NULL;
}
-#ifdef USE_INT_MAP_BACKWARD
-static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip)
+static UChar*
+sunday_quick_search_case_fold(regex_t* reg,
+ const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end,
+ const UChar* text_range)
{
- int i, len;
-
- if (IS_NULL(*skip)) {
- *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*skip)) return ONIGERR_MEMORY;
- }
-
- len = end - s;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- (*skip)[i] = len;
+ const UChar *s, *se, *end;
+ const UChar *tail;
+ int skip, tlen1;
+ int map_offset;
+ int case_fold_flag;
+ OnigEncoding enc;
- for (i = len - 1; i > 0; i--)
- (*skip)[s[i]] = i;
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr,
+ "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
+#endif
- return 0;
-}
+ enc = reg->enc;
+ case_fold_flag = reg->case_fold_flag;
-static UChar*
-bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- const UChar *s, *t, *p;
+ tail = target_end - 1;
+ tlen1 = (int )(tail - target);
+ end = text_range;
+ if (end + tlen1 > text_end)
+ end = text_end - tlen1;
- s = text_end - (target_end - target);
- if (text_start < s)
- s = text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
+ map_offset = reg->map_offset;
+ s = text;
- while (s >= text) {
- p = s;
- t = target;
- while (t < target_end && *p == *t) {
- p++; t++;
- }
- if (t == target_end)
+ while (s < end) {
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ s, text_end))
return (UChar* )s;
- s -= reg->int_map_backward[*s];
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
+ se = s + tlen1;
+ if (se + map_offset >= text_end) break;
+ skip = reg->map[*(se + map_offset)];
+#if 0
+ p = s;
+ do {
+ s += enclen(enc, s);
+ } while ((s - p) < skip && s < end);
+#else
+ /* This is faster than prev code for long text. ex: /(?i)Twain/ */
+ s += skip;
+ if (s < end)
+ s = onigenc_get_right_adjust_char_head(enc, text, s);
+#endif
}
return (UChar* )NULL;
}
-#endif
static UChar*
map_search(OnigEncoding enc, UChar map[],
@@ -4380,20 +4348,26 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
retry:
switch (reg->optimize) {
- case OPTIMIZE_EXACT:
+ case OPTIMIZE_STR:
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
- case OPTIMIZE_EXACT_IC:
+ case OPTIMIZE_STR_CASE_FOLD:
p = slow_search_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end, p, end, range);
break;
- case OPTIMIZE_EXACT_BM:
- p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
+ case OPTIMIZE_STR_CASE_FOLD_FAST:
+ p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end,
+ range);
+ break;
+
+ case OPTIMIZE_STR_FAST:
+ p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
break;
- case OPTIMIZE_EXACT_BM_NO_REV:
- p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
+ case OPTIMIZE_STR_FAST_STEP_FORWARD:
+ p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
+ p, end, range);
break;
case OPTIMIZE_MAP:
@@ -4413,7 +4387,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
UChar* prev;
switch (reg->sub_anchor) {
- case ANCHOR_BEGIN_LINE:
+ case ANCR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p);
@@ -4422,7 +4396,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
break;
- case ANCHOR_END_LINE:
+ case ANCR_END_LINE:
if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
@@ -4490,8 +4464,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
-#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
-
static int
backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
UChar* s, const UChar* range, UChar* adjrange,
@@ -4499,41 +4471,29 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
{
UChar *p;
+ if (range == 0) goto fail;
+
range += reg->dmin;
p = s;
retry:
switch (reg->optimize) {
- case OPTIMIZE_EXACT:
+ case OPTIMIZE_STR:
exact_method:
p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
- case OPTIMIZE_EXACT_IC:
+ case OPTIMIZE_STR_CASE_FOLD:
+ case OPTIMIZE_STR_CASE_FOLD_FAST:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
- case OPTIMIZE_EXACT_BM:
- case OPTIMIZE_EXACT_BM_NO_REV:
-#ifdef USE_INT_MAP_BACKWARD
- if (IS_NULL(reg->int_map_backward)) {
- int r;
-
- if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
- goto exact_method;
-
- r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
- &(reg->int_map_backward));
- if (r != 0) return r;
- }
- p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
- end, p);
-#else
+ case OPTIMIZE_STR_FAST:
+ case OPTIMIZE_STR_FAST_STEP_FORWARD:
goto exact_method;
-#endif
break;
case OPTIMIZE_MAP:
@@ -4546,17 +4506,17 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
UChar* prev;
switch (reg->sub_anchor) {
- case ANCHOR_BEGIN_LINE:
+ case ANCR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
prev = onigenc_get_prev_char_head(reg->enc, str, p);
- if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+ if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
p = prev;
goto retry;
}
}
break;
- case ANCHOR_END_LINE:
+ case ANCR_END_LINE:
if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
@@ -4682,7 +4642,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
if (reg->anchor != 0 && str < end) {
UChar *min_semi_end, *max_semi_end;
- if (reg->anchor & ANCHOR_BEGIN_POSITION) {
+ if (reg->anchor & ANCR_BEGIN_POSITION) {
/* search start-position only */
begin_position:
if (range > start)
@@ -4690,7 +4650,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
else
range = start;
}
- else if (reg->anchor & ANCHOR_BEGIN_BUF) {
+ else if (reg->anchor & ANCR_BEGIN_BUF) {
/* search str-position only */
if (range > start) {
if (start != str) goto mismatch_no_msa;
@@ -4705,7 +4665,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
goto mismatch_no_msa;
}
}
- else if (reg->anchor & ANCHOR_END_BUF) {
+ else if (reg->anchor & ANCR_END_BUF) {
min_semi_end = max_semi_end = (UChar* )end;
end_buf:
@@ -4737,7 +4697,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
if (range > start) goto mismatch_no_msa;
}
}
- else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
+ else if (reg->anchor & ANCR_SEMI_END_BUF) {
UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
max_semi_end = (UChar* )end;
@@ -4760,7 +4720,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
goto end_buf;
}
}
- else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) {
+ else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) {
goto begin_position;
}
}
@@ -4833,13 +4793,13 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
if (! forward_search_range(reg, str, end, s, sch_range,
&low, &high, (UChar** )NULL)) goto mismatch;
- if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) {
+ if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {
do {
MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enclen(reg->enc, s);
- if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
+ if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
prev = s;
s += enclen(reg->enc, s);
@@ -4862,6 +4822,8 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
}
else { /* backward search */
+ if (range < str) goto mismatch;
+
if (orig_start < end)
orig_start += enclen(reg->enc, orig_start); /* is upper range */
diff --git a/src/regint.h b/src/regint.h
index c3d1ee1..d6aec9d 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -62,7 +62,6 @@
#define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
-
#define USE_RETRY_LIMIT_IN_MATCH
/* internal config */
@@ -70,27 +69,13 @@
#define USE_QUANT_PEEK_NEXT
#define USE_ST_LIBRARY
-#include "regenc.h"
-
-#ifdef __cplusplus
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
-#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-#ifdef HAVE_STDARG_H
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
+#define USE_WORD_BEGIN_END /* "\<", "\>" */
+#define USE_CAPTURE_HISTORY
+#define USE_VARIABLE_META_CHARS
+#define USE_POSIX_API_REGION_OPTION
+#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#include "regenc.h"
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
@@ -103,12 +88,6 @@
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
-#define USE_WORD_BEGIN_END /* "\<", "\>" */
-#define USE_CAPTURE_HISTORY
-#define USE_VARIABLE_META_CHARS
-#define USE_POSIX_API_REGION_OPTION
-#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
@@ -152,14 +131,8 @@
#include <stddef.h>
-
-#ifdef HAVE_LIMITS_H
#include <limits.h>
-#endif
-
-#ifdef HAVE_STDLIB_H
#include <stdlib.h>
-#endif
#ifdef HAVE_STDINT_H
#include <stdint.h>
@@ -169,11 +142,7 @@
#include <alloca.h>
#endif
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
+#include <string.h>
#include <ctype.h>
#ifdef HAVE_SYS_TYPES_H
@@ -217,6 +186,7 @@ typedef unsigned int uintptr_t;
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
#define NULL_UCHARP ((UChar* )0)
+#define CHAR_MAP_SIZE 256
#define INFINITE_LEN ONIG_INFINITE_DISTANCE
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@@ -292,9 +262,6 @@ typedef struct {
#endif
} RegexExt;
-#define REG_EXTP(reg) ((RegexExt* )((reg)->chain))
-#define REG_EXTPL(reg) ((reg)->chain)
-
struct re_pattern_buffer {
/* common members of BBuf(bytes-buffer) */
unsigned char* p; /* compiled pattern */
@@ -304,7 +271,6 @@ struct re_pattern_buffer {
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
- int num_comb_exp_check; /* no longer used (combination explosion check) */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */
@@ -323,19 +289,16 @@ struct re_pattern_buffer {
int optimize; /* optimize flag */
int threshold_len; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
- OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
- OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
- unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
- int *int_map; /* BM skip for exact_len > 255 */
- int *int_map_backward; /* BM skip for backward search */
- OnigLen dmin; /* min-distance of exact or map */
- OnigLen dmax; /* max-distance of exact or map */
-
- /* regex_t link chain */
- struct re_pattern_buffer* chain; /* escape compile-conflict */
+ unsigned char map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */
+ int map_offset;
+ OnigLen dmin; /* min-distance of exact or map */
+ OnigLen dmax; /* max-distance of exact or map */
+ RegexExt* extp;
};
@@ -348,12 +311,13 @@ enum StackPopLevel {
/* optimize flags */
enum OptimizeType {
- OPTIMIZE_NONE = 0,
- OPTIMIZE_EXACT = 1, /* Slow Search */
- OPTIMIZE_EXACT_BM = 2, /* Boyer Moore Search */
- OPTIMIZE_EXACT_BM_NO_REV = 3, /* BM (but not simple match) */
- OPTIMIZE_EXACT_IC = 4, /* Slow Search (ignore case) */
- OPTIMIZE_MAP = 5 /* char map */
+ OPTIMIZE_NONE = 0,
+ OPTIMIZE_STR, /* Slow Search */
+ OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */
+ OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */
+ OPTIMIZE_STR_CASE_FOLD_FAST, /* Sunday quick search / BMH (ignore case) */
+ OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */
+ OPTIMIZE_MAP /* char map */
};
/* bit status */
@@ -541,32 +505,32 @@ typedef struct _BBuf {
/* has body */
-#define ANCHOR_PREC_READ (1<<0)
-#define ANCHOR_PREC_READ_NOT (1<<1)
-#define ANCHOR_LOOK_BEHIND (1<<2)
-#define ANCHOR_LOOK_BEHIND_NOT (1<<3)
+#define ANCR_PREC_READ (1<<0)
+#define ANCR_PREC_READ_NOT (1<<1)
+#define ANCR_LOOK_BEHIND (1<<2)
+#define ANCR_LOOK_BEHIND_NOT (1<<3)
/* no body */
-#define ANCHOR_BEGIN_BUF (1<<4)
-#define ANCHOR_BEGIN_LINE (1<<5)
-#define ANCHOR_BEGIN_POSITION (1<<6)
-#define ANCHOR_END_BUF (1<<7)
-#define ANCHOR_SEMI_END_BUF (1<<8)
-#define ANCHOR_END_LINE (1<<9)
-#define ANCHOR_WORD_BOUNDARY (1<<10)
-#define ANCHOR_NO_WORD_BOUNDARY (1<<11)
-#define ANCHOR_WORD_BEGIN (1<<12)
-#define ANCHOR_WORD_END (1<<13)
-#define ANCHOR_ANYCHAR_INF (1<<14)
-#define ANCHOR_ANYCHAR_INF_ML (1<<15)
-#define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16)
-#define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17)
-
-
-#define ANCHOR_HAS_BODY(a) ((a)->type < ANCHOR_BEGIN_BUF)
+#define ANCR_BEGIN_BUF (1<<4)
+#define ANCR_BEGIN_LINE (1<<5)
+#define ANCR_BEGIN_POSITION (1<<6)
+#define ANCR_END_BUF (1<<7)
+#define ANCR_SEMI_END_BUF (1<<8)
+#define ANCR_END_LINE (1<<9)
+#define ANCR_WORD_BOUNDARY (1<<10)
+#define ANCR_NO_WORD_BOUNDARY (1<<11)
+#define ANCR_WORD_BEGIN (1<<12)
+#define ANCR_WORD_END (1<<13)
+#define ANCR_ANYCHAR_INF (1<<14)
+#define ANCR_ANYCHAR_INF_ML (1<<15)
+#define ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16)
+#define ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17)
+
+
+#define ANCHOR_HAS_BODY(a) ((a)->type < ANCR_BEGIN_BUF)
#define IS_WORD_ANCHOR_TYPE(type) \
- ((type) == ANCHOR_WORD_BOUNDARY || (type) == ANCHOR_NO_WORD_BOUNDARY || \
- (type) == ANCHOR_WORD_BEGIN || (type) == ANCHOR_WORD_END)
+ ((type) == ANCR_WORD_BOUNDARY || (type) == ANCR_NO_WORD_BOUNDARY || \
+ (type) == ANCR_WORD_BEGIN || (type) == ANCR_WORD_END)
/* operation code */
enum OpCode {
@@ -851,6 +815,7 @@ extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));
extern RegexExt* onig_get_regex_ext(regex_t* reg);
extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end);
+extern int onig_positive_int_multiply(int x, int y);
#ifdef USE_CALLOUT
diff --git a/src/regparse.c b/src/regparse.c
index fcc05cf..9e42e71 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -71,7 +71,7 @@ OnigSyntaxType OnigSyntaxOniguruma = {
ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
- , ( SYN_GNU_REGEX_BV |
+ , ( SYN_GNU_REGEX_BV |
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
@@ -113,7 +113,7 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
- , ( SYN_GNU_REGEX_BV |
+ , ( SYN_GNU_REGEX_BV |
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
@@ -198,17 +198,6 @@ onig_set_parse_depth_limit(unsigned int depth)
return 0;
}
-static int
-positive_int_multiply(int x, int y)
-{
- if (x == 0 || y == 0) return 0;
-
- if (x < INT_MAX / y)
- return x * y;
- else
- return -1;
-}
-
static void
bbuf_free(BBuf* bbuf)
{
@@ -966,6 +955,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
#ifdef USE_ST_LIBRARY
if (IS_NULL(t)) {
t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
+ CHECK_NULL_RETURN_MEMERR(t);
reg->name_table = (void* )t;
}
e = (NameEntry* )xmalloc(sizeof(NameEntry));
@@ -1372,6 +1362,7 @@ callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
#ifdef USE_ST_LIBRARY
if (IS_NULL(t)) {
t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
+ CHECK_NULL_RETURN_MEMERR(t);
GlobalCalloutNameTable = t;
}
e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
@@ -1571,6 +1562,7 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
}
for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
if (fe->arg_types[i] == ONIG_TYPE_STRING) {
+ if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
OnigValue* val = opt_defaults + j;
UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);
CHECK_NULL_RETURN_MEMERR(ds);
@@ -1616,6 +1608,7 @@ onig_get_callout_start_func(regex_t* reg, int callout_num)
CalloutListEntry* e;
e = onig_reg_callout_list_at(reg, callout_num);
+ CHECK_NULL_RETURN(e);
return e->start_func;
}
@@ -1623,6 +1616,7 @@ extern const UChar*
onig_get_callout_tag_start(regex_t* reg, int callout_num)
{
CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
+ CHECK_NULL_RETURN(e);
return e->tag_start;
}
@@ -1630,6 +1624,7 @@ extern const UChar*
onig_get_callout_tag_end(regex_t* reg, int callout_num)
{
CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
+ CHECK_NULL_RETURN(e);
return e->tag_end;
}
@@ -1736,7 +1731,7 @@ setup_ext_callout_list_values(regex_t* reg)
int i, j;
RegexExt* ext;
- ext = REG_EXTP(reg);
+ ext = reg->extp;
if (IS_NOT_NULL(ext->tag_table)) {
onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
(st_data_t )ext);
@@ -1766,13 +1761,13 @@ setup_ext_callout_list_values(regex_t* reg)
extern int
onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
{
- RegexExt* ext = REG_EXTP(reg);
+ RegexExt* ext = reg->extp;
if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
if (callout_num > ext->callout_num) return 0;
return (ext->callout_list[callout_num].flag &
- CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;
+ CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
}
static int
@@ -1814,7 +1809,7 @@ onig_get_callout_num_by_tag(regex_t* reg,
RegexExt* ext;
CalloutTagVal e;
- ext = REG_EXTP(reg);
+ ext = reg->extp;
if (IS_NULL(ext) || IS_NULL(ext->tag_table))
return ONIGERR_INVALID_CALLOUT_TAG_NAME;
@@ -1901,9 +1896,11 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
if (r != ONIG_NORMAL) return r;
ext = onig_get_regex_ext(reg);
+ CHECK_NULL_RETURN_MEMERR(ext);
r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);
e = onig_reg_callout_list_at(reg, (int )entry_val);
+ CHECK_NULL_RETURN_MEMERR(e);
e->tag_start = name;
e->tag_end = name_end;
@@ -2008,7 +2005,7 @@ onig_node_free(Node* node)
switch (NODE_TYPE(node)) {
case NODE_STRING:
- if (STR_(node)->capa != 0 &&
+ if (STR_(node)->capacity != 0 &&
IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
xfree(STR_(node)->s);
}
@@ -2040,13 +2037,13 @@ onig_node_free(Node* node)
xfree(BACKREF_(node)->back_dynamic);
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
if (NODE_BODY(node))
onig_node_free(NODE_BODY(node));
{
- EnclosureNode* en = ENCLOSURE_(node);
- if (en->type == ENCLOSURE_IF_ELSE) {
+ BagNode* en = BAG_(node);
+ if (en->type == BAG_IF_ELSE) {
onig_node_free(en->te.Then);
onig_node_free(en->te.Else);
}
@@ -2082,6 +2079,7 @@ node_new(void)
Node* node;
node = (Node* )xmalloc(sizeof(Node));
+ CHECK_NULL_RETURN(node);
xmemset(node, 0, sizeof(*node));
#ifdef DEBUG_NODE_FREE
@@ -2138,6 +2136,8 @@ node_new_anychar_with_fixed_option(OnigOptionType option)
Node* node;
node = node_new_anychar();
+ CHECK_NULL_RETURN(node);
+
ct = CTYPE_(node);
ct->options = option;
NODE_STATUS_ADD(node, FIXED_OPTION);
@@ -2381,62 +2381,62 @@ node_new_quantifier(int lower, int upper, int by_number)
}
static Node*
-node_new_enclosure(enum EnclosureType type)
+node_new_bag(enum BagType type)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
- NODE_SET_TYPE(node, NODE_ENCLOSURE);
- ENCLOSURE_(node)->type = type;
+ NODE_SET_TYPE(node, NODE_BAG);
+ BAG_(node)->type = type;
switch (type) {
- case ENCLOSURE_MEMORY:
- ENCLOSURE_(node)->m.regnum = 0;
- ENCLOSURE_(node)->m.called_addr = -1;
- ENCLOSURE_(node)->m.entry_count = 1;
- ENCLOSURE_(node)->m.called_state = 0;
+ case BAG_MEMORY:
+ BAG_(node)->m.regnum = 0;
+ BAG_(node)->m.called_addr = -1;
+ BAG_(node)->m.entry_count = 1;
+ BAG_(node)->m.called_state = 0;
break;
- case ENCLOSURE_OPTION:
- ENCLOSURE_(node)->o.options = 0;
+ case BAG_OPTION:
+ BAG_(node)->o.options = 0;
break;
- case ENCLOSURE_STOP_BACKTRACK:
+ case BAG_STOP_BACKTRACK:
break;
- case ENCLOSURE_IF_ELSE:
- ENCLOSURE_(node)->te.Then = 0;
- ENCLOSURE_(node)->te.Else = 0;
+ case BAG_IF_ELSE:
+ BAG_(node)->te.Then = 0;
+ BAG_(node)->te.Else = 0;
break;
}
- ENCLOSURE_(node)->opt_count = 0;
+ BAG_(node)->opt_count = 0;
return node;
}
extern Node*
-onig_node_new_enclosure(int type)
+onig_node_new_bag(enum BagType type)
{
- return node_new_enclosure(type);
+ return node_new_bag(type);
}
static Node*
-node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)
+node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
{
Node* n;
- n = node_new_enclosure(ENCLOSURE_IF_ELSE);
+ n = node_new_bag(BAG_IF_ELSE);
CHECK_NULL_RETURN(n);
NODE_BODY(n) = cond;
- ENCLOSURE_(n)->te.Then = Then;
- ENCLOSURE_(n)->te.Else = Else;
+ BAG_(n)->te.Then = Then;
+ BAG_(n)->te.Else = Else;
return n;
}
static Node*
node_new_memory(int is_named)
{
- Node* node = node_new_enclosure(ENCLOSURE_MEMORY);
+ Node* node = node_new_bag(BAG_MEMORY);
CHECK_NULL_RETURN(node);
if (is_named != 0)
NODE_STATUS_ADD(node, NAMED_GROUP);
@@ -2447,12 +2447,37 @@ node_new_memory(int is_named)
static Node*
node_new_option(OnigOptionType option)
{
- Node* node = node_new_enclosure(ENCLOSURE_OPTION);
+ Node* node = node_new_bag(BAG_OPTION);
+ CHECK_NULL_RETURN(node);
+ BAG_(node)->o.options = option;
+ return node;
+}
+
+static Node*
+node_new_group(Node* content)
+{
+ Node* node;
+
+ node = node_new();
CHECK_NULL_RETURN(node);
- ENCLOSURE_(node)->o.options = option;
+ NODE_SET_TYPE(node, NODE_LIST);
+ NODE_CAR(node) = content;
+ NODE_CDR(node) = NULL_NODE;
+
return node;
}
+static Node*
+node_drop_group(Node* group)
+{
+ Node* content;
+
+ content = NODE_CAR(group);
+ NODE_CAR(group) = NULL_NODE;
+ onig_node_free(group);
+ return content;
+}
+
static int
node_new_fail(Node** node, ScanEnv* env)
{
@@ -2543,7 +2568,7 @@ onig_free_reg_callout_list(int n, CalloutListEntry* list)
extern CalloutListEntry*
onig_reg_callout_list_at(regex_t* reg, int num)
{
- RegexExt* ext = REG_EXTP(reg);
+ RegexExt* ext = reg->extp;
CHECK_NULL_RETURN(ext);
if (num <= 0 || num > ext->callout_num)
@@ -2634,7 +2659,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env)
ns[1] = NULL_NODE;
r = ONIGERR_MEMORY;
- ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);
+ ns[0] = onig_node_new_anchor(ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);
if (IS_NULL(ns[0])) goto err;
r = node_new_true_anychar(&ns[1], env);
@@ -2661,7 +2686,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env)
ns[0] = x;
ns[1] = NULL_NODE;
- x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
+ x = node_new_bag(BAG_STOP_BACKTRACK);
if (IS_NULL(x)) goto err;
NODE_BODY(x) = ns[0];
@@ -2721,7 +2746,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
ns[0] = x;
if (possessive != 0) {
- x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
+ x = node_new_bag(BAG_STOP_BACKTRACK);
if (IS_NULL(x)) goto err0;
NODE_BODY(x) = ns[0];
@@ -2873,11 +2898,11 @@ is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
quant = node;
}
else {
- if (NODE_TYPE(node) == NODE_ENCLOSURE) {
- EnclosureNode* en = ENCLOSURE_(node);
- if (en->type == ENCLOSURE_STOP_BACKTRACK) {
+ if (NODE_TYPE(node) == NODE_BAG) {
+ BagNode* en = BAG_(node);
+ if (en->type == BAG_STOP_BACKTRACK) {
*is_possessive = 1;
- quant = NODE_ENCLOSURE_BODY(en);
+ quant = NODE_BAG_BODY(en);
if (NODE_TYPE(quant) != NODE_QUANT)
return 0;
}
@@ -3054,7 +3079,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
else {
r = make_absent_tail(&ns[5], &ns[6], id1, env);
if (r != 0) goto err;
-
+
x = make_list(7, ns);
if (IS_NULL(x)) goto err0;
}
@@ -3066,7 +3091,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
r = ONIGERR_MEMORY;
err:
for (i = 0; i < 7; i++) onig_node_free(ns[i]);
- return r;
+ return r;
}
extern int
@@ -3077,11 +3102,11 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
if (addlen > 0) {
int len = (int )(STR_(node)->end - STR_(node)->s);
- if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
+ if (STR_(node)->capacity > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
UChar* p;
int capa = len + addlen + NODE_STRING_MARGIN;
- if (capa <= STR_(node)->capa) {
+ if (capa <= STR_(node)->capacity) {
onig_strcpy(STR_(node)->s + len, s, end);
}
else {
@@ -3092,8 +3117,8 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);
CHECK_NULL_RETURN_MEMERR(p);
- STR_(node)->s = p;
- STR_(node)->capa = capa;
+ STR_(node)->s = p;
+ STR_(node)->capacity = capa;
}
}
else {
@@ -3125,24 +3150,24 @@ extern void
onig_node_conv_to_str_node(Node* node, int flag)
{
NODE_SET_TYPE(node, NODE_STRING);
- STR_(node)->flag = flag;
- STR_(node)->capa = 0;
- STR_(node)->s = STR_(node)->buf;
- STR_(node)->end = STR_(node)->buf;
+ STR_(node)->flag = flag;
+ STR_(node)->capacity = 0;
+ STR_(node)->s = STR_(node)->buf;
+ STR_(node)->end = STR_(node)->buf;
}
extern void
onig_node_str_clear(Node* node)
{
- if (STR_(node)->capa != 0 &&
+ if (STR_(node)->capacity != 0 &&
IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
xfree(STR_(node)->s);
}
- STR_(node)->capa = 0;
- STR_(node)->flag = 0;
- STR_(node)->s = STR_(node)->buf;
- STR_(node)->end = STR_(node)->buf;
+ STR_(node)->capacity = 0;
+ STR_(node)->flag = 0;
+ STR_(node)->s = STR_(node)->buf;
+ STR_(node)->end = STR_(node)->buf;
}
static Node*
@@ -3152,10 +3177,10 @@ node_new_str(const UChar* s, const UChar* end)
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_STRING);
- STR_(node)->capa = 0;
- STR_(node)->flag = 0;
- STR_(node)->s = STR_(node)->buf;
- STR_(node)->end = STR_(node)->buf;
+ STR_(node)->capacity = 0;
+ STR_(node)->flag = 0;
+ STR_(node)->s = STR_(node)->buf;
+ STR_(node)->end = STR_(node)->buf;
if (onig_node_str_cat(node, s, end)) {
onig_node_free(node);
return NULL;
@@ -3173,6 +3198,7 @@ static Node*
node_new_str_raw(UChar* s, UChar* end)
{
Node* node = node_new_str(s, end);
+ CHECK_NULL_RETURN(node);
NODE_STRING_SET_RAW(node);
return node;
}
@@ -3205,6 +3231,7 @@ str_node_split_last_char(Node* node, OnigEncoding enc)
p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
if (p && p > sn->s) { /* can be split. */
rn = node_new_str(p, sn->end);
+ CHECK_NULL_RETURN(rn);
if (NODE_STRING_IS_RAW(node))
NODE_STRING_SET_RAW(rn);
@@ -3795,7 +3822,7 @@ is_invalid_quantifier_target(Node* node)
return 1;
break;
- case NODE_ENCLOSURE:
+ case NODE_BAG:
/* allow enclosed elements */
/* return is_invalid_quantifier_target(NODE_BODY(node)); */
break;
@@ -3877,7 +3904,7 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
if (pnum < 0 || cnum < 0) {
if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {
if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {
- int n = positive_int_multiply(p->lower, c->lower);
+ int n = onig_positive_int_multiply(p->lower, c->lower);
if (n >= 0) {
p->lower = p->upper = n;
NODE_BODY(pnode) = NODE_BODY(cnode);
@@ -3972,7 +3999,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
if (r != 0) goto err1;
}
- x = node_new_enclosure_if_else(crnl, 0, ncc);
+ x = node_new_bag_if_else(crnl, 0, ncc);
if (IS_NULL(x)) goto err1;
*node = x;
@@ -4552,7 +4579,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
OnigCodePoint x;
UChar *q;
UChar *p = from;
-
+
while (p < to) {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
q = p + enclen(enc, p);
@@ -4701,12 +4728,12 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
PINC;
tok->type = TK_CHAR_PROPERTY;
- tok->u.prop.not = (c == 'P' ? 1 : 0);
+ tok->u.prop.not = c == 'P';
if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
PFETCH(c2);
if (c2 == '^') {
- tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ tok->u.prop.not = tok->u.prop.not == 0;
}
else
PUNFETCH;
@@ -4986,38 +5013,38 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'b':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_WORD_BOUNDARY;
+ tok->u.anchor = ANCR_WORD_BOUNDARY;
break;
case 'B':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;
+ tok->u.anchor = ANCR_NO_WORD_BOUNDARY;
break;
case 'y':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
+ tok->u.anchor = ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
break;
case 'Y':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
+ tok->u.anchor = ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
break;
#ifdef USE_WORD_BEGIN_END
case '<':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_WORD_BEGIN;
+ tok->u.anchor = ANCR_WORD_BEGIN;
break;
case '>':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_WORD_END;
+ tok->u.anchor = ANCR_WORD_END;
break;
#endif
@@ -5092,26 +5119,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
begin_buf:
tok->type = TK_ANCHOR;
- tok->u.subtype = ANCHOR_BEGIN_BUF;
+ tok->u.subtype = ANCR_BEGIN_BUF;
break;
case 'Z':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
tok->type = TK_ANCHOR;
- tok->u.subtype = ANCHOR_SEMI_END_BUF;
+ tok->u.subtype = ANCR_SEMI_END_BUF;
break;
case 'z':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
end_buf:
tok->type = TK_ANCHOR;
- tok->u.subtype = ANCHOR_END_BUF;
+ tok->u.subtype = ANCR_END_BUF;
break;
case 'G':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
tok->type = TK_ANCHOR;
- tok->u.subtype = ANCHOR_BEGIN_POSITION;
+ tok->u.subtype = ANCR_BEGIN_POSITION;
break;
case '`':
@@ -5214,7 +5241,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto skip_backref;
}
- if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
(num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))
@@ -5382,13 +5409,13 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
PINC;
tok->type = TK_CHAR_PROPERTY;
- tok->u.prop.not = (c == 'P' ? 1 : 0);
+ tok->u.prop.not = c == 'P';
if (!PEND &&
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
PFETCH(c);
if (c == '^') {
- tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ tok->u.prop.not = tok->u.prop.not == 0;
}
else
PUNFETCH;
@@ -5606,14 +5633,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = (IS_SINGLELINE(env->options)
- ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
+ ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);
break;
case '$':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
tok->type = TK_ANCHOR;
tok->u.subtype = (IS_SINGLELINE(env->options)
- ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
+ ? ANCR_SEMI_END_BUF : ANCR_END_LINE);
break;
case '[':
@@ -6509,7 +6536,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en
}
static int parse_subexp(Node** top, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env);
+ UChar** src, UChar* end, ScanEnv* env, int group_head);
#ifdef USE_CALLOUT
@@ -6605,6 +6632,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
if (r != 0) return r;
ext = onig_get_regex_ext(env->reg);
+ CHECK_NULL_RETURN_MEMERR(ext);
if (IS_NULL(ext->pattern)) {
r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
if (r != ONIG_NORMAL) return r;
@@ -6625,6 +6653,11 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
}
e = onig_reg_callout_list_at(env->reg, num);
+ if (IS_NULL(e)) {
+ xfree(contents);
+ return ONIGERR_MEMORY;
+ }
+
e->of = ONIG_CALLOUT_OF_CONTENTS;
e->in = in;
e->name_id = ONIG_NON_NAME_ID;
@@ -6920,6 +6953,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
if (r != 0) return r;
ext = onig_get_regex_ext(env->reg);
+ CHECK_NULL_RETURN_MEMERR(ext);
if (IS_NULL(ext->pattern)) {
r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
if (r != ONIG_NORMAL) return r;
@@ -6934,6 +6968,8 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
if (r != ONIG_NORMAL) return r;
e = onig_reg_callout_list_at(env->reg, num);
+ CHECK_NULL_RETURN_MEMERR(e);
+
e->of = ONIG_CALLOUT_OF_NAME;
e->in = in;
e->name_id = name_id;
@@ -6957,8 +6993,8 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
#endif
static int
-parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env)
+parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env)
{
int r, num;
Node *target;
@@ -6985,20 +7021,20 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
group:
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(np, tok, term, &p, end, env);
+ r = parse_subexp(np, tok, term, &p, end, env, 0);
if (r < 0) return r;
*src = p;
return 1; /* group */
break;
case '=':
- *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);
+ *np = onig_node_new_anchor(ANCR_PREC_READ, 0);
break;
case '!': /* preceding read */
- *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);
+ *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0);
break;
case '>': /* (?>...) stop backtrack */
- *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
+ *np = node_new_bag(BAG_STOP_BACKTRACK);
break;
case '\'':
@@ -7013,9 +7049,9 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
PFETCH(c);
if (c == '=')
- *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);
+ *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0);
else if (c == '!')
- *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);
+ *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0);
else {
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
UChar *name;
@@ -7043,7 +7079,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (r != 0) return r;
*np = node_new_memory(1);
CHECK_NULL_RETURN_MEMERR(*np);
- ENCLOSURE_(*np)->m.regnum = num;
+ BAG_(*np)->m.regnum = num;
if (list_capture != 0)
MEM_STATUS_ON_SIMPLE(env->capture_history, num);
env->num_named++;
@@ -7080,7 +7116,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(&absent, tok, term, &p, end, env);
+ r = parse_subexp(&absent, tok, term, &p, end, env, 1);
if (r < 0) {
onig_node_free(absent);
return r;
@@ -7258,7 +7294,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
condition_is_checker = 0;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(&condition, tok, term, &p, end, env);
+ r = parse_subexp(&condition, tok, term, &p, end, env, 0);
if (r < 0) {
onig_node_free(condition);
return r;
@@ -7299,7 +7335,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
onig_node_free(condition);
return r;
}
- r = parse_subexp(&target, tok, term, &p, end, env);
+ r = parse_subexp(&target, tok, term, &p, end, env, 1);
if (r < 0) {
onig_node_free(condition);
onig_node_free(target);
@@ -7327,7 +7363,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
}
- *np = node_new_enclosure_if_else(condition, Then, Else);
+ *np = node_new_bag_if_else(condition, Then, Else);
if (IS_NULL(*np)) {
onig_node_free(condition);
onig_node_free(Then);
@@ -7362,7 +7398,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
else if (num >= (int )MEM_STATUS_BITS_NUM) {
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
}
- ENCLOSURE_(*np)->m.regnum = num;
+ BAG_(*np)->m.regnum = num;
MEM_STATUS_ON_SIMPLE(env->capture_history, num);
}
else {
@@ -7431,7 +7467,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
env->options = option;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(&target, tok, term, &p, end, env);
+ r = parse_subexp(&target, tok, term, &p, end, env, 0);
env->options = prev;
if (r < 0) {
onig_node_free(target);
@@ -7472,13 +7508,13 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
if (num < 0) return num;
- ENCLOSURE_(*np)->m.regnum = num;
+ BAG_(*np)->m.regnum = num;
}
CHECK_NULL_RETURN_MEMERR(*np);
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(&target, tok, term, &p, end, env);
+ r = parse_subexp(&target, tok, term, &p, end, env, 0);
if (r < 0) {
onig_node_free(target);
return r;
@@ -7486,10 +7522,10 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
NODE_BODY(*np) = target;
- if (NODE_TYPE(*np) == NODE_ENCLOSURE) {
- if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {
+ if (NODE_TYPE(*np) == NODE_BAG) {
+ if (BAG_(*np)->type == BAG_MEMORY) {
/* Don't move this to previous of parse_subexp() */
- r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);
+ r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);
if (r != 0) return r;
}
}
@@ -7518,7 +7554,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
switch (NODE_TYPE(target)) {
case NODE_STRING:
- if (! group) {
+ if (group == 0) {
if (str_node_can_be_split(target, env->enc)) {
Node* n = str_node_split_last_char(target, env->enc);
if (IS_NOT_NULL(n)) {
@@ -7710,7 +7746,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
static int
parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env)
+ ScanEnv* env, int group_head)
{
int r, len, group = 0;
Node* qn;
@@ -7724,22 +7760,35 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case TK_ALT:
case TK_EOT:
end_of_token:
- *np = node_new_empty();
- return tok->type;
+ *np = node_new_empty();
+ CHECK_NULL_RETURN_MEMERR(*np);
+ return tok->type;
break;
case TK_SUBEXP_OPEN:
- r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);
+ r = parse_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);
if (r < 0) return r;
- if (r == 1) group = 1;
+ if (r == 1) { /* group */
+ if (group_head == 0)
+ group = 1;
+ else {
+ Node* target = *np;
+ *np = node_new_group(target);
+ if (IS_NULL(*np)) {
+ onig_node_free(target);
+ return ONIGERR_MEMORY;
+ }
+ group = 2;
+ }
+ }
else if (r == 2) { /* option only */
Node* target;
OnigOptionType prev = env->options;
- env->options = ENCLOSURE_(*np)->o.options;
+ env->options = BAG_(*np)->o.options;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- r = parse_subexp(&target, tok, term, src, end, env);
+ r = parse_subexp(&target, tok, term, src, end, env, 0);
env->options = prev;
if (r < 0) {
onig_node_free(target);
@@ -7968,6 +8017,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
int ascii_mode =
IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;
*np = onig_node_new_anchor(tok->u.anchor, ascii_mode);
+ CHECK_NULL_RETURN_MEMERR(*np);
}
break;
@@ -7976,8 +8026,10 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
- else
+ else {
*np = node_new_empty();
+ CHECK_NULL_RETURN_MEMERR(*np);
+ }
}
else {
goto tk_byte;
@@ -8023,14 +8075,23 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
repeat:
if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
+ Node* target;
+
if (is_invalid_quantifier_target(*targetp))
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
- (r == TK_INTERVAL ? 1 : 0));
+ r == TK_INTERVAL);
CHECK_NULL_RETURN_MEMERR(qn);
QUANT_(qn)->greedy = tok->u.repeat.greedy;
- r = set_quantifier(qn, *targetp, group, env);
+ if (group == 2) {
+ target = node_drop_group(*np);
+ *np = NULL_NODE;
+ }
+ else {
+ target = *targetp;
+ }
+ r = set_quantifier(qn, target, group, env);
if (r < 0) {
onig_node_free(qn);
return r;
@@ -8038,7 +8099,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (tok->u.repeat.possessive != 0) {
Node* en;
- en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
+ en = node_new_bag(BAG_STOP_BACKTRACK);
if (IS_NULL(en)) {
onig_node_free(qn);
return ONIGERR_MEMORY;
@@ -8077,13 +8138,13 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
static int
parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env)
+ ScanEnv* env, int group_head)
{
int r;
Node *node, **headp;
*top = NULL;
- r = parse_exp(&node, tok, term, src, end, env);
+ r = parse_exp(&node, tok, term, src, end, env, group_head);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8094,9 +8155,14 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
}
else {
*top = node_new_list(node, NULL);
+ if (IS_NULL(*top)) {
+ onig_node_free(node);
+ return ONIGERR_MEMORY;
+ }
+
headp = &(NODE_CDR(*top));
while (r != TK_EOT && r != term && r != TK_ALT) {
- r = parse_exp(&node, tok, term, src, end, env);
+ r = parse_exp(&node, tok, term, src, end, env, 0);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8120,7 +8186,7 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
static int
parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env)
+ ScanEnv* env, int group_head)
{
int r;
Node *node, **headp;
@@ -8129,7 +8195,8 @@ parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
env->parse_depth++;
if (env->parse_depth > ParseDepthLimit)
return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
- r = parse_branch(&node, tok, term, src, end, env);
+
+ r = parse_branch(&node, tok, term, src, end, env, group_head);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8140,16 +8207,27 @@ parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
}
else if (r == TK_ALT) {
*top = onig_node_new_alt(node, NULL);
+ if (IS_NULL(*top)) {
+ onig_node_free(node);
+ return ONIGERR_MEMORY;
+ }
+
headp = &(NODE_CDR(*top));
while (r == TK_ALT) {
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- r = parse_branch(&node, tok, term, src, end, env);
+ r = parse_branch(&node, tok, term, src, end, env, 0);
if (r < 0) {
onig_node_free(node);
return r;
}
*headp = onig_node_new_alt(node, NULL);
+ if (IS_NULL(*headp)) {
+ onig_node_free(node);
+ onig_node_free(*top);
+ return ONIGERR_MEMORY;
+ }
+
headp = &(NODE_CDR(*headp));
}
@@ -8177,7 +8255,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
r = fetch_token(&tok, src, end, env);
if (r < 0) return r;
- r = parse_subexp(top, &tok, TK_EOT, src, end, env);
+ r = parse_subexp(top, &tok, TK_EOT, src, end, env, 0);
if (r < 0) return r;
return 0;
@@ -8193,7 +8271,7 @@ make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
CHECK_NULL_RETURN_MEMERR(x);
NODE_BODY(x) = node;
- ENCLOSURE_(x)->m.regnum = 0;
+ BAG_(x)->m.regnum = 0;
r = scan_env_set_mem_node(env, 0, x);
if (r != 0) {
onig_node_free(x);
@@ -8249,7 +8327,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
reg->num_mem = env->num_mem;
#ifdef USE_CALLOUT
- ext = REG_EXTP(reg);
+ ext = reg->extp;
if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
r = setup_ext_callout_list_values(reg);
}
diff --git a/src/regparse.h b/src/regparse.h
index ff24eeb..ede9bb8 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -31,6 +31,10 @@
#include "regint.h"
+#define NODE_STRING_MARGIN 16
+#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_BACKREFS_SIZE 6
+
/* node type */
typedef enum {
NODE_STRING = 0,
@@ -38,7 +42,7 @@ typedef enum {
NODE_CTYPE = 2,
NODE_BACKREF = 3,
NODE_QUANT = 4,
- NODE_ENCLOSURE = 5,
+ NODE_BAG = 5,
NODE_ANCHOR = 6,
NODE_LIST = 7,
NODE_ALT = 8,
@@ -46,95 +50,23 @@ typedef enum {
NODE_GIMMICK = 10
} NodeType;
+enum BagType {
+ BAG_MEMORY = 0,
+ BAG_OPTION = 1,
+ BAG_STOP_BACKTRACK = 2,
+ BAG_IF_ELSE = 3,
+};
+
enum GimmickType {
- GIMMICK_FAIL = 0,
- GIMMICK_KEEP = 1,
- GIMMICK_SAVE = 2,
+ GIMMICK_FAIL = 0,
+ GIMMICK_KEEP = 1,
+ GIMMICK_SAVE = 2,
GIMMICK_UPDATE_VAR = 3,
#ifdef USE_CALLOUT
- GIMMICK_CALLOUT = 4,
+ GIMMICK_CALLOUT = 4,
#endif
};
-
-/* node type bit */
-#define NODE_TYPE2BIT(type) (1<<(type))
-
-#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING)
-#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
-#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
-#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
-#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT)
-#define NODE_BIT_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE)
-#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
-#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST)
-#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT)
-#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
-#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
-
-#define NODE_IS_SIMPLE_TYPE(node) \
- ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
- (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
-
-#define NODE_TYPE(node) ((node)->u.base.node_type)
-#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
-
-#define STR_(node) (&((node)->u.str))
-#define CCLASS_(node) (&((node)->u.cclass))
-#define CTYPE_(node) (&((node)->u.ctype))
-#define BACKREF_(node) (&((node)->u.backref))
-#define QUANT_(node) (&((node)->u.quant))
-#define ENCLOSURE_(node) (&((node)->u.enclosure))
-#define ANCHOR_(node) (&((node)->u.anchor))
-#define CONS_(node) (&((node)->u.cons))
-#define CALL_(node) (&((node)->u.call))
-#define GIMMICK_(node) (&((node)->u.gimmick))
-
-#define NODE_CAR(node) (CONS_(node)->car)
-#define NODE_CDR(node) (CONS_(node)->cdr)
-
-#define CTYPE_ANYCHAR -1
-#define NODE_IS_ANYCHAR(node) \
- (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
-
-#define CTYPE_OPTION(node, reg) \
- (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
-
-
-#define ANCHOR_ANYCHAR_INF_MASK (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML)
-#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
-
-enum EnclosureType {
- ENCLOSURE_MEMORY = 0,
- ENCLOSURE_OPTION = 1,
- ENCLOSURE_STOP_BACKTRACK = 2,
- ENCLOSURE_IF_ELSE = 3,
-};
-
-#define NODE_STRING_MARGIN 16
-#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
-#define NODE_BACKREFS_SIZE 6
-
-#define NODE_STRING_RAW (1<<0) /* by backslashed number */
-#define NODE_STRING_AMBIG (1<<1)
-#define NODE_STRING_DONT_GET_OPT_INFO (1<<2)
-
-#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
-#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
-#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
-#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
-#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
- (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
-#define NODE_STRING_IS_RAW(node) \
- (((node)->u.str.flag & NODE_STRING_RAW) != 0)
-#define NODE_STRING_IS_AMBIG(node) \
- (((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
-#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
- (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
-
-#define BACKREFS_P(br) \
- (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
-
enum QuantBodyEmpty {
QUANT_BODY_IS_NOT_EMPTY = 0,
QUANT_BODY_IS_EMPTY = 1,
@@ -142,65 +74,6 @@ enum QuantBodyEmpty {
QUANT_BODY_IS_EMPTY_REC = 3
};
-/* node status bits */
-#define NODE_ST_MIN_FIXED (1<<0)
-#define NODE_ST_MAX_FIXED (1<<1)
-#define NODE_ST_CLEN_FIXED (1<<2)
-#define NODE_ST_MARK1 (1<<3)
-#define NODE_ST_MARK2 (1<<4)
-#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
-#define NODE_ST_RECURSION (1<<6)
-#define NODE_ST_CALLED (1<<7)
-#define NODE_ST_ADDR_FIXED (1<<8)
-#define NODE_ST_NAMED_GROUP (1<<9)
-#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
-#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
-#define NODE_ST_IN_MULTI_ENTRY (1<<12)
-#define NODE_ST_NEST_LEVEL (1<<13)
-#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */
-#define NODE_ST_BY_NAME (1<<15) /* backref by name */
-#define NODE_ST_BACKREF (1<<16)
-#define NODE_ST_CHECKER (1<<17)
-#define NODE_ST_FIXED_OPTION (1<<18)
-#define NODE_ST_PROHIBIT_RECURSION (1<<19)
-#define NODE_ST_SUPER (1<<20)
-
-
-#define NODE_STATUS(node) (((Node* )node)->u.base.status)
-#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f))
-#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
-
-#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
-#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
-#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
-#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
-#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
-#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
-#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
-#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
-#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
-#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
-#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
-#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
-#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
-#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
-#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
-#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
-#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
-#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
-#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
-#define NODE_IS_PROHIBIT_RECURSION(node) \
- ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
-#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
- ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
-
-#define NODE_BODY(node) ((node)->u.base.body)
-#define NODE_QUANT_BODY(node) ((node)->body)
-#define NODE_ENCLOSURE_BODY(node) ((node)->body)
-#define NODE_CALL_BODY(node) ((node)->body)
-#define NODE_ANCHOR_BODY(node) ((node)->body)
-
-
typedef struct {
NodeType node_type;
int status;
@@ -208,7 +81,7 @@ typedef struct {
UChar* s;
UChar* end;
unsigned int flag;
- int capa; /* (allocated size - 1) or 0: use buf[] */
+ int capacity; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STRING_BUF_SIZE];
} StrNode;
@@ -240,7 +113,7 @@ typedef struct {
int status;
struct _Node* body;
- enum EnclosureType type;
+ enum BagType type;
union {
struct {
int regnum;
@@ -262,7 +135,7 @@ typedef struct {
OnigLen max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_nodes() */
-} EnclosureNode;
+} BagNode;
#ifdef USE_CALL
@@ -280,7 +153,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
- struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */
+ struct _Node* body; /* to BagNode : BAG_MEMORY */
int by_number;
int group_num;
@@ -350,7 +223,7 @@ typedef struct _Node {
StrNode str;
CClassNode cclass;
QuantNode quant;
- EnclosureNode enclosure;
+ BagNode bag;
BackRefNode backref;
AnchorNode anchor;
ConsAltNode cons;
@@ -362,9 +235,138 @@ typedef struct _Node {
} u;
} Node;
-
#define NULL_NODE ((Node* )0)
+
+/* node type bit */
+#define NODE_TYPE2BIT(type) (1<<(type))
+
+#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING)
+#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
+#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
+#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
+#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT)
+#define NODE_BIT_BAG NODE_TYPE2BIT(NODE_BAG)
+#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
+#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST)
+#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT)
+#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
+#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
+
+#define NODE_IS_SIMPLE_TYPE(node) \
+ ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
+ (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
+
+#define NODE_TYPE(node) ((node)->u.base.node_type)
+#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
+
+#define STR_(node) (&((node)->u.str))
+#define CCLASS_(node) (&((node)->u.cclass))
+#define CTYPE_(node) (&((node)->u.ctype))
+#define BACKREF_(node) (&((node)->u.backref))
+#define QUANT_(node) (&((node)->u.quant))
+#define BAG_(node) (&((node)->u.bag))
+#define ANCHOR_(node) (&((node)->u.anchor))
+#define CONS_(node) (&((node)->u.cons))
+#define CALL_(node) (&((node)->u.call))
+#define GIMMICK_(node) (&((node)->u.gimmick))
+
+#define NODE_CAR(node) (CONS_(node)->car)
+#define NODE_CDR(node) (CONS_(node)->cdr)
+
+#define CTYPE_ANYCHAR -1
+#define NODE_IS_ANYCHAR(node) \
+ (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
+
+#define CTYPE_OPTION(node, reg) \
+ (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
+
+
+#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
+#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
+
+#define NODE_STRING_RAW (1<<0) /* by backslashed number */
+#define NODE_STRING_AMBIG (1<<1)
+#define NODE_STRING_GOOD_AMBIG (1<<2)
+#define NODE_STRING_DONT_GET_OPT_INFO (1<<3)
+
+#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
+#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
+#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
+#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
+#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG
+#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
+ (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
+#define NODE_STRING_IS_RAW(node) \
+ (((node)->u.str.flag & NODE_STRING_RAW) != 0)
+#define NODE_STRING_IS_AMBIG(node) \
+ (((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
+#define NODE_STRING_IS_GOOD_AMBIG(node) \
+ (((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0)
+#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
+ (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
+
+#define BACKREFS_P(br) \
+ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
+
+/* node status bits */
+#define NODE_ST_MIN_FIXED (1<<0)
+#define NODE_ST_MAX_FIXED (1<<1)
+#define NODE_ST_CLEN_FIXED (1<<2)
+#define NODE_ST_MARK1 (1<<3)
+#define NODE_ST_MARK2 (1<<4)
+#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
+#define NODE_ST_RECURSION (1<<6)
+#define NODE_ST_CALLED (1<<7)
+#define NODE_ST_ADDR_FIXED (1<<8)
+#define NODE_ST_NAMED_GROUP (1<<9)
+#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
+#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
+#define NODE_ST_IN_MULTI_ENTRY (1<<12)
+#define NODE_ST_NEST_LEVEL (1<<13)
+#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */
+#define NODE_ST_BY_NAME (1<<15) /* backref by name */
+#define NODE_ST_BACKREF (1<<16)
+#define NODE_ST_CHECKER (1<<17)
+#define NODE_ST_FIXED_OPTION (1<<18)
+#define NODE_ST_PROHIBIT_RECURSION (1<<19)
+#define NODE_ST_SUPER (1<<20)
+
+
+#define NODE_STATUS(node) (((Node* )node)->u.base.status)
+#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f))
+#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
+
+#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
+#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
+#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
+#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
+#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
+#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
+#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
+#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
+#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
+#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
+#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
+#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
+#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
+#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
+#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
+#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
+#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
+#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
+#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
+#define NODE_IS_PROHIBIT_RECURSION(node) \
+ ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
+#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
+ ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
+
+#define NODE_BODY(node) ((node)->u.base.body)
+#define NODE_QUANT_BODY(node) ((node)->body)
+#define NODE_BAG_BODY(node) ((node)->body)
+#define NODE_CALL_BODY(node) ((node)->body)
+#define NODE_ANCHOR_BODY(node) ((node)->body)
+
#define SCANENV_MEMENV_SIZE 8
#define SCANENV_MEMENV(senv) \
(IS_NOT_NULL((senv)->mem_env_dynamic) ? \
@@ -434,7 +436,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
-extern Node* onig_node_new_enclosure P_((int type));
+extern Node* onig_node_new_bag P_((enum BagType type));
extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
diff --git a/src/regposerr.c b/src/regposerr.c
index 2e2a8e2..c640a81 100644
--- a/src/regposerr.c
+++ b/src/regposerr.c
@@ -37,11 +37,7 @@
#include "config.h"
#include "onigposix.h"
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
+#include <string.h>
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
diff --git a/src/regsyntax.c b/src/regsyntax.c
index aa95479..98b815c 100644
--- a/src/regsyntax.c
+++ b/src/regsyntax.c
@@ -67,8 +67,8 @@ OnigSyntaxType OnigSyntaxPosixExtended = {
ONIG_SYN_OP_BRACE_INTERVAL |
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
, 0
- , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
- ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
+ , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
+ ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
diff --git a/src/sjis.c b/src/sjis.c
index 4ffd44b..aaf4cb8 100644
--- a/src/sjis.c
+++ b/src/sjis.c
@@ -113,10 +113,7 @@ static int
code_to_mbclen(OnigCodePoint code)
{
if (code < 256) {
- if (EncLen_SJIS[(int )code] == 1)
- return 1;
- else
- return 0;
+ return EncLen_SJIS[(int )code] == 1;
}
else if (code <= 0xffff) {
return 2;
@@ -188,7 +185,7 @@ is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
-
+
}
#endif
@@ -223,7 +220,7 @@ left_adjust_char_head(const UChar* start, const UChar* s)
p++;
break;
}
- }
+ }
}
len = enclen(ONIG_ENCODING_SJIS, p);
if (p + len > s) return (UChar* )p;
@@ -338,6 +335,6 @@ OnigEncodingType OnigEncodingSJIS = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0,
0, 0
};
diff --git a/src/unicode.c b/src/unicode.c
index 63bc65c..04944b9 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -657,8 +657,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
- if (from == 0x000d && to == 0x000a) return 0;
- else return 1;
+ return from != 0x000d || to != 0x000a;
}
btype = unicode_egcb_is_break_2code(from, to);
@@ -701,8 +700,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
return 1;
#else
- if (from == 0x000d && to == 0x000a) return 0;
- else return 1;
+ return from != 0x000d || to != 0x000a;
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
}
@@ -729,6 +727,7 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
int len;
int c;
char* s;
+ UChar* uname;
if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
@@ -741,10 +740,11 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
if (s == 0)
return ONIGERR_MEMORY;
+ uname = (UChar* )name;
n = 0;
for (i = 0; i < len; i++) {
- c = name[i];
- if (c <= 0 || c >= 0x80) {
+ c = uname[i];
+ if (c < 0x20 || c >= 0x80) {
xfree(s);
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
}
@@ -758,6 +758,10 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
if (UserDefinedPropertyTable == 0) {
UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10);
+ if (IS_NULL(UserDefinedPropertyTable)) {
+ xfree(s);
+ return ONIGERR_MEMORY;
+ }
}
e = UserDefinedPropertyRanges + UserDefinedPropertyNum;
diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c
index 0f4712a..07916b4 100644
--- a/src/unicode_fold1_key.c
+++ b/src/unicode_fold1_key.c
@@ -2988,5 +2988,3 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[])
}
return -1;
}
-
-
diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c
index 1469a46..3bc4bd6 100644
--- a/src/unicode_fold2_key.c
+++ b/src/unicode_fold2_key.c
@@ -225,5 +225,3 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[])
}
return -1;
}
-
-
diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c
index 70e70c8..b935db6 100644
--- a/src/unicode_fold3_key.c
+++ b/src/unicode_fold3_key.c
@@ -135,5 +135,3 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[])
}
return -1;
}
-
-
diff --git a/src/unicode_fold_data.c b/src/unicode_fold_data.c
index 7f7e24e..0b2c519 100644
--- a/src/unicode_fold_data.c
+++ b/src/unicode_fold_data.c
@@ -1513,4 +1513,3 @@ OnigCodePoint OnigUnicodeFolds3[] = {
/* ----- LOCALE ----- */
#define FOLDS3_END_INDEX 72
};
-
diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c
index b4c0318..bf71df0 100644
--- a/src/unicode_unfold_key.c
+++ b/src/unicode_unfold_key.c
@@ -3283,5 +3283,3 @@ onigenc_unicode_unfold_key(OnigCodePoint code)
}
return 0;
}
-
-
diff --git a/src/utf16_be.c b/src/utf16_be.c
index 8f5b8bf..a812a32 100644
--- a/src/utf16_be.c
+++ b/src/utf16_be.c
@@ -278,6 +278,6 @@ OnigEncodingType OnigEncodingUTF16_BE = {
init,
0, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_UNICODE,
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_2,
0, 0
};
diff --git a/src/utf16_le.c b/src/utf16_le.c
index 92bf318..da9571f 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -286,6 +286,6 @@ OnigEncodingType OnigEncodingUTF16_LE = {
init,
0, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_UNICODE,
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/utf32_be.c b/src/utf32_be.c
index 92476ec..9339b15 100644
--- a/src/utf32_be.c
+++ b/src/utf32_be.c
@@ -191,6 +191,6 @@ OnigEncodingType OnigEncodingUTF32_BE = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_UNICODE,
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_4,
0, 0
};
diff --git a/src/utf32_le.c b/src/utf32_le.c
index dc3fd92..22e007c 100644
--- a/src/utf32_le.c
+++ b/src/utf32_le.c
@@ -191,6 +191,6 @@ OnigEncodingType OnigEncodingUTF32_LE = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_UNICODE,
+ ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};
diff --git a/src/utf8.c b/src/utf8.c
index 22a8db1..4d1f9ec 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -57,7 +57,7 @@ static const int EncLen_UTF8[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
static int
@@ -280,7 +280,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncodingType OnigEncodingUTF8 = {
mbc_enc_len,
"UTF-8", /* name */
- 6, /* max enc length */
+ 4, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
mbc_to_code,
@@ -297,6 +297,6 @@ OnigEncodingType OnigEncodingUTF8 = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1_OR_0,
0, 0
};
diff --git a/test/.gitignore b/test/.gitignore
deleted file mode 100644
index 04918c1..0000000
--- a/test/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-/test_utf8
-/testc
-/testcu
-/testp
diff --git a/test/test_utf8.c b/test/test_utf8.c
index d5a966b..02aa06b 100644
--- a/test/test_utf8.c
+++ b/test/test_utf8.c
@@ -9,11 +9,7 @@
#include "oniguruma.h"
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
+#include <string.h>
#define SLEN(s) strlen(s)
@@ -276,7 +272,33 @@ extern int main(int argc, char* argv[])
x2("(?i:a)", "a", 0, 1);
x2("(?i:a)", "A", 0, 1);
x2("(?i:A)", "a", 0, 1);
+ x2("(?i:i)", "I", 0, 1);
+ x2("(?i:I)", "i", 0, 1);
+ x2("(?i:[A-Z])", "i", 0, 1);
+ x2("(?i:[a-z])", "I", 0, 1);
n("(?i:A)", "b");
+ x2("(?i:ss)", "ss", 0, 2);
+ x2("(?i:ss)", "Ss", 0, 2);
+ x2("(?i:ss)", "SS", 0, 2);
+ /* 0xc5,0xbf == 017F: # LATIN SMALL LETTER LONG S */
+ x2("(?i:ss)", "\xc5\xbfS", 0, 3);
+ x2("(?i:ss)", "s\xc5\xbf", 0, 3);
+ /* 0xc3,0x9f == 00DF: # LATIN SMALL LETTER SHARP S */
+ x2("(?i:ss)", "\xc3\x9f", 0, 2);
+ /* 0xe1,0xba,0x9e == 1E9E # LATIN CAPITAL LETTER SHARP S */
+ x2("(?i:ss)", "\xe1\xba\x9e", 0, 3);
+ x2("(?i:xssy)", "xssy", 0, 4);
+ x2("(?i:xssy)", "xSsy", 0, 4);
+ x2("(?i:xssy)", "xSSy", 0, 4);
+ x2("(?i:xssy)", "x\xc5\xbfSy", 0, 5);
+ x2("(?i:xssy)", "xs\xc5\xbfy", 0, 5);
+ x2("(?i:xssy)", "x\xc3\x9fy", 0, 4);
+ x2("(?i:xssy)", "x\xe1\xba\x9ey", 0, 5);
+ x2("(?i:\xc3\x9f)", "ss", 0, 2);
+ x2("(?i:\xc3\x9f)", "SS", 0, 2);
+ x2("(?i:[\xc3\x9f])", "ss", 0, 2);
+ x2("(?i:[\xc3\x9f])", "SS", 0, 2);
+ x2("(?i)(?<!ss)z", "qqz", 2, 3);
x2("(?i:[A-Z])", "a", 0, 1);
x2("(?i:[f-m])", "H", 0, 1);
x2("(?i:[f-m])", "h", 0, 1);
@@ -603,6 +625,7 @@ extern int main(int argc, char* argv[])
x2("(?:()|())*\\2", "abc", 0, 0);
x2("(?:()|()|())*\\3\\1", "abc", 0, 0);
x2("(|(?:a(?:\\g'1')*))b|", "abc", 0, 2);
+ x2("^(\"|)(.*)\\1$", "XX", 0, 2);
x2("(?~)", "", 0, 0);
x2("(?~)", "A", 0, 0);
diff --git a/test/testc.c b/test/testc.c
index e37665a..4aa8807 100644
--- a/test/testc.c
+++ b/test/testc.c
@@ -10,11 +10,7 @@
#include "oniguruma.h"
#endif
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
+#include <string.h>
#define SLEN(s) strlen(s)
diff --git a/windows/testc.c b/windows/testc.c
index e17842f..2e13350 100644
--- a/windows/testc.c
+++ b/windows/testc.c
@@ -1,869 +1,865 @@
-/*
- * This program was generated by testconv.rb.
- */
-#ifdef ONIG_ESCAPE_UCHAR_COLLISION
-#undef ONIG_ESCAPE_UCHAR_COLLISION
-#endif
-#include <stdio.h>
-
-#ifdef POSIX_TEST
-#include "onigposix.h"
-#else
-#include "oniguruma.h"
-#endif
-
-#ifdef _WIN32
-# include <string.h>
-#else
-# include <strings.h>
-#endif
-
-#define SLEN(s) strlen(s)
-
-static int nsucc = 0;
-static int nfail = 0;
-static int nerror = 0;
-
-static FILE* err_file;
-
-#ifndef POSIX_TEST
-static OnigRegion* region;
-#endif
-
-static void xx(char* pattern, char* str, int from, int to, int mem, int not)
-{
- int r;
-
-#ifdef POSIX_TEST
- regex_t reg;
- char buf[200];
- regmatch_t pmatch[25];
-
- r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
- if (r) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(err_file, "ERROR: %s\n", buf);
- nerror++;
- return ;
- }
-
- r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
- if (r != 0 && r != REG_NOMATCH) {
- regerror(r, &reg, buf, sizeof(buf));
- fprintf(err_file, "ERROR: %s\n", buf);
- nerror++;
- return ;
- }
-
- if (r == REG_NOMATCH) {
- if (not) {
- fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- }
- else {
- if (not) {
- fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- else {
- if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
- fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
- from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
- nfail++;
- }
- }
- }
- regfree(&reg);
-
-#else
- regex_t* reg;
- OnigErrorInfo einfo;
-
- r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)),
- ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo);
- if (r) {
- char s[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str((UChar* )s, r, &einfo);
- fprintf(err_file, "ERROR: %s\n", s);
- nerror++;
- return ;
- }
-
- r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
- (UChar* )str, (UChar* )(str + SLEN(str)),
- region, ONIG_OPTION_NONE);
- if (r < ONIG_MISMATCH) {
- char s[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str((UChar* )s, r);
- fprintf(err_file, "ERROR: %s\n", s);
- nerror++;
- return ;
- }
-
- if (r == ONIG_MISMATCH) {
- if (not) {
- fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- }
- else {
- if (not) {
- fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- else {
- if (region->beg[mem] == from && region->end[mem] == to) {
- fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
- from, to, region->beg[mem], region->end[mem]);
- nfail++;
- }
- }
- }
- onig_free(reg);
-#endif
-}
-
-static void x2(char* pattern, char* str, int from, int to)
-{
- xx(pattern, str, from, to, 0, 0);
-}
-
-static void x3(char* pattern, char* str, int from, int to, int mem)
-{
- xx(pattern, str, from, to, mem, 0);
-}
-
-static void n(char* pattern, char* str)
-{
- xx(pattern, str, 0, 0, 0, 1);
-}
-
-extern int main(int argc, char* argv[])
-{
-#ifndef POSIX_TEST
- static OnigEncoding use_encs[1];
-
- use_encs[0] = ONIG_ENCODING_SJIS;
- onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));
-#endif
-
- err_file = stdout;
-
-#ifdef POSIX_TEST
- reg_set_encoding(REG_POSIX_ENCODING_SJIS);
-#else
- region = onig_region_new();
-#endif
-
- x2("", "", 0, 0);
- x2("^", "", 0, 0);
- x2("$", "", 0, 0);
- x2("\\G", "", 0, 0);
- x2("\\A", "", 0, 0);
- x2("\\Z", "", 0, 0);
- x2("\\z", "", 0, 0);
- x2("^$", "", 0, 0);
- x2("\\ca", "\001", 0, 1);
- x2("\\C-b", "\002", 0, 1);
- x2("\\c\\\\", "\034", 0, 1);
- x2("q[\\c\\\\]", "q\034", 0, 2);
- x2("", "a", 0, 0);
- x2("a", "a", 0, 1);
- x2("\\x61", "a", 0, 1);
- x2("aa", "aa", 0, 2);
- x2("aaa", "aaa", 0, 3);
- x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
- x2("ab", "ab", 0, 2);
- x2("b", "ab", 1, 2);
- x2("bc", "abc", 1, 3);
- x2("(?i:#RET#)", "#INS##RET#", 5, 10);
- x2("\\17", "\017", 0, 1);
- x2("\\x1f", "\x1f", 0, 1);
- x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
- x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
- x2(".", "a", 0, 1);
- n(".", "");
- x2("..", "ab", 0, 2);
- x2("\\w", "e", 0, 1);
- n("\\W", "e");
- x2("\\s", " ", 0, 1);
- x2("\\S", "b", 0, 1);
- x2("\\d", "4", 0, 1);
- n("\\D", "4");
- x2("\\b", "z ", 0, 0);
- x2("\\b", " z", 1, 1);
- x2("\\B", "zz ", 1, 1);
- x2("\\B", "z ", 2, 2);
- x2("\\B", " z", 0, 0);
- x2("[ab]", "b", 0, 1);
- n("[ab]", "c");
- x2("[a-z]", "t", 0, 1);
- n("[^a]", "a");
- x2("[^a]", "\n", 0, 1);
- x2("[]]", "]", 0, 1);
- n("[^]]", "]");
- x2("[\\^]+", "0^^1", 1, 3);
- x2("[b-]", "b", 0, 1);
- x2("[b-]", "-", 0, 1);
- x2("[\\w]", "z", 0, 1);
- n("[\\w]", " ");
- x2("[\\W]", "b$", 1, 2);
- x2("[\\d]", "5", 0, 1);
- n("[\\d]", "e");
- x2("[\\D]", "t", 0, 1);
- n("[\\D]", "3");
- x2("[\\s]", " ", 0, 1);
- n("[\\s]", "a");
- x2("[\\S]", "b", 0, 1);
- n("[\\S]", " ");
- x2("[\\w\\d]", "2", 0, 1);
- n("[\\w\\d]", " ");
- x2("[[:upper:]]", "B", 0, 1);
- x2("[*[:xdigit:]+]", "+", 0, 1);
- x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
- x2("[*[:xdigit:]+]", "-@^+", 3, 4);
- n("[[:upper]]", "A");
- x2("[[:upper]]", ":", 0, 1);
- x2("[\\044-\\047]", "\046", 0, 1);
- x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
- x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
- n("[\\x6A-\\x6D]", "\x6E");
- n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
- x2("[\\[]", "[", 0, 1);
- x2("[\\]]", "]", 0, 1);
- x2("[&]", "&", 0, 1);
- x2("[[ab]]", "b", 0, 1);
- x2("[[ab]c]", "c", 0, 1);
- n("[[^a]]", "a");
- n("[^[a]]", "a");
- x2("[[ab]&&bc]", "b", 0, 1);
- n("[[ab]&&bc]", "a");
- n("[[ab]&&bc]", "c");
- x2("[a-z&&b-y&&c-x]", "w", 0, 1);
- n("[^a-z&&b-y&&c-x]", "w");
- x2("[[^a&&a]&&a-z]", "b", 0, 1);
- n("[[^a&&a]&&a-z]", "a");
- x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
- n("[[^a-z&&bcdef]&&[^c-g]]", "c");
- x2("[^[^abc]&&[^cde]]", "c", 0, 1);
- x2("[^[^abc]&&[^cde]]", "e", 0, 1);
- n("[^[^abc]&&[^cde]]", "f");
- x2("[a-&&-a]", "-", 0, 1);
- n("[a\\-&&\\-a]", "&");
- n("\\wabc", " abc");
- x2("a\\Wbc", "a bc", 0, 4);
- x2("a.b.c", "aabbc", 0, 5);
- x2(".\\wb\\W..c", "abb bcc", 0, 7);
- x2("\\s\\wzzz", " zzzz", 0, 5);
- x2("aa.b", "aabb", 0, 4);
- n(".a", "ab");
- x2(".a", "aa", 0, 2);
- x2("^a", "a", 0, 1);
- x2("^a$", "a", 0, 1);
- x2("^\\w$", "a", 0, 1);
- n("^\\w$", " ");
- x2("^\\wab$", "zab", 0, 3);
- x2("^\\wabcdef$", "zabcdef", 0, 7);
- x2("^\\w...def$", "zabcdef", 0, 7);
- x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
- x2("\\A\\Z", "", 0, 0);
- x2("\\Axyz", "xyz", 0, 3);
- x2("xyz\\Z", "xyz", 0, 3);
- x2("xyz\\z", "xyz", 0, 3);
- x2("a\\Z", "a", 0, 1);
- x2("\\Gaz", "az", 0, 2);
- n("\\Gz", "bza");
- n("az\\G", "az");
- n("az\\A", "az");
- n("a\\Az", "az");
- x2("\\^\\$", "^$", 0, 2);
- x2("^x?y", "xy", 0, 2);
- x2("^(x?y)", "xy", 0, 2);
- x2("\\w", "_", 0, 1);
- n("\\W", "_");
- x2("(?=z)z", "z", 0, 1);
- n("(?=z).", "a");
- x2("(?!z)a", "a", 0, 1);
- n("(?!z)a", "z");
- x2("(?i:a)", "a", 0, 1);
- x2("(?i:a)", "A", 0, 1);
- x2("(?i:A)", "a", 0, 1);
- n("(?i:A)", "b");
- x2("(?i:[A-Z])", "a", 0, 1);
- x2("(?i:[f-m])", "H", 0, 1);
- x2("(?i:[f-m])", "h", 0, 1);
- n("(?i:[f-m])", "e");
- x2("(?i:[A-c])", "D", 0, 1);
- n("(?i:[^a-z])", "A");
- n("(?i:[^a-z])", "a");
- x2("(?i:[!-k])", "Z", 0, 1);
- x2("(?i:[!-k])", "7", 0, 1);
- x2("(?i:[T-}])", "b", 0, 1);
- x2("(?i:[T-}])", "{", 0, 1);
- x2("(?i:\\?a)", "?A", 0, 2);
- x2("(?i:\\*A)", "*a", 0, 2);
- n(".", "\n");
- x2("(?m:.)", "\n", 0, 1);
- x2("(?m:a.)", "a\n", 0, 2);
- x2("(?m:.b)", "a\nb", 1, 3);
- x2(".*abc", "dddabdd\nddabc", 8, 13);
- x2("(?m:.*abc)", "dddabddabc", 0, 10);
- n("(?i)(?-i)a", "A");
- n("(?i)(?-i:a)", "A");
- x2("a?", "", 0, 0);
- x2("a?", "b", 0, 0);
- x2("a?", "a", 0, 1);
- x2("a*", "", 0, 0);
- x2("a*", "a", 0, 1);
- x2("a*", "aaa", 0, 3);
- x2("a*", "baaaa", 0, 0);
- n("a+", "");
- x2("a+", "a", 0, 1);
- x2("a+", "aaaa", 0, 4);
- x2("a+", "aabbb", 0, 2);
- x2("a+", "baaaa", 1, 5);
- x2(".?", "", 0, 0);
- x2(".?", "f", 0, 1);
- x2(".?", "\n", 0, 0);
- x2(".*", "", 0, 0);
- x2(".*", "abcde", 0, 5);
- x2(".+", "z", 0, 1);
- x2(".+", "zdswer\n", 0, 6);
- x2("(.*)a\\1f", "babfbac", 0, 4);
- x2("(.*)a\\1f", "bacbabf", 3, 7);
- x2("((.*)a\\2f)", "bacbabf", 3, 7);
- x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
- x2("a|b", "a", 0, 1);
- x2("a|b", "b", 0, 1);
- x2("|a", "a", 0, 0);
- x2("(|a)", "a", 0, 0);
- x2("ab|bc", "ab", 0, 2);
- x2("ab|bc", "bc", 0, 2);
- x2("z(?:ab|bc)", "zbc", 0, 3);
- x2("a(?:ab|bc)c", "aabc", 0, 4);
- x2("ab|(?:ac|az)", "az", 0, 2);
- x2("a|b|c", "dc", 1, 2);
- x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
- n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
- x2("a|^z", "ba", 1, 2);
- x2("a|^z", "za", 0, 1);
- x2("a|\\Gz", "bza", 2, 3);
- x2("a|\\Gz", "za", 0, 1);
- x2("a|\\Az", "bza", 2, 3);
- x2("a|\\Az", "za", 0, 1);
- x2("a|b\\Z", "ba", 1, 2);
- x2("a|b\\Z", "b", 0, 1);
- x2("a|b\\z", "ba", 1, 2);
- x2("a|b\\z", "b", 0, 1);
- x2("\\w|\\s", " ", 0, 1);
- n("\\w|\\w", " ");
- x2("\\w|%", "%", 0, 1);
- x2("\\w|[&$]", "&", 0, 1);
- x2("[b-d]|[^e-z]", "a", 0, 1);
- x2("(?:a|[c-f])|bz", "dz", 0, 1);
- x2("(?:a|[c-f])|bz", "bz", 0, 2);
- x2("abc|(?=zz)..f", "zzf", 0, 3);
- x2("abc|(?!zz)..f", "abf", 0, 3);
- x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
- n("(?>a|abd)c", "abdc");
- x2("(?>abd|a)c", "abdc", 0, 4);
- x2("a?|b", "a", 0, 1);
- x2("a?|b", "b", 0, 0);
- x2("a?|b", "", 0, 0);
- x2("a*|b", "aa", 0, 2);
- x2("a*|b*", "ba", 0, 0);
- x2("a*|b*", "ab", 0, 1);
- x2("a+|b*", "", 0, 0);
- x2("a+|b*", "bbb", 0, 3);
- x2("a+|b*", "abbb", 0, 1);
- n("a+|b+", "");
- x2("(a|b)?", "b", 0, 1);
- x2("(a|b)*", "ba", 0, 2);
- x2("(a|b)+", "bab", 0, 3);
- x2("(ab|ca)+", "caabbc", 0, 4);
- x2("(ab|ca)+", "aabca", 1, 5);
- x2("(ab|ca)+", "abzca", 0, 2);
- x2("(a|bab)+", "ababa", 0, 5);
- x2("(a|bab)+", "ba", 1, 2);
- x2("(a|bab)+", "baaaba", 1, 4);
- x2("(?:a|b)(?:a|b)", "ab", 0, 2);
- x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
- x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
- x2("(?:a+|b+){2}", "aaabbb", 0, 6);
- x2("h{0,}", "hhhh", 0, 4);
- x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
- n("ax{2}*a", "0axxxa1");
- n("a.{0,2}a", "0aXXXa0");
- n("a.{0,2}?a", "0aXXXa0");
- n("a.{0,2}?a", "0aXXXXa0");
- x2("^a{2,}?a$", "aaa", 0, 3);
- x2("^[a-z]{2,}?$", "aaa", 0, 3);
- x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
- n("(?:a+|\\Ab*)cc", "abcc");
- x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
- x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
- x2("a|(?i)c", "C", 0, 1);
- x2("(?i)c|a", "C", 0, 1);
- x2("(?i)c|a", "A", 0, 1);
- x2("(?i:c)|a", "C", 0, 1);
- n("(?i:c)|a", "A");
- x2("[abc]?", "abc", 0, 1);
- x2("[abc]*", "abc", 0, 3);
- x2("[^abc]*", "abc", 0, 0);
- n("[^abc]+", "abc");
- x2("a?\?", "aaa", 0, 0);
- x2("ba?\?b", "bab", 0, 3);
- x2("a*?", "aaa", 0, 0);
- x2("ba*?", "baa", 0, 1);
- x2("ba*?b", "baab", 0, 4);
- x2("a+?", "aaa", 0, 1);
- x2("ba+?", "baa", 0, 2);
- x2("ba+?b", "baab", 0, 4);
- x2("(?:a?)?\?", "a", 0, 0);
- x2("(?:a?\?)?", "a", 0, 0);
- x2("(?:a?)+?", "aaa", 0, 1);
- x2("(?:a+)?\?", "aaa", 0, 0);
- x2("(?:a+)?\?b", "aaab", 0, 4);
- x2("(?:ab)?{2}", "", 0, 0);
- x2("(?:ab)?{2}", "ababa", 0, 4);
- x2("(?:ab)*{0}", "ababa", 0, 0);
- x2("(?:ab){3,}", "abababab", 0, 8);
- n("(?:ab){3,}", "abab");
- x2("(?:ab){2,4}", "ababab", 0, 6);
- x2("(?:ab){2,4}", "ababababab", 0, 8);
- x2("(?:ab){2,4}?", "ababababab", 0, 4);
- x2("(?:ab){,}", "ab{,}", 0, 5);
- x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
- x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
- x2("(d+)([^abc]z)", "dddz", 0, 4);
- x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
- x2("(\\w+)(\\wz)", "dddz", 0, 4);
- x3("(a)", "a", 0, 1, 1);
- x3("(ab)", "ab", 0, 2, 1);
- x2("((ab))", "ab", 0, 2);
- x3("((ab))", "ab", 0, 2, 1);
- x3("((ab))", "ab", 0, 2, 2);
- x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
- x3("(ab)(cd)", "abcd", 0, 2, 1);
- x3("(ab)(cd)", "abcd", 2, 4, 2);
- x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
- x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
- x2("(^a)", "a", 0, 1);
- x3("(a)|(a)", "ba", 1, 2, 1);
- x3("(^a)|(a)", "ba", 1, 2, 2);
- x3("(a?)", "aaa", 0, 1, 1);
- x3("(a*)", "aaa", 0, 3, 1);
- x3("(a*)", "", 0, 0, 1);
- x3("(a+)", "aaaaaaa", 0, 7, 1);
- x3("(a+|b*)", "bbbaa", 0, 3, 1);
- x3("(a+|b?)", "bbbaa", 0, 1, 1);
- x3("(abc)?", "abc", 0, 3, 1);
- x3("(abc)*", "abc", 0, 3, 1);
- x3("(abc)+", "abc", 0, 3, 1);
- x3("(xyz|abc)+", "abc", 0, 3, 1);
- x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
- x3("((?i:abc))", "AbC", 0, 3, 1);
- x2("(abc)(?i:\\1)", "abcABC", 0, 6);
- x3("((?m:a.c))", "a\nc", 0, 3, 1);
- x3("((?=az)a)", "azb", 0, 1, 1);
- x3("abc|(.abd)", "zabd", 0, 4, 1);
- x2("(?:abc)|(ABC)", "abc", 0, 3);
- x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
- x3("a*(.)", "aaaaz", 4, 5, 1);
- x3("a*?(.)", "aaaaz", 0, 1, 1);
- x3("a*?(c)", "aaaac", 4, 5, 1);
- x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
- x3("(\\Abb)cc", "bbcc", 0, 2, 1);
- n("(\\Abb)cc", "zbbcc");
- x3("(^bb)cc", "bbcc", 0, 2, 1);
- n("(^bb)cc", "zbbcc");
- x3("cc(bb$)", "ccbb", 2, 4, 1);
- n("cc(bb$)", "ccbbb");
- n("(\\1)", "");
- n("\\1(a)", "aa");
- n("(a(b)\\1)\\2+", "ababb");
- n("(?:(?:\\1|z)(a))+$", "zaa");
- x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
- x2("(a)(?=\\1)", "aa", 0, 1);
- n("(a)$|\\1", "az");
- x2("(a)\\1", "aa", 0, 2);
- n("(a)\\1", "ab");
- x2("(a?)\\1", "aa", 0, 2);
- x2("(a?\?)\\1", "aa", 0, 0);
- x2("(a*)\\1", "aaaaa", 0, 4);
- x3("(a*)\\1", "aaaaa", 0, 2, 1);
- x2("a(b*)\\1", "abbbb", 0, 5);
- x2("a(b*)\\1", "ab", 0, 1);
- x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
- x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
- x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
- x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
- x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
- x2("([a-d])\\1", "cc", 0, 2);
- x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
- n("(\\w\\d\\s)\\1", "f5 f5");
- x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
- x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
- x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
- x2("(^a)\\1", "aa", 0, 2);
- n("(^a)\\1", "baa");
- n("(a$)\\1", "aa");
- n("(ab\\Z)\\1", "ab");
- x2("(a*\\Z)\\1", "a", 1, 1);
- x2(".(a*\\Z)\\1", "ba", 1, 2);
- x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
- x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
- x2("((?i:az))\\1", "AzAz", 0, 4);
- n("((?i:az))\\1", "Azaz");
- x2("(?<=a)b", "ab", 1, 2);
- n("(?<=a)b", "bb");
- x2("(?<=a|b)b", "bb", 1, 2);
- x2("(?<=a|bc)b", "bcb", 2, 3);
- x2("(?<=a|bc)b", "ab", 1, 2);
- x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
- x2("(a)\\g<1>", "aa", 0, 2);
- x2("(?<!a)b", "cb", 1, 2);
- n("(?<!a)b", "ab");
- x2("(?<!a|bc)b", "bbb", 0, 1);
- n("(?<!a|bc)z", "bcz");
- x2("(?<name1>a)", "a", 0, 1);
- x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
- x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
- x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
- x2("(?<n>|a\\g<n>)+", "", 0, 0);
- x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
- x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
- x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
- x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
- x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
- x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
- x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
- x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
- x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
- x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
- x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
- n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
- x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
- x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
- x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
- x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
- x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
- x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
- x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
- x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
- x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
- x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
- x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
- x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
- x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
- x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
- x2("()*\\1", "", 0, 0);
- x2("(?:()|())*\\1\\2", "", 0, 0);
- x3("(?:\\1a|())*", "a", 0, 0, 1);
- x2("x((.)*)*x", "0x1x2x3", 1, 6);
- x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
- x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
- x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
- x2("\\xED\\xF2", "\xed\xf2", 0, 2);
- x2("", "‚ ", 0, 0);
- x2("‚ ", "‚ ", 0, 2);
- n("‚¢", "‚ ");
- x2("‚¤‚¤", "‚¤‚¤", 0, 4);
- x2("‚ ‚¢‚¤", "‚ ‚¢‚¤", 0, 6);
- x2("‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±", "‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±", 0, 70);
- x2("‚ ", "‚¢‚ ", 2, 4);
- x2("‚¢‚¤", "‚ ‚¢‚¤", 2, 6);
- x2("\\xca\\xb8", "\xca\xb8", 0, 2);
- x2(".", "‚ ", 0, 2);
- x2("..", "‚©‚«", 0, 4);
- x2("\\w", "‚¨", 0, 2);
- n("\\W", "‚ ");
- x2("[\\W]", "‚¤$", 2, 3);
- x2("\\S", "‚»", 0, 2);
- x2("\\S", "Š¿", 0, 2);
- x2("\\b", "‹C ", 0, 0);
- x2("\\b", " ‚Ù", 1, 1);
- x2("\\B", "‚¹‚» ", 2, 2);
- x2("\\B", "‚¤ ", 3, 3);
- x2("\\B", " ‚¢", 0, 0);
- x2("[‚½‚¿]", "‚¿", 0, 2);
- n("[‚È‚É]", "‚Ê");
- x2("[‚¤-‚¨]", "‚¦", 0, 2);
- n("[^‚¯]", "‚¯");
- x2("[\\w]", "‚Ë", 0, 2);
- n("[\\d]", "‚Ó");
- x2("[\\D]", "‚Í", 0, 2);
- n("[\\s]", "‚­");
- x2("[\\S]", "‚Ö", 0, 2);
- x2("[\\w\\d]", "‚æ", 0, 2);
- x2("[\\w\\d]", " ‚æ", 3, 5);
- n("\\w‹SŽÔ", " ‹SŽÔ");
- x2("‹S\\WŽÔ", "‹S ŽÔ", 0, 5);
- x2("‚ .‚¢.‚¤", "‚ ‚ ‚¢‚¢‚¤", 0, 10);
- x2(".\\w‚¤\\W..‚¼", "‚¦‚¤‚¤ ‚¤‚¼‚¼", 0, 13);
- x2("\\s\\w‚±‚±‚±", " ‚±‚±‚±‚±", 0, 9);
- x2("‚ ‚ .‚¯", "‚ ‚ ‚¯‚¯", 0, 8);
- n(".‚¢", "‚¢‚¦");
- x2(".‚¨", "‚¨‚¨", 0, 4);
- x2("^‚ ", "‚ ", 0, 2);
- x2("^‚Þ$", "‚Þ", 0, 2);
- x2("^\\w$", "‚É", 0, 2);
- x2("^\\w‚©‚«‚­‚¯‚±$", "z‚©‚«‚­‚¯‚±", 0, 11);
- x2("^\\w...‚¤‚¦‚¨$", "z‚ ‚¢‚¤‚¤‚¦‚¨", 0, 13);
- x2("\\w\\w\\s\\W‚¨‚¨‚¨\\d", "a‚¨ ‚¨‚¨‚¨4", 0, 12);
- x2("\\A‚½‚¿‚Â", "‚½‚¿‚Â", 0, 6);
- x2("‚Þ‚ß‚à\\Z", "‚Þ‚ß‚à", 0, 6);
- x2("‚©‚«‚­\\z", "‚©‚«‚­", 0, 6);
- x2("‚©‚«‚­\\Z", "‚©‚«‚­\n", 0, 6);
- x2("\\G‚Û‚Ò", "‚Û‚Ò", 0, 4);
- n("\\G‚¦", "‚¤‚¦‚¨");
- n("‚Æ‚Ä\\G", "‚Æ‚Ä");
- n("‚Ü‚Ý\\A", "‚Ü‚Ý");
- n("‚Ü\\A‚Ý", "‚Ü‚Ý");
- x2("(?=‚¹)‚¹", "‚¹", 0, 2);
- n("(?=‚¤).", "‚¢");
- x2("(?!‚¤)‚©", "‚©", 0, 2);
- n("(?!‚Æ)‚ ", "‚Æ");
- x2("(?i:‚ )", "‚ ", 0, 2);
- x2("(?i:‚Ô‚×)", "‚Ô‚×", 0, 4);
- n("(?i:‚¢)", "‚¤");
- x2("(?m:‚æ.)", "‚æ\n", 0, 3);
- x2("(?m:.‚ß)", "‚Ü\n‚ß", 2, 5);
- x2("‚ ?", "", 0, 0);
- x2("•Ï?", "‰»", 0, 0);
- x2("•Ï?", "•Ï", 0, 2);
- x2("—Ê*", "", 0, 0);
- x2("—Ê*", "—Ê", 0, 2);
- x2("Žq*", "ŽqŽqŽq", 0, 6);
- x2("”n*", "Ž­”n”n”n”n", 0, 0);
- n("ŽR+", "");
- x2("‰Í+", "‰Í", 0, 2);
- x2("Žž+", "ŽžŽžŽžŽž", 0, 8);
- x2("‚¦+", "‚¦‚¦‚¤‚¤‚¤", 0, 4);
- x2("‚¤+", "‚¨‚¤‚¤‚¤‚¤", 2, 10);
- x2(".?", "‚½", 0, 2);
- x2(".*", "‚Ï‚Ò‚Õ‚Ø", 0, 8);
- x2(".+", "‚ë", 0, 2);
- x2(".+", "‚¢‚¤‚¦‚©\n", 0, 8);
- x2("‚ |‚¢", "‚ ", 0, 2);
- x2("‚ |‚¢", "‚¢", 0, 2);
- x2("‚ ‚¢|‚¢‚¤", "‚ ‚¢", 0, 4);
- x2("‚ ‚¢|‚¢‚¤", "‚¢‚¤", 0, 4);
- x2("‚ð(?:‚©‚«|‚«‚­)", "‚ð‚©‚«", 0, 6);
- x2("‚ð(?:‚©‚«|‚«‚­)‚¯", "‚ð‚«‚­‚¯", 0, 8);
- x2("‚ ‚¢|(?:‚ ‚¤|‚ ‚ð)", "‚ ‚ð", 0, 4);
- x2("‚ |‚¢|‚¤", "‚¦‚¤", 2, 4);
- x2("‚ |‚¢|‚¤‚¦|‚¨‚©‚«|‚­|‚¯‚±‚³|‚µ‚·‚¹|‚»|‚½‚¿|‚‚ĂƂȂÉ|‚Ê‚Ë", "‚µ‚·‚¹", 0, 6);
- n("‚ |‚¢|‚¤‚¦|‚¨‚©‚«|‚­|‚¯‚±‚³|‚µ‚·‚¹|‚»|‚½‚¿|‚‚ĂƂȂÉ|‚Ê‚Ë", "‚·‚¹");
- x2("‚ |^‚í", "‚Ô‚ ", 2, 4);
- x2("‚ |^‚ð", "‚ð‚ ", 0, 2);
- x2("‹S|\\GŽÔ", "‚¯ŽÔ‹S", 4, 6);
- x2("‹S|\\GŽÔ", "ŽÔ‹S", 0, 2);
- x2("‹S|\\AŽÔ", "bŽÔ‹S", 3, 5);
- x2("‹S|\\AŽÔ", "ŽÔ", 0, 2);
- x2("‹S|ŽÔ\\Z", "ŽÔ‹S", 2, 4);
- x2("‹S|ŽÔ\\Z", "ŽÔ", 0, 2);
- x2("‹S|ŽÔ\\Z", "ŽÔ\n", 0, 2);
- x2("‹S|ŽÔ\\z", "ŽÔ‹S", 2, 4);
- x2("‹S|ŽÔ\\z", "ŽÔ", 0, 2);
- x2("\\w|\\s", "‚¨", 0, 2);
- x2("\\w|%", "%‚¨", 0, 1);
- x2("\\w|[&$]", "‚¤&", 0, 2);
- x2("[‚¢-‚¯]", "‚¤", 0, 2);
- x2("[‚¢-‚¯]|[^‚©-‚±]", "‚ ", 0, 2);
- x2("[‚¢-‚¯]|[^‚©-‚±]", "‚©", 0, 2);
- x2("[^‚ ]", "\n", 0, 1);
- x2("(?:‚ |[‚¤-‚«])|‚¢‚ð", "‚¤‚ð", 0, 2);
- x2("(?:‚ |[‚¤-‚«])|‚¢‚ð", "‚¢‚ð", 0, 4);
- x2("‚ ‚¢‚¤|(?=‚¯‚¯)..‚Ù", "‚¯‚¯‚Ù", 0, 6);
- x2("‚ ‚¢‚¤|(?!‚¯‚¯)..‚Ù", "‚ ‚¢‚Ù", 0, 6);
- x2("(?=‚ð‚ )..‚ |(?=‚ð‚ð)..‚ ", "‚ð‚ð‚ ", 0, 6);
- x2("(?<=‚ |‚¢‚¤)‚¢", "‚¢‚¤‚¢", 4, 6);
- n("(?>‚ |‚ ‚¢‚¦)‚¤", "‚ ‚¢‚¦‚¤");
- x2("(?>‚ ‚¢‚¦|‚ )‚¤", "‚ ‚¢‚¦‚¤", 0, 8);
- x2("‚ ?|‚¢", "‚ ", 0, 2);
- x2("‚ ?|‚¢", "‚¢", 0, 0);
- x2("‚ ?|‚¢", "", 0, 0);
- x2("‚ *|‚¢", "‚ ‚ ", 0, 4);
- x2("‚ *|‚¢*", "‚¢‚ ", 0, 0);
- x2("‚ *|‚¢*", "‚ ‚¢", 0, 2);
- x2("[a‚ ]*|‚¢*", "a‚ ‚¢‚¢‚¢", 0, 3);
- x2("‚ +|‚¢*", "", 0, 0);
- x2("‚ +|‚¢*", "‚¢‚¢‚¢", 0, 6);
- x2("‚ +|‚¢*", "‚ ‚¢‚¢‚¢", 0, 2);
- x2("‚ +|‚¢*", "a‚ ‚¢‚¢‚¢", 0, 0);
- n("‚ +|‚¢+", "");
- x2("(‚ |‚¢)?", "‚¢", 0, 2);
- x2("(‚ |‚¢)*", "‚¢‚ ", 0, 4);
- x2("(‚ |‚¢)+", "‚¢‚ ‚¢", 0, 6);
- x2("(‚ ‚¢|‚¤‚ )+", "‚¤‚ ‚ ‚¢‚¤‚¦", 0, 8);
- x2("(‚ ‚¢|‚¤‚¦)+", "‚¤‚ ‚ ‚¢‚¤‚¦", 4, 12);
- x2("(‚ ‚¢|‚¤‚ )+", "‚ ‚ ‚¢‚¤‚ ", 2, 10);
- x2("(‚ ‚¢|‚¤‚ )+", "‚ ‚¢‚ð‚¤‚ ", 0, 4);
- x2("(‚ ‚¢|‚¤‚ )+", "$$zzzz‚ ‚¢‚ð‚¤‚ ", 6, 10);
- x2("(‚ |‚¢‚ ‚¢)+", "‚ ‚¢‚ ‚¢‚ ", 0, 10);
- x2("(‚ |‚¢‚ ‚¢)+", "‚¢‚ ", 2, 4);
- x2("(‚ |‚¢‚ ‚¢)+", "‚¢‚ ‚ ‚ ‚¢‚ ", 2, 8);
- x2("(?:‚ |‚¢)(?:‚ |‚¢)", "‚ ‚¢", 0, 4);
- x2("(?:‚ *|‚¢*)(?:‚ *|‚¢*)", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 6);
- x2("(?:‚ *|‚¢*)(?:‚ +|‚¢+)", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12);
- x2("(?:‚ +|‚¢+){2}", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12);
- x2("(?:‚ +|‚¢+){1,2}", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12);
- x2("(?:‚ +|\\A‚¢*)‚¤‚¤", "‚¤‚¤", 0, 4);
- n("(?:‚ +|\\A‚¢*)‚¤‚¤", "‚ ‚¢‚¤‚¤");
- x2("(?:^‚ +|‚¢+)*‚¤", "‚ ‚ ‚¢‚¢‚¢‚ ‚¢‚¤", 12, 16);
- x2("(?:^‚ +|‚¢+)*‚¤", "‚ ‚ ‚¢‚¢‚¢‚¢‚¤", 0, 14);
- x2("‚¤{0,}", "‚¤‚¤‚¤‚¤", 0, 8);
- x2("‚ |(?i)c", "C", 0, 1);
- x2("(?i)c|‚ ", "C", 0, 1);
- x2("(?i:‚ )|a", "a", 0, 1);
- n("(?i:‚ )|a", "A");
- x2("[‚ ‚¢‚¤]?", "‚ ‚¢‚¤", 0, 2);
- x2("[‚ ‚¢‚¤]*", "‚ ‚¢‚¤", 0, 6);
- x2("[^‚ ‚¢‚¤]*", "‚ ‚¢‚¤", 0, 0);
- n("[^‚ ‚¢‚¤]+", "‚ ‚¢‚¤");
- x2("‚ ?\?", "‚ ‚ ‚ ", 0, 0);
- x2("‚¢‚ ?\?‚¢", "‚¢‚ ‚¢", 0, 6);
- x2("‚ *?", "‚ ‚ ‚ ", 0, 0);
- x2("‚¢‚ *?", "‚¢‚ ‚ ", 0, 2);
- x2("‚¢‚ *?‚¢", "‚¢‚ ‚ ‚¢", 0, 8);
- x2("‚ +?", "‚ ‚ ‚ ", 0, 2);
- x2("‚¢‚ +?", "‚¢‚ ‚ ", 0, 4);
- x2("‚¢‚ +?‚¢", "‚¢‚ ‚ ‚¢", 0, 8);
- x2("(?:“V?)?\?", "“V", 0, 0);
- x2("(?:“V?\?)?", "“V", 0, 0);
- x2("(?:–²?)+?", "–²–²–²", 0, 2);
- x2("(?:•—+)?\?", "•—•—•—", 0, 0);
- x2("(?:á+)?\?‘š", "ááá‘š", 0, 8);
- x2("(?:‚ ‚¢)?{2}", "", 0, 0);
- x2("(?:‹SŽÔ)?{2}", "‹SŽÔ‹SŽÔ‹S", 0, 8);
- x2("(?:‹SŽÔ)*{0}", "‹SŽÔ‹SŽÔ‹S", 0, 0);
- x2("(?:‹SŽÔ){3,}", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 16);
- n("(?:‹SŽÔ){3,}", "‹SŽÔ‹SŽÔ");
- x2("(?:‹SŽÔ){2,4}", "‹SŽÔ‹SŽÔ‹SŽÔ", 0, 12);
- x2("(?:‹SŽÔ){2,4}", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 16);
- x2("(?:‹SŽÔ){2,4}?", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 8);
- x2("(?:‹SŽÔ){,}", "‹SŽÔ{,}", 0, 7);
- x2("(?:‚©‚«‚­)+?{2}", "‚©‚«‚­‚©‚«‚­‚©‚«‚­", 0, 12);
- x3("(‰Î)", "‰Î", 0, 2, 1);
- x3("(‰Î…)", "‰Î…", 0, 4, 1);
- x2("((ŽžŠÔ))", "ŽžŠÔ", 0, 4);
- x3("((•—…))", "•—…", 0, 4, 1);
- x3("((ð“ú))", "ð“ú", 0, 4, 2);
- x3("((((((((((((((((((((—ÊŽq))))))))))))))))))))", "—ÊŽq", 0, 4, 20);
- x3("(‚ ‚¢)(‚¤‚¦)", "‚ ‚¢‚¤‚¦", 0, 4, 1);
- x3("(‚ ‚¢)(‚¤‚¦)", "‚ ‚¢‚¤‚¦", 4, 8, 2);
- x3("()(‚ )‚¢‚¤(‚¦‚¨‚©)‚«‚­‚¯‚±", "‚ ‚¢‚¤‚¦‚¨‚©‚«‚­‚¯‚±", 6, 12, 3);
- x3("(()(‚ )‚¢‚¤(‚¦‚¨‚©)‚«‚­‚¯‚±)", "‚ ‚¢‚¤‚¦‚¨‚©‚«‚­‚¯‚±", 6, 12, 4);
- x3(".*(ƒtƒH)ƒ“Eƒ}(ƒ“()ƒVƒ…ƒ^)ƒCƒ“", "ƒtƒHƒ“Eƒ}ƒ“ƒVƒ…ƒ^ƒCƒ“", 10, 18, 2);
- x2("(^‚ )", "‚ ", 0, 2);
- x3("(‚ )|(‚ )", "‚¢‚ ", 2, 4, 1);
- x3("(^‚ )|(‚ )", "‚¢‚ ", 2, 4, 2);
- x3("(‚ ?)", "‚ ‚ ‚ ", 0, 2, 1);
- x3("(‚Ü*)", "‚Ü‚Ü‚Ü", 0, 6, 1);
- x3("(‚Æ*)", "", 0, 0, 1);
- x3("(‚é+)", "‚é‚é‚é‚é‚é‚é‚é", 0, 14, 1);
- x3("(‚Ó+|‚Ö*)", "‚Ó‚Ó‚Ó‚Ö‚Ö", 0, 6, 1);
- x3("(‚ +|‚¢?)", "‚¢‚¢‚¢‚ ‚ ", 0, 2, 1);
- x3("(‚ ‚¢‚¤)?", "‚ ‚¢‚¤", 0, 6, 1);
- x3("(‚ ‚¢‚¤)*", "‚ ‚¢‚¤", 0, 6, 1);
- x3("(‚ ‚¢‚¤)+", "‚ ‚¢‚¤", 0, 6, 1);
- x3("(‚³‚µ‚·|‚ ‚¢‚¤)+", "‚ ‚¢‚¤", 0, 6, 1);
- x3("([‚È‚É‚Ê][‚©‚«‚­]|‚©‚«‚­)+", "‚©‚«‚­", 0, 6, 1);
- x3("((?i:‚ ‚¢‚¤))", "‚ ‚¢‚¤", 0, 6, 1);
- x3("((?m:‚ .‚¤))", "‚ \n‚¤", 0, 5, 1);
- x3("((?=‚ ‚ñ)‚ )", "‚ ‚ñ‚¢", 0, 2, 1);
- x3("‚ ‚¢‚¤|(.‚ ‚¢‚¦)", "‚ñ‚ ‚¢‚¦", 0, 8, 1);
- x3("‚ *(.)", "‚ ‚ ‚ ‚ ‚ñ", 8, 10, 1);
- x3("‚ *?(.)", "‚ ‚ ‚ ‚ ‚ñ", 0, 2, 1);
- x3("‚ *?(‚ñ)", "‚ ‚ ‚ ‚ ‚ñ", 8, 10, 1);
- x3("[‚¢‚¤‚¦]‚ *(.)", "‚¦‚ ‚ ‚ ‚ ‚ñ", 10, 12, 1);
- x3("(\\A‚¢‚¢)‚¤‚¤", "‚¢‚¢‚¤‚¤", 0, 4, 1);
- n("(\\A‚¢‚¢)‚¤‚¤", "‚ñ‚¢‚¢‚¤‚¤");
- x3("(^‚¢‚¢)‚¤‚¤", "‚¢‚¢‚¤‚¤", 0, 4, 1);
- n("(^‚¢‚¢)‚¤‚¤", "‚ñ‚¢‚¢‚¤‚¤");
- x3("‚ë‚ë(‚é‚é$)", "‚ë‚ë‚é‚é", 4, 8, 1);
- n("‚ë‚ë(‚é‚é$)", "‚ë‚ë‚é‚é‚é");
- x2("(–³)\\1", "–³–³", 0, 4);
- n("(–³)\\1", "–³•");
- x2("(‹ó?)\\1", "‹ó‹ó", 0, 4);
- x2("(‹ó?\?)\\1", "‹ó‹ó", 0, 0);
- x2("(‹ó*)\\1", "‹ó‹ó‹ó‹ó‹ó", 0, 8);
- x3("(‹ó*)\\1", "‹ó‹ó‹ó‹ó‹ó", 0, 4, 1);
- x2("‚ (‚¢*)\\1", "‚ ‚¢‚¢‚¢‚¢", 0, 10);
- x2("‚ (‚¢*)\\1", "‚ ‚¢", 0, 2);
- x2("(‚ *)(‚¢*)\\1\\2", "‚ ‚ ‚ ‚¢‚¢‚ ‚ ‚ ‚¢‚¢", 0, 20);
- x2("(‚ *)(‚¢*)\\2", "‚ ‚ ‚ ‚¢‚¢‚¢‚¢", 0, 14);
- x3("(‚ *)(‚¢*)\\2", "‚ ‚ ‚ ‚¢‚¢‚¢‚¢", 6, 10, 2);
- x2("(((((((‚Û*)‚Ø))))))‚Ò\\7", "‚Û‚Û‚Û‚Ø‚Ò‚Û‚Û‚Û", 0, 16);
- x3("(((((((‚Û*)‚Ø))))))‚Ò\\7", "‚Û‚Û‚Û‚Ø‚Ò‚Û‚Û‚Û", 0, 6, 7);
- x2("(‚Í)(‚Ð)(‚Ó)\\2\\1\\3", "‚͂ЂӂЂ͂Ó", 0, 12);
- x2("([‚«-‚¯])\\1", "‚­‚­", 0, 4);
- x2("(\\w\\d\\s)\\1", "‚ 5 ‚ 5 ", 0, 8);
- n("(\\w\\d\\s)\\1", "‚ 5 ‚ 5");
- x2("(’NH|[‚ -‚¤]{3})\\1", "’NH’NH", 0, 8);
- x2("...(’NH|[‚ -‚¤]{3})\\1", "‚ a‚ ’NH’NH", 0, 13);
- x2("(’NH|[‚ -‚¤]{3})\\1", "‚¤‚¢‚¤‚¤‚¢‚¤", 0, 12);
- x2("(^‚±)\\1", "‚±‚±", 0, 4);
- n("(^‚Þ)\\1", "‚ß‚Þ‚Þ");
- n("(‚ $)\\1", "‚ ‚ ");
- n("(‚ ‚¢\\Z)\\1", "‚ ‚¢");
- x2("(‚ *\\Z)\\1", "‚ ", 2, 2);
- x2(".(‚ *\\Z)\\1", "‚¢‚ ", 2, 4);
- x3("(.(‚â‚¢‚ä)\\2)", "z‚â‚¢‚ä‚â‚¢‚ä", 0, 13, 1);
- x3("(.(..\\d.)\\2)", "‚ 12341234", 0, 10, 1);
- x2("((?i:‚ v‚¸))\\1", "‚ v‚¸‚ v‚¸", 0, 10);
- x2("(?<‹ð‚©>•Ï|\\(\\g<‹ð‚©>\\))", "((((((•Ï))))))", 0, 14);
- x2("\\A(?:\\g<ˆ¢_1>|\\g<‰]_2>|\\zI—¹ (?<ˆ¢_1>ŠÏ|Ž©\\g<‰]_2>Ž©)(?<‰]_2>Ý|•ìŽF\\g<ˆ¢_1>•ìŽF))$", "•ìŽFŽ©•ìŽFŽ©ÝŽ©•ìŽFŽ©•ìŽF", 0, 26);
- x2("[[‚ЂÓ]]", "‚Ó", 0, 2);
- x2("[[‚¢‚¨‚¤]‚©]", "‚©", 0, 2);
- n("[[^‚ ]]", "‚ ");
- n("[^[‚ ]]", "‚ ");
- x2("[^[^‚ ]]", "‚ ", 0, 2);
- x2("[[‚©‚«‚­]&&‚«‚­]", "‚­", 0, 2);
- n("[[‚©‚«‚­]&&‚«‚­]", "‚©");
- n("[[‚©‚«‚­]&&‚«‚­]", "‚¯");
- x2("[‚ -‚ñ&&‚¢-‚ð&&‚¤-‚ï]", "‚ï", 0, 2);
- n("[^‚ -‚ñ&&‚¢-‚ð&&‚¤-‚ï]", "‚ï");
- x2("[[^‚ &&‚ ]&&‚ -‚ñ]", "‚¢", 0, 2);
- n("[[^‚ &&‚ ]&&‚ -‚ñ]", "‚ ");
- x2("[[^‚ -‚ñ&&‚¢‚¤‚¦‚¨]&&[^‚¤-‚©]]", "‚«", 0, 2);
- n("[[^‚ -‚ñ&&‚¢‚¤‚¦‚¨]&&[^‚¤-‚©]]", "‚¢");
- x2("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚¤", 0, 2);
- x2("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚¦", 0, 2);
- n("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚©");
- x2("[‚ -&&-‚ ]", "-", 0, 1);
- x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]q-w]", "‚¦", 0, 2);
- x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "f", 0, 1);
- x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "g", 0, 1);
- n("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "2");
- x2("a<b>ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh<\\/b>", "a<b>ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh</b>", 0, 32);
- x2(".<b>ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh<\\/b>", "a<b>ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh</b>", 0, 32);
- fprintf(stdout,
- "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
- nsucc, nfail, nerror, onig_version());
-
-#ifndef POSIX_TEST
- onig_region_free(region, 1);
- onig_end();
-#endif
-
- return ((nfail == 0 && nerror == 0) ? 0 : -1);
-}
+/*
+ * This program was generated by testconv.rb.
+ */
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
+#include <stdio.h>
+
+#ifdef POSIX_TEST
+#include "onigposix.h"
+#else
+#include "oniguruma.h"
+#endif
+
+#include <string.h>
+
+#define SLEN(s) strlen(s)
+
+static int nsucc = 0;
+static int nfail = 0;
+static int nerror = 0;
+
+static FILE* err_file;
+
+#ifndef POSIX_TEST
+static OnigRegion* region;
+#endif
+
+/*
+ * Run one test case: compile `pattern`, search `str` with it, and record
+ * the outcome in the file-level nsucc / nfail / nerror counters.
+ *
+ *   pattern  regular expression source text
+ *   str      subject string that is searched
+ *   from,to  expected start / end offsets of capture group `mem`
+ *   mem      index of the capture group whose bounds are compared
+ *   not      non-zero when the pattern is expected NOT to match
+ *
+ * OK / FAIL lines are written to stdout; compile and search errors are
+ * written to err_file and counted in nerror.
+ */
+static void xx(char* pattern, char* str, int from, int to, int mem, int not)
+{
+  int r;
+
+#ifdef POSIX_TEST
+  regex_t reg;
+  char buf[200];
+  regmatch_t pmatch[25];
+
+  r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
+  if (r) {
+    regerror(r, &reg, buf, sizeof(buf));
+    fprintf(err_file, "ERROR: %s\n", buf);
+    nerror++;
+    return ;
+  }
+
+  r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
+  if (r != 0 && r != REG_NOMATCH) {
+    regerror(r, &reg, buf, sizeof(buf));
+    fprintf(err_file, "ERROR: %s\n", buf);
+    nerror++;
+    /* The pattern compiled successfully, so it must be released here too. */
+    regfree(&reg);
+    return ;
+  }
+
+  if (r == REG_NOMATCH) {
+    if (not) {
+      fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
+      nsucc++;
+    }
+    else {
+      fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+  }
+  else {
+    if (not) {
+      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+    else {
+      if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
+        fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
+        nsucc++;
+      }
+      else {
+        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
+                from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
+        nfail++;
+      }
+    }
+  }
+  regfree(&reg);
+
+#else
+  regex_t* reg;
+  OnigErrorInfo einfo;
+
+  r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)),
+               ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo);
+  if (r) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str((UChar* )s, r, &einfo);
+    fprintf(err_file, "ERROR: %s\n", s);
+    nerror++;
+    return ;
+  }
+
+  r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
+                  (UChar* )str, (UChar* )(str + SLEN(str)),
+                  region, ONIG_OPTION_NONE);
+  if (r < ONIG_MISMATCH) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str((UChar* )s, r);
+    fprintf(err_file, "ERROR: %s\n", s);
+    nerror++;
+    /* The regex object was created successfully, so free it before leaving. */
+    onig_free(reg);
+    return ;
+  }
+
+  if (r == ONIG_MISMATCH) {
+    if (not) {
+      fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
+      nsucc++;
+    }
+    else {
+      fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+  }
+  else {
+    if (not) {
+      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+    else {
+      if (region->beg[mem] == from && region->end[mem] == to) {
+        fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
+        nsucc++;
+      }
+      else {
+        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
+                from, to, region->beg[mem], region->end[mem]);
+        nfail++;
+      }
+    }
+  }
+  onig_free(reg);
+#endif
+}
+
+/* Positive test: `pattern` must match `str`, with group 0 spanning from..to. */
+static void x2(char* pattern, char* str, int from, int to)
+{
+  const int group = 0;         /* capture index handed to xx() */
+  const int expect_nomatch = 0; /* 0: a match is expected */
+
+  xx(pattern, str, from, to, group, expect_nomatch);
+}
+
+/* Positive test: `pattern` must match `str`, with group `mem` spanning from..to. */
+static void x3(char* pattern, char* str, int from, int to, int mem)
+{
+  const int expect_nomatch = 0; /* 0: a match is expected */
+
+  xx(pattern, str, from, to, mem, expect_nomatch);
+}
+
+/* Negative test: `pattern` must NOT match `str`. */
+static void n(char* pattern, char* str)
+{
+  const int unused_pos = 0;     /* from/to/mem are passed as 0 for negative tests */
+  const int expect_nomatch = 1; /* non-zero: a mismatch is the passing result */
+
+  xx(pattern, str, unused_pos, unused_pos, unused_pos, expect_nomatch);
+}
+
+extern int main(int argc, char* argv[])
+{
+#ifndef POSIX_TEST
+ static OnigEncoding use_encs[1];
+
+ use_encs[0] = ONIG_ENCODING_SJIS;
+ onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));
+#endif
+
+ err_file = stdout;
+
+#ifdef POSIX_TEST
+ reg_set_encoding(REG_POSIX_ENCODING_SJIS);
+#else
+ region = onig_region_new();
+#endif
+
+ x2("", "", 0, 0);
+ x2("^", "", 0, 0);
+ x2("$", "", 0, 0);
+ x2("\\G", "", 0, 0);
+ x2("\\A", "", 0, 0);
+ x2("\\Z", "", 0, 0);
+ x2("\\z", "", 0, 0);
+ x2("^$", "", 0, 0);
+ x2("\\ca", "\001", 0, 1);
+ x2("\\C-b", "\002", 0, 1);
+ x2("\\c\\\\", "\034", 0, 1);
+ x2("q[\\c\\\\]", "q\034", 0, 2);
+ x2("", "a", 0, 0);
+ x2("a", "a", 0, 1);
+ x2("\\x61", "a", 0, 1);
+ x2("aa", "aa", 0, 2);
+ x2("aaa", "aaa", 0, 3);
+ x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
+ x2("ab", "ab", 0, 2);
+ x2("b", "ab", 1, 2);
+ x2("bc", "abc", 1, 3);
+ x2("(?i:#RET#)", "#INS##RET#", 5, 10);
+ x2("\\17", "\017", 0, 1);
+ x2("\\x1f", "\x1f", 0, 1);
+ x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
+ x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
+ x2(".", "a", 0, 1);
+ n(".", "");
+ x2("..", "ab", 0, 2);
+ x2("\\w", "e", 0, 1);
+ n("\\W", "e");
+ x2("\\s", " ", 0, 1);
+ x2("\\S", "b", 0, 1);
+ x2("\\d", "4", 0, 1);
+ n("\\D", "4");
+ x2("\\b", "z ", 0, 0);
+ x2("\\b", " z", 1, 1);
+ x2("\\B", "zz ", 1, 1);
+ x2("\\B", "z ", 2, 2);
+ x2("\\B", " z", 0, 0);
+ x2("[ab]", "b", 0, 1);
+ n("[ab]", "c");
+ x2("[a-z]", "t", 0, 1);
+ n("[^a]", "a");
+ x2("[^a]", "\n", 0, 1);
+ x2("[]]", "]", 0, 1);
+ n("[^]]", "]");
+ x2("[\\^]+", "0^^1", 1, 3);
+ x2("[b-]", "b", 0, 1);
+ x2("[b-]", "-", 0, 1);
+ x2("[\\w]", "z", 0, 1);
+ n("[\\w]", " ");
+ x2("[\\W]", "b$", 1, 2);
+ x2("[\\d]", "5", 0, 1);
+ n("[\\d]", "e");
+ x2("[\\D]", "t", 0, 1);
+ n("[\\D]", "3");
+ x2("[\\s]", " ", 0, 1);
+ n("[\\s]", "a");
+ x2("[\\S]", "b", 0, 1);
+ n("[\\S]", " ");
+ x2("[\\w\\d]", "2", 0, 1);
+ n("[\\w\\d]", " ");
+ x2("[[:upper:]]", "B", 0, 1);
+ x2("[*[:xdigit:]+]", "+", 0, 1);
+ x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
+ x2("[*[:xdigit:]+]", "-@^+", 3, 4);
+ n("[[:upper]]", "A");
+ x2("[[:upper]]", ":", 0, 1);
+ x2("[\\044-\\047]", "\046", 0, 1);
+ x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
+ x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
+ n("[\\x6A-\\x6D]", "\x6E");
+ n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
+ x2("[\\[]", "[", 0, 1);
+ x2("[\\]]", "]", 0, 1);
+ x2("[&]", "&", 0, 1);
+ x2("[[ab]]", "b", 0, 1);
+ x2("[[ab]c]", "c", 0, 1);
+ n("[[^a]]", "a");
+ n("[^[a]]", "a");
+ x2("[[ab]&&bc]", "b", 0, 1);
+ n("[[ab]&&bc]", "a");
+ n("[[ab]&&bc]", "c");
+ x2("[a-z&&b-y&&c-x]", "w", 0, 1);
+ n("[^a-z&&b-y&&c-x]", "w");
+ x2("[[^a&&a]&&a-z]", "b", 0, 1);
+ n("[[^a&&a]&&a-z]", "a");
+ x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
+ n("[[^a-z&&bcdef]&&[^c-g]]", "c");
+ x2("[^[^abc]&&[^cde]]", "c", 0, 1);
+ x2("[^[^abc]&&[^cde]]", "e", 0, 1);
+ n("[^[^abc]&&[^cde]]", "f");
+ x2("[a-&&-a]", "-", 0, 1);
+ n("[a\\-&&\\-a]", "&");
+ n("\\wabc", " abc");
+ x2("a\\Wbc", "a bc", 0, 4);
+ x2("a.b.c", "aabbc", 0, 5);
+ x2(".\\wb\\W..c", "abb bcc", 0, 7);
+ x2("\\s\\wzzz", " zzzz", 0, 5);
+ x2("aa.b", "aabb", 0, 4);
+ n(".a", "ab");
+ x2(".a", "aa", 0, 2);
+ x2("^a", "a", 0, 1);
+ x2("^a$", "a", 0, 1);
+ x2("^\\w$", "a", 0, 1);
+ n("^\\w$", " ");
+ x2("^\\wab$", "zab", 0, 3);
+ x2("^\\wabcdef$", "zabcdef", 0, 7);
+ x2("^\\w...def$", "zabcdef", 0, 7);
+ x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
+ x2("\\A\\Z", "", 0, 0);
+ x2("\\Axyz", "xyz", 0, 3);
+ x2("xyz\\Z", "xyz", 0, 3);
+ x2("xyz\\z", "xyz", 0, 3);
+ x2("a\\Z", "a", 0, 1);
+ x2("\\Gaz", "az", 0, 2);
+ n("\\Gz", "bza");
+ n("az\\G", "az");
+ n("az\\A", "az");
+ n("a\\Az", "az");
+ x2("\\^\\$", "^$", 0, 2);
+ x2("^x?y", "xy", 0, 2);
+ x2("^(x?y)", "xy", 0, 2);
+ x2("\\w", "_", 0, 1);
+ n("\\W", "_");
+ x2("(?=z)z", "z", 0, 1);
+ n("(?=z).", "a");
+ x2("(?!z)a", "a", 0, 1);
+ n("(?!z)a", "z");
+ x2("(?i:a)", "a", 0, 1);
+ x2("(?i:a)", "A", 0, 1);
+ x2("(?i:A)", "a", 0, 1);
+ n("(?i:A)", "b");
+ x2("(?i:[A-Z])", "a", 0, 1);
+ x2("(?i:[f-m])", "H", 0, 1);
+ x2("(?i:[f-m])", "h", 0, 1);
+ n("(?i:[f-m])", "e");
+ x2("(?i:[A-c])", "D", 0, 1);
+ n("(?i:[^a-z])", "A");
+ n("(?i:[^a-z])", "a");
+ x2("(?i:[!-k])", "Z", 0, 1);
+ x2("(?i:[!-k])", "7", 0, 1);
+ x2("(?i:[T-}])", "b", 0, 1);
+ x2("(?i:[T-}])", "{", 0, 1);
+ x2("(?i:\\?a)", "?A", 0, 2);
+ x2("(?i:\\*A)", "*a", 0, 2);
+ n(".", "\n");
+ x2("(?m:.)", "\n", 0, 1);
+ x2("(?m:a.)", "a\n", 0, 2);
+ x2("(?m:.b)", "a\nb", 1, 3);
+ x2(".*abc", "dddabdd\nddabc", 8, 13);
+ x2("(?m:.*abc)", "dddabddabc", 0, 10);
+ n("(?i)(?-i)a", "A");
+ n("(?i)(?-i:a)", "A");
+ x2("a?", "", 0, 0);
+ x2("a?", "b", 0, 0);
+ x2("a?", "a", 0, 1);
+ x2("a*", "", 0, 0);
+ x2("a*", "a", 0, 1);
+ x2("a*", "aaa", 0, 3);
+ x2("a*", "baaaa", 0, 0);
+ n("a+", "");
+ x2("a+", "a", 0, 1);
+ x2("a+", "aaaa", 0, 4);
+ x2("a+", "aabbb", 0, 2);
+ x2("a+", "baaaa", 1, 5);
+ x2(".?", "", 0, 0);
+ x2(".?", "f", 0, 1);
+ x2(".?", "\n", 0, 0);
+ x2(".*", "", 0, 0);
+ x2(".*", "abcde", 0, 5);
+ x2(".+", "z", 0, 1);
+ x2(".+", "zdswer\n", 0, 6);
+ x2("(.*)a\\1f", "babfbac", 0, 4);
+ x2("(.*)a\\1f", "bacbabf", 3, 7);
+ x2("((.*)a\\2f)", "bacbabf", 3, 7);
+ x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
+ x2("a|b", "a", 0, 1);
+ x2("a|b", "b", 0, 1);
+ x2("|a", "a", 0, 0);
+ x2("(|a)", "a", 0, 0);
+ x2("ab|bc", "ab", 0, 2);
+ x2("ab|bc", "bc", 0, 2);
+ x2("z(?:ab|bc)", "zbc", 0, 3);
+ x2("a(?:ab|bc)c", "aabc", 0, 4);
+ x2("ab|(?:ac|az)", "az", 0, 2);
+ x2("a|b|c", "dc", 1, 2);
+ x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
+ n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
+ x2("a|^z", "ba", 1, 2);
+ x2("a|^z", "za", 0, 1);
+ x2("a|\\Gz", "bza", 2, 3);
+ x2("a|\\Gz", "za", 0, 1);
+ x2("a|\\Az", "bza", 2, 3);
+ x2("a|\\Az", "za", 0, 1);
+ x2("a|b\\Z", "ba", 1, 2);
+ x2("a|b\\Z", "b", 0, 1);
+ x2("a|b\\z", "ba", 1, 2);
+ x2("a|b\\z", "b", 0, 1);
+ x2("\\w|\\s", " ", 0, 1);
+ n("\\w|\\w", " ");
+ x2("\\w|%", "%", 0, 1);
+ x2("\\w|[&$]", "&", 0, 1);
+ x2("[b-d]|[^e-z]", "a", 0, 1);
+ x2("(?:a|[c-f])|bz", "dz", 0, 1);
+ x2("(?:a|[c-f])|bz", "bz", 0, 2);
+ x2("abc|(?=zz)..f", "zzf", 0, 3);
+ x2("abc|(?!zz)..f", "abf", 0, 3);
+ x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
+ n("(?>a|abd)c", "abdc");
+ x2("(?>abd|a)c", "abdc", 0, 4);
+ x2("a?|b", "a", 0, 1);
+ x2("a?|b", "b", 0, 0);
+ x2("a?|b", "", 0, 0);
+ x2("a*|b", "aa", 0, 2);
+ x2("a*|b*", "ba", 0, 0);
+ x2("a*|b*", "ab", 0, 1);
+ x2("a+|b*", "", 0, 0);
+ x2("a+|b*", "bbb", 0, 3);
+ x2("a+|b*", "abbb", 0, 1);
+ n("a+|b+", "");
+ x2("(a|b)?", "b", 0, 1);
+ x2("(a|b)*", "ba", 0, 2);
+ x2("(a|b)+", "bab", 0, 3);
+ x2("(ab|ca)+", "caabbc", 0, 4);
+ x2("(ab|ca)+", "aabca", 1, 5);
+ x2("(ab|ca)+", "abzca", 0, 2);
+ x2("(a|bab)+", "ababa", 0, 5);
+ x2("(a|bab)+", "ba", 1, 2);
+ x2("(a|bab)+", "baaaba", 1, 4);
+ x2("(?:a|b)(?:a|b)", "ab", 0, 2);
+ x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
+ x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
+ x2("(?:a+|b+){2}", "aaabbb", 0, 6);
+ x2("h{0,}", "hhhh", 0, 4);
+ x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
+ n("ax{2}*a", "0axxxa1");
+ n("a.{0,2}a", "0aXXXa0");
+ n("a.{0,2}?a", "0aXXXa0");
+ n("a.{0,2}?a", "0aXXXXa0");
+ x2("^a{2,}?a$", "aaa", 0, 3);
+ x2("^[a-z]{2,}?$", "aaa", 0, 3);
+ x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
+ n("(?:a+|\\Ab*)cc", "abcc");
+ x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
+ x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
+ x2("a|(?i)c", "C", 0, 1);
+ x2("(?i)c|a", "C", 0, 1);
+ x2("(?i)c|a", "A", 0, 1);
+ x2("(?i:c)|a", "C", 0, 1);
+ n("(?i:c)|a", "A");
+ x2("[abc]?", "abc", 0, 1);
+ x2("[abc]*", "abc", 0, 3);
+ x2("[^abc]*", "abc", 0, 0);
+ n("[^abc]+", "abc");
+ x2("a?\?", "aaa", 0, 0);
+ x2("ba?\?b", "bab", 0, 3);
+ x2("a*?", "aaa", 0, 0);
+ x2("ba*?", "baa", 0, 1);
+ x2("ba*?b", "baab", 0, 4);
+ x2("a+?", "aaa", 0, 1);
+ x2("ba+?", "baa", 0, 2);
+ x2("ba+?b", "baab", 0, 4);
+ x2("(?:a?)?\?", "a", 0, 0);
+ x2("(?:a?\?)?", "a", 0, 0);
+ x2("(?:a?)+?", "aaa", 0, 1);
+ x2("(?:a+)?\?", "aaa", 0, 0);
+ x2("(?:a+)?\?b", "aaab", 0, 4);
+ x2("(?:ab)?{2}", "", 0, 0);
+ x2("(?:ab)?{2}", "ababa", 0, 4);
+ x2("(?:ab)*{0}", "ababa", 0, 0);
+ x2("(?:ab){3,}", "abababab", 0, 8);
+ n("(?:ab){3,}", "abab");
+ x2("(?:ab){2,4}", "ababab", 0, 6);
+ x2("(?:ab){2,4}", "ababababab", 0, 8);
+ x2("(?:ab){2,4}?", "ababababab", 0, 4);
+ x2("(?:ab){,}", "ab{,}", 0, 5);
+ x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
+ x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
+ x2("(d+)([^abc]z)", "dddz", 0, 4);
+ x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
+ x2("(\\w+)(\\wz)", "dddz", 0, 4);
+ x3("(a)", "a", 0, 1, 1);
+ x3("(ab)", "ab", 0, 2, 1);
+ x2("((ab))", "ab", 0, 2);
+ x3("((ab))", "ab", 0, 2, 1);
+ x3("((ab))", "ab", 0, 2, 2);
+ x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
+ x3("(ab)(cd)", "abcd", 0, 2, 1);
+ x3("(ab)(cd)", "abcd", 2, 4, 2);
+ x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
+ x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
+ x2("(^a)", "a", 0, 1);
+ x3("(a)|(a)", "ba", 1, 2, 1);
+ x3("(^a)|(a)", "ba", 1, 2, 2);
+ x3("(a?)", "aaa", 0, 1, 1);
+ x3("(a*)", "aaa", 0, 3, 1);
+ x3("(a*)", "", 0, 0, 1);
+ x3("(a+)", "aaaaaaa", 0, 7, 1);
+ x3("(a+|b*)", "bbbaa", 0, 3, 1);
+ x3("(a+|b?)", "bbbaa", 0, 1, 1);
+ x3("(abc)?", "abc", 0, 3, 1);
+ x3("(abc)*", "abc", 0, 3, 1);
+ x3("(abc)+", "abc", 0, 3, 1);
+ x3("(xyz|abc)+", "abc", 0, 3, 1);
+ x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
+ x3("((?i:abc))", "AbC", 0, 3, 1);
+ x2("(abc)(?i:\\1)", "abcABC", 0, 6);
+ x3("((?m:a.c))", "a\nc", 0, 3, 1);
+ x3("((?=az)a)", "azb", 0, 1, 1);
+ x3("abc|(.abd)", "zabd", 0, 4, 1);
+ x2("(?:abc)|(ABC)", "abc", 0, 3);
+ x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
+ x3("a*(.)", "aaaaz", 4, 5, 1);
+ x3("a*?(.)", "aaaaz", 0, 1, 1);
+ x3("a*?(c)", "aaaac", 4, 5, 1);
+ x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
+ x3("(\\Abb)cc", "bbcc", 0, 2, 1);
+ n("(\\Abb)cc", "zbbcc");
+ x3("(^bb)cc", "bbcc", 0, 2, 1);
+ n("(^bb)cc", "zbbcc");
+ x3("cc(bb$)", "ccbb", 2, 4, 1);
+ n("cc(bb$)", "ccbbb");
+ n("(\\1)", "");
+ n("\\1(a)", "aa");
+ n("(a(b)\\1)\\2+", "ababb");
+ n("(?:(?:\\1|z)(a))+$", "zaa");
+ x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
+ x2("(a)(?=\\1)", "aa", 0, 1);
+ n("(a)$|\\1", "az");
+ x2("(a)\\1", "aa", 0, 2);
+ n("(a)\\1", "ab");
+ x2("(a?)\\1", "aa", 0, 2);
+ x2("(a?\?)\\1", "aa", 0, 0);
+ x2("(a*)\\1", "aaaaa", 0, 4);
+ x3("(a*)\\1", "aaaaa", 0, 2, 1);
+ x2("a(b*)\\1", "abbbb", 0, 5);
+ x2("a(b*)\\1", "ab", 0, 1);
+ x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
+ x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
+ x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
+ x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
+ x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
+ x2("([a-d])\\1", "cc", 0, 2);
+ x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
+ n("(\\w\\d\\s)\\1", "f5 f5");
+ x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
+ x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
+ x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
+ x2("(^a)\\1", "aa", 0, 2);
+ n("(^a)\\1", "baa");
+ n("(a$)\\1", "aa");
+ n("(ab\\Z)\\1", "ab");
+ x2("(a*\\Z)\\1", "a", 1, 1);
+ x2(".(a*\\Z)\\1", "ba", 1, 2);
+ x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
+ x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
+ x2("((?i:az))\\1", "AzAz", 0, 4);
+ n("((?i:az))\\1", "Azaz");
+ x2("(?<=a)b", "ab", 1, 2);
+ n("(?<=a)b", "bb");
+ x2("(?<=a|b)b", "bb", 1, 2);
+ x2("(?<=a|bc)b", "bcb", 2, 3);
+ x2("(?<=a|bc)b", "ab", 1, 2);
+ x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
+ x2("(a)\\g<1>", "aa", 0, 2);
+ x2("(?<!a)b", "cb", 1, 2);
+ n("(?<!a)b", "ab");
+ x2("(?<!a|bc)b", "bbb", 0, 1);
+ n("(?<!a|bc)z", "bcz");
+ x2("(?<name1>a)", "a", 0, 1);
+ x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
+ x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
+ x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
+ x2("(?<n>|a\\g<n>)+", "", 0, 0);
+ x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
+ x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
+ x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
+ x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
+ x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
+ x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
+ x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
+ x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
+ x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
+ x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
+ x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
+ n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
+ x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
+ x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
+ x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
+ x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
+ x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
+ x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
+ x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
+ x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
+ x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
+ x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
+ x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
+ x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
+ x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
+ x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
+ x2("()*\\1", "", 0, 0);
+ x2("(?:()|())*\\1\\2", "", 0, 0);
+ x3("(?:\\1a|())*", "a", 0, 0, 1);
+ x2("x((.)*)*x", "0x1x2x3", 1, 6);
+ x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
+ x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
+ x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
+ x2("\\xED\\xF2", "\xed\xf2", 0, 2);
+ x2("", "‚ ", 0, 0);
+ x2("‚ ", "‚ ", 0, 2);
+ n("‚¢", "‚ ");
+ x2("‚¤‚¤", "‚¤‚¤", 0, 4);
+ x2("‚ ‚¢‚¤", "‚ ‚¢‚¤", 0, 6);
+ x2("‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±", "‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±‚±", 0, 70);
+ x2("‚ ", "‚¢‚ ", 2, 4);
+ x2("‚¢‚¤", "‚ ‚¢‚¤", 2, 6);
+ x2("\\xca\\xb8", "\xca\xb8", 0, 2);
+ x2(".", "‚ ", 0, 2);
+ x2("..", "‚©‚«", 0, 4);
+ x2("\\w", "‚¨", 0, 2);
+ n("\\W", "‚ ");
+ x2("[\\W]", "‚¤$", 2, 3);
+ x2("\\S", "‚»", 0, 2);
+ x2("\\S", "Š¿", 0, 2);
+ x2("\\b", "‹C ", 0, 0);
+ x2("\\b", " ‚Ù", 1, 1);
+ x2("\\B", "‚¹‚» ", 2, 2);
+ x2("\\B", "‚¤ ", 3, 3);
+ x2("\\B", " ‚¢", 0, 0);
+ x2("[‚½‚¿]", "‚¿", 0, 2);
+ n("[‚È‚É]", "‚Ê");
+ x2("[‚¤-‚¨]", "‚¦", 0, 2);
+ n("[^‚¯]", "‚¯");
+ x2("[\\w]", "‚Ë", 0, 2);
+ n("[\\d]", "‚Ó");
+ x2("[\\D]", "‚Í", 0, 2);
+ n("[\\s]", "‚­");
+ x2("[\\S]", "‚Ö", 0, 2);
+ x2("[\\w\\d]", "‚æ", 0, 2);
+ x2("[\\w\\d]", " ‚æ", 3, 5);
+ n("\\w‹SŽÔ", " ‹SŽÔ");
+ x2("‹S\\WŽÔ", "‹S ŽÔ", 0, 5);
+ x2("‚ .‚¢.‚¤", "‚ ‚ ‚¢‚¢‚¤", 0, 10);
+ x2(".\\w‚¤\\W..‚¼", "‚¦‚¤‚¤ ‚¤‚¼‚¼", 0, 13);
+ x2("\\s\\w‚±‚±‚±", " ‚±‚±‚±‚±", 0, 9);
+ x2("‚ ‚ .‚¯", "‚ ‚ ‚¯‚¯", 0, 8);
+ n(".‚¢", "‚¢‚¦");
+ x2(".‚¨", "‚¨‚¨", 0, 4);
+ x2("^‚ ", "‚ ", 0, 2);
+ x2("^‚Þ$", "‚Þ", 0, 2);
+ x2("^\\w$", "‚É", 0, 2);
+ x2("^\\w‚©‚«‚­‚¯‚±$", "z‚©‚«‚­‚¯‚±", 0, 11);
+ x2("^\\w...‚¤‚¦‚¨$", "z‚ ‚¢‚¤‚¤‚¦‚¨", 0, 13);
+ x2("\\w\\w\\s\\W‚¨‚¨‚¨\\d", "a‚¨ ‚¨‚¨‚¨4", 0, 12);
+ x2("\\A‚½‚¿‚Â", "‚½‚¿‚Â", 0, 6);
+ x2("‚Þ‚ß‚à\\Z", "‚Þ‚ß‚à", 0, 6);
+ x2("‚©‚«‚­\\z", "‚©‚«‚­", 0, 6);
+ x2("‚©‚«‚­\\Z", "‚©‚«‚­\n", 0, 6);
+ x2("\\G‚Û‚Ò", "‚Û‚Ò", 0, 4);
+ n("\\G‚¦", "‚¤‚¦‚¨");
+ n("‚Æ‚Ä\\G", "‚Æ‚Ä");
+ n("‚Ü‚Ý\\A", "‚Ü‚Ý");
+ n("‚Ü\\A‚Ý", "‚Ü‚Ý");
+ x2("(?=‚¹)‚¹", "‚¹", 0, 2);
+ n("(?=‚¤).", "‚¢");
+ x2("(?!‚¤)‚©", "‚©", 0, 2);
+ n("(?!‚Æ)‚ ", "‚Æ");
+ x2("(?i:‚ )", "‚ ", 0, 2);
+ x2("(?i:‚Ô‚×)", "‚Ô‚×", 0, 4);
+ n("(?i:‚¢)", "‚¤");
+ x2("(?m:‚æ.)", "‚æ\n", 0, 3);
+ x2("(?m:.‚ß)", "‚Ü\n‚ß", 2, 5);
+ x2("‚ ?", "", 0, 0);
+ x2("•Ï?", "‰»", 0, 0);
+ x2("•Ï?", "•Ï", 0, 2);
+ x2("—Ê*", "", 0, 0);
+ x2("—Ê*", "—Ê", 0, 2);
+ x2("Žq*", "ŽqŽqŽq", 0, 6);
+ x2("”n*", "Ž­”n”n”n”n", 0, 0);
+ n("ŽR+", "");
+ x2("‰Í+", "‰Í", 0, 2);
+ x2("Žž+", "ŽžŽžŽžŽž", 0, 8);
+ x2("‚¦+", "‚¦‚¦‚¤‚¤‚¤", 0, 4);
+ x2("‚¤+", "‚¨‚¤‚¤‚¤‚¤", 2, 10);
+ x2(".?", "‚½", 0, 2);
+ x2(".*", "‚Ï‚Ò‚Õ‚Ø", 0, 8);
+ x2(".+", "‚ë", 0, 2);
+ x2(".+", "‚¢‚¤‚¦‚©\n", 0, 8);
+ x2("‚ |‚¢", "‚ ", 0, 2);
+ x2("‚ |‚¢", "‚¢", 0, 2);
+ x2("‚ ‚¢|‚¢‚¤", "‚ ‚¢", 0, 4);
+ x2("‚ ‚¢|‚¢‚¤", "‚¢‚¤", 0, 4);
+ x2("‚ð(?:‚©‚«|‚«‚­)", "‚ð‚©‚«", 0, 6);
+ x2("‚ð(?:‚©‚«|‚«‚­)‚¯", "‚ð‚«‚­‚¯", 0, 8);
+ x2("‚ ‚¢|(?:‚ ‚¤|‚ ‚ð)", "‚ ‚ð", 0, 4);
+ x2("‚ |‚¢|‚¤", "‚¦‚¤", 2, 4);
+ x2("‚ |‚¢|‚¤‚¦|‚¨‚©‚«|‚­|‚¯‚±‚³|‚µ‚·‚¹|‚»|‚½‚¿|‚‚ĂƂȂÉ|‚Ê‚Ë", "‚µ‚·‚¹", 0, 6);
+ n("‚ |‚¢|‚¤‚¦|‚¨‚©‚«|‚­|‚¯‚±‚³|‚µ‚·‚¹|‚»|‚½‚¿|‚‚ĂƂȂÉ|‚Ê‚Ë", "‚·‚¹");
+ x2("‚ |^‚í", "‚Ô‚ ", 2, 4);
+ x2("‚ |^‚ð", "‚ð‚ ", 0, 2);
+ x2("‹S|\\GŽÔ", "‚¯ŽÔ‹S", 4, 6);
+ x2("‹S|\\GŽÔ", "ŽÔ‹S", 0, 2);
+ x2("‹S|\\AŽÔ", "bŽÔ‹S", 3, 5);
+ x2("‹S|\\AŽÔ", "ŽÔ", 0, 2);
+ x2("‹S|ŽÔ\\Z", "ŽÔ‹S", 2, 4);
+ x2("‹S|ŽÔ\\Z", "ŽÔ", 0, 2);
+ x2("‹S|ŽÔ\\Z", "ŽÔ\n", 0, 2);
+ x2("‹S|ŽÔ\\z", "ŽÔ‹S", 2, 4);
+ x2("‹S|ŽÔ\\z", "ŽÔ", 0, 2);
+ x2("\\w|\\s", "‚¨", 0, 2);
+ x2("\\w|%", "%‚¨", 0, 1);
+ x2("\\w|[&$]", "‚¤&", 0, 2);
+ x2("[‚¢-‚¯]", "‚¤", 0, 2);
+ x2("[‚¢-‚¯]|[^‚©-‚±]", "‚ ", 0, 2);
+ x2("[‚¢-‚¯]|[^‚©-‚±]", "‚©", 0, 2);
+ x2("[^‚ ]", "\n", 0, 1);
+ x2("(?:‚ |[‚¤-‚«])|‚¢‚ð", "‚¤‚ð", 0, 2);
+ x2("(?:‚ |[‚¤-‚«])|‚¢‚ð", "‚¢‚ð", 0, 4);
+ x2("‚ ‚¢‚¤|(?=‚¯‚¯)..‚Ù", "‚¯‚¯‚Ù", 0, 6);
+ x2("‚ ‚¢‚¤|(?!‚¯‚¯)..‚Ù", "‚ ‚¢‚Ù", 0, 6);
+ x2("(?=‚ð‚ )..‚ |(?=‚ð‚ð)..‚ ", "‚ð‚ð‚ ", 0, 6);
+ x2("(?<=‚ |‚¢‚¤)‚¢", "‚¢‚¤‚¢", 4, 6);
+ n("(?>‚ |‚ ‚¢‚¦)‚¤", "‚ ‚¢‚¦‚¤");
+ x2("(?>‚ ‚¢‚¦|‚ )‚¤", "‚ ‚¢‚¦‚¤", 0, 8);
+ x2("‚ ?|‚¢", "‚ ", 0, 2);
+ x2("‚ ?|‚¢", "‚¢", 0, 0);
+ x2("‚ ?|‚¢", "", 0, 0);
+ x2("‚ *|‚¢", "‚ ‚ ", 0, 4);
+ x2("‚ *|‚¢*", "‚¢‚ ", 0, 0);
+ x2("‚ *|‚¢*", "‚ ‚¢", 0, 2);
+ x2("[a‚ ]*|‚¢*", "a‚ ‚¢‚¢‚¢", 0, 3);
+ x2("‚ +|‚¢*", "", 0, 0);
+ x2("‚ +|‚¢*", "‚¢‚¢‚¢", 0, 6);
+ x2("‚ +|‚¢*", "‚ ‚¢‚¢‚¢", 0, 2);
+ x2("‚ +|‚¢*", "a‚ ‚¢‚¢‚¢", 0, 0);
+ n("‚ +|‚¢+", "");
+ x2("(‚ |‚¢)?", "‚¢", 0, 2);
+ x2("(‚ |‚¢)*", "‚¢‚ ", 0, 4);
+ x2("(‚ |‚¢)+", "‚¢‚ ‚¢", 0, 6);
+ x2("(‚ ‚¢|‚¤‚ )+", "‚¤‚ ‚ ‚¢‚¤‚¦", 0, 8);
+ x2("(‚ ‚¢|‚¤‚¦)+", "‚¤‚ ‚ ‚¢‚¤‚¦", 4, 12);
+ x2("(‚ ‚¢|‚¤‚ )+", "‚ ‚ ‚¢‚¤‚ ", 2, 10);
+ x2("(‚ ‚¢|‚¤‚ )+", "‚ ‚¢‚ð‚¤‚ ", 0, 4);
+ x2("(‚ ‚¢|‚¤‚ )+", "$$zzzz‚ ‚¢‚ð‚¤‚ ", 6, 10);
+ x2("(‚ |‚¢‚ ‚¢)+", "‚ ‚¢‚ ‚¢‚ ", 0, 10);
+ x2("(‚ |‚¢‚ ‚¢)+", "‚¢‚ ", 2, 4);
+ x2("(‚ |‚¢‚ ‚¢)+", "‚¢‚ ‚ ‚ ‚¢‚ ", 2, 8);
+ x2("(?:‚ |‚¢)(?:‚ |‚¢)", "‚ ‚¢", 0, 4);
+ x2("(?:‚ *|‚¢*)(?:‚ *|‚¢*)", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 6);
+ x2("(?:‚ *|‚¢*)(?:‚ +|‚¢+)", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12);
+ x2("(?:‚ +|‚¢+){2}", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12);
+ x2("(?:‚ +|‚¢+){1,2}", "‚ ‚ ‚ ‚¢‚¢‚¢", 0, 12);
+ x2("(?:‚ +|\\A‚¢*)‚¤‚¤", "‚¤‚¤", 0, 4);
+ n("(?:‚ +|\\A‚¢*)‚¤‚¤", "‚ ‚¢‚¤‚¤");
+ x2("(?:^‚ +|‚¢+)*‚¤", "‚ ‚ ‚¢‚¢‚¢‚ ‚¢‚¤", 12, 16);
+ x2("(?:^‚ +|‚¢+)*‚¤", "‚ ‚ ‚¢‚¢‚¢‚¢‚¤", 0, 14);
+ x2("‚¤{0,}", "‚¤‚¤‚¤‚¤", 0, 8);
+ x2("‚ |(?i)c", "C", 0, 1);
+ x2("(?i)c|‚ ", "C", 0, 1);
+ x2("(?i:‚ )|a", "a", 0, 1);
+ n("(?i:‚ )|a", "A");
+ x2("[‚ ‚¢‚¤]?", "‚ ‚¢‚¤", 0, 2);
+ x2("[‚ ‚¢‚¤]*", "‚ ‚¢‚¤", 0, 6);
+ x2("[^‚ ‚¢‚¤]*", "‚ ‚¢‚¤", 0, 0);
+ n("[^‚ ‚¢‚¤]+", "‚ ‚¢‚¤");
+ x2("‚ ?\?", "‚ ‚ ‚ ", 0, 0);
+ x2("‚¢‚ ?\?‚¢", "‚¢‚ ‚¢", 0, 6);
+ x2("‚ *?", "‚ ‚ ‚ ", 0, 0);
+ x2("‚¢‚ *?", "‚¢‚ ‚ ", 0, 2);
+ x2("‚¢‚ *?‚¢", "‚¢‚ ‚ ‚¢", 0, 8);
+ x2("‚ +?", "‚ ‚ ‚ ", 0, 2);
+ x2("‚¢‚ +?", "‚¢‚ ‚ ", 0, 4);
+ x2("‚¢‚ +?‚¢", "‚¢‚ ‚ ‚¢", 0, 8);
+ x2("(?:“V?)?\?", "“V", 0, 0);
+ x2("(?:“V?\?)?", "“V", 0, 0);
+ x2("(?:–²?)+?", "–²–²–²", 0, 2);
+ x2("(?:•—+)?\?", "•—•—•—", 0, 0);
+ x2("(?:á+)?\?‘š", "ááá‘š", 0, 8);
+ x2("(?:‚ ‚¢)?{2}", "", 0, 0);
+ x2("(?:‹SŽÔ)?{2}", "‹SŽÔ‹SŽÔ‹S", 0, 8);
+ x2("(?:‹SŽÔ)*{0}", "‹SŽÔ‹SŽÔ‹S", 0, 0);
+ x2("(?:‹SŽÔ){3,}", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 16);
+ n("(?:‹SŽÔ){3,}", "‹SŽÔ‹SŽÔ");
+ x2("(?:‹SŽÔ){2,4}", "‹SŽÔ‹SŽÔ‹SŽÔ", 0, 12);
+ x2("(?:‹SŽÔ){2,4}", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 16);
+ x2("(?:‹SŽÔ){2,4}?", "‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ‹SŽÔ", 0, 8);
+ x2("(?:‹SŽÔ){,}", "‹SŽÔ{,}", 0, 7);
+ x2("(?:‚©‚«‚­)+?{2}", "‚©‚«‚­‚©‚«‚­‚©‚«‚­", 0, 12);
+ x3("(‰Î)", "‰Î", 0, 2, 1);
+ x3("(‰Î…)", "‰Î…", 0, 4, 1);
+ x2("((ŽžŠÔ))", "ŽžŠÔ", 0, 4);
+ x3("((•—…))", "•—…", 0, 4, 1);
+ x3("((ð“ú))", "ð“ú", 0, 4, 2);
+ x3("((((((((((((((((((((—ÊŽq))))))))))))))))))))", "—ÊŽq", 0, 4, 20);
+ x3("(‚ ‚¢)(‚¤‚¦)", "‚ ‚¢‚¤‚¦", 0, 4, 1);
+ x3("(‚ ‚¢)(‚¤‚¦)", "‚ ‚¢‚¤‚¦", 4, 8, 2);
+ x3("()(‚ )‚¢‚¤(‚¦‚¨‚©)‚«‚­‚¯‚±", "‚ ‚¢‚¤‚¦‚¨‚©‚«‚­‚¯‚±", 6, 12, 3);
+ x3("(()(‚ )‚¢‚¤(‚¦‚¨‚©)‚«‚­‚¯‚±)", "‚ ‚¢‚¤‚¦‚¨‚©‚«‚­‚¯‚±", 6, 12, 4);
+ x3(".*(ƒtƒH)ƒ“Eƒ}(ƒ“()ƒVƒ…ƒ^)ƒCƒ“", "ƒtƒHƒ“Eƒ}ƒ“ƒVƒ…ƒ^ƒCƒ“", 10, 18, 2);
+ x2("(^‚ )", "‚ ", 0, 2);
+ x3("(‚ )|(‚ )", "‚¢‚ ", 2, 4, 1);
+ x3("(^‚ )|(‚ )", "‚¢‚ ", 2, 4, 2);
+ x3("(‚ ?)", "‚ ‚ ‚ ", 0, 2, 1);
+ x3("(‚Ü*)", "‚Ü‚Ü‚Ü", 0, 6, 1);
+ x3("(‚Æ*)", "", 0, 0, 1);
+ x3("(‚é+)", "‚é‚é‚é‚é‚é‚é‚é", 0, 14, 1);
+ x3("(‚Ó+|‚Ö*)", "‚Ó‚Ó‚Ó‚Ö‚Ö", 0, 6, 1);
+ x3("(‚ +|‚¢?)", "‚¢‚¢‚¢‚ ‚ ", 0, 2, 1);
+ x3("(‚ ‚¢‚¤)?", "‚ ‚¢‚¤", 0, 6, 1);
+ x3("(‚ ‚¢‚¤)*", "‚ ‚¢‚¤", 0, 6, 1);
+ x3("(‚ ‚¢‚¤)+", "‚ ‚¢‚¤", 0, 6, 1);
+ x3("(‚³‚µ‚·|‚ ‚¢‚¤)+", "‚ ‚¢‚¤", 0, 6, 1);
+ x3("([‚È‚É‚Ê][‚©‚«‚­]|‚©‚«‚­)+", "‚©‚«‚­", 0, 6, 1);
+ x3("((?i:‚ ‚¢‚¤))", "‚ ‚¢‚¤", 0, 6, 1);
+ x3("((?m:‚ .‚¤))", "‚ \n‚¤", 0, 5, 1);
+ x3("((?=‚ ‚ñ)‚ )", "‚ ‚ñ‚¢", 0, 2, 1);
+ x3("‚ ‚¢‚¤|(.‚ ‚¢‚¦)", "‚ñ‚ ‚¢‚¦", 0, 8, 1);
+ x3("‚ *(.)", "‚ ‚ ‚ ‚ ‚ñ", 8, 10, 1);
+ x3("‚ *?(.)", "‚ ‚ ‚ ‚ ‚ñ", 0, 2, 1);
+ x3("‚ *?(‚ñ)", "‚ ‚ ‚ ‚ ‚ñ", 8, 10, 1);
+ x3("[‚¢‚¤‚¦]‚ *(.)", "‚¦‚ ‚ ‚ ‚ ‚ñ", 10, 12, 1);
+ x3("(\\A‚¢‚¢)‚¤‚¤", "‚¢‚¢‚¤‚¤", 0, 4, 1);
+ n("(\\A‚¢‚¢)‚¤‚¤", "‚ñ‚¢‚¢‚¤‚¤");
+ x3("(^‚¢‚¢)‚¤‚¤", "‚¢‚¢‚¤‚¤", 0, 4, 1);
+ n("(^‚¢‚¢)‚¤‚¤", "‚ñ‚¢‚¢‚¤‚¤");
+ x3("‚ë‚ë(‚é‚é$)", "‚ë‚ë‚é‚é", 4, 8, 1);
+ n("‚ë‚ë(‚é‚é$)", "‚ë‚ë‚é‚é‚é");
+ x2("(–³)\\1", "–³–³", 0, 4);
+ n("(–³)\\1", "–³•");
+ x2("(‹ó?)\\1", "‹ó‹ó", 0, 4);
+ x2("(‹ó?\?)\\1", "‹ó‹ó", 0, 0);
+ x2("(‹ó*)\\1", "‹ó‹ó‹ó‹ó‹ó", 0, 8);
+ x3("(‹ó*)\\1", "‹ó‹ó‹ó‹ó‹ó", 0, 4, 1);
+ x2("‚ (‚¢*)\\1", "‚ ‚¢‚¢‚¢‚¢", 0, 10);
+ x2("‚ (‚¢*)\\1", "‚ ‚¢", 0, 2);
+ x2("(‚ *)(‚¢*)\\1\\2", "‚ ‚ ‚ ‚¢‚¢‚ ‚ ‚ ‚¢‚¢", 0, 20);
+ x2("(‚ *)(‚¢*)\\2", "‚ ‚ ‚ ‚¢‚¢‚¢‚¢", 0, 14);
+ x3("(‚ *)(‚¢*)\\2", "‚ ‚ ‚ ‚¢‚¢‚¢‚¢", 6, 10, 2);
+ x2("(((((((‚Û*)‚Ø))))))‚Ò\\7", "‚Û‚Û‚Û‚Ø‚Ò‚Û‚Û‚Û", 0, 16);
+ x3("(((((((‚Û*)‚Ø))))))‚Ò\\7", "‚Û‚Û‚Û‚Ø‚Ò‚Û‚Û‚Û", 0, 6, 7);
+ x2("(‚Í)(‚Ð)(‚Ó)\\2\\1\\3", "‚͂ЂӂЂ͂Ó", 0, 12);
+ x2("([‚«-‚¯])\\1", "‚­‚­", 0, 4);
+ x2("(\\w\\d\\s)\\1", "‚ 5 ‚ 5 ", 0, 8);
+ n("(\\w\\d\\s)\\1", "‚ 5 ‚ 5");
+ x2("(’NH|[‚ -‚¤]{3})\\1", "’NH’NH", 0, 8);
+ x2("...(’NH|[‚ -‚¤]{3})\\1", "‚ a‚ ’NH’NH", 0, 13);
+ x2("(’NH|[‚ -‚¤]{3})\\1", "‚¤‚¢‚¤‚¤‚¢‚¤", 0, 12);
+ x2("(^‚±)\\1", "‚±‚±", 0, 4);
+ n("(^‚Þ)\\1", "‚ß‚Þ‚Þ");
+ n("(‚ $)\\1", "‚ ‚ ");
+ n("(‚ ‚¢\\Z)\\1", "‚ ‚¢");
+ x2("(‚ *\\Z)\\1", "‚ ", 2, 2);
+ x2(".(‚ *\\Z)\\1", "‚¢‚ ", 2, 4);
+ x3("(.(‚â‚¢‚ä)\\2)", "z‚â‚¢‚ä‚â‚¢‚ä", 0, 13, 1);
+ x3("(.(..\\d.)\\2)", "‚ 12341234", 0, 10, 1);
+ x2("((?i:‚ v‚¸))\\1", "‚ v‚¸‚ v‚¸", 0, 10);
+ x2("(?<‹ð‚©>•Ï|\\(\\g<‹ð‚©>\\))", "((((((•Ï))))))", 0, 14);
+ x2("\\A(?:\\g<ˆ¢_1>|\\g<‰]_2>|\\zI—¹ (?<ˆ¢_1>ŠÏ|Ž©\\g<‰]_2>Ž©)(?<‰]_2>Ý|•ìŽF\\g<ˆ¢_1>•ìŽF))$", "•ìŽFŽ©•ìŽFŽ©ÝŽ©•ìŽFŽ©•ìŽF", 0, 26);
+ x2("[[‚ЂÓ]]", "‚Ó", 0, 2);
+ x2("[[‚¢‚¨‚¤]‚©]", "‚©", 0, 2);
+ n("[[^‚ ]]", "‚ ");
+ n("[^[‚ ]]", "‚ ");
+ x2("[^[^‚ ]]", "‚ ", 0, 2);
+ x2("[[‚©‚«‚­]&&‚«‚­]", "‚­", 0, 2);
+ n("[[‚©‚«‚­]&&‚«‚­]", "‚©");
+ n("[[‚©‚«‚­]&&‚«‚­]", "‚¯");
+ x2("[‚ -‚ñ&&‚¢-‚ð&&‚¤-‚ï]", "‚ï", 0, 2);
+ n("[^‚ -‚ñ&&‚¢-‚ð&&‚¤-‚ï]", "‚ï");
+ x2("[[^‚ &&‚ ]&&‚ -‚ñ]", "‚¢", 0, 2);
+ n("[[^‚ &&‚ ]&&‚ -‚ñ]", "‚ ");
+ x2("[[^‚ -‚ñ&&‚¢‚¤‚¦‚¨]&&[^‚¤-‚©]]", "‚«", 0, 2);
+ n("[[^‚ -‚ñ&&‚¢‚¤‚¦‚¨]&&[^‚¤-‚©]]", "‚¢");
+ x2("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚¤", 0, 2);
+ x2("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚¦", 0, 2);
+ n("[^[^‚ ‚¢‚¤]&&[^‚¤‚¦‚¨]]", "‚©");
+ x2("[‚ -&&-‚ ]", "-", 0, 1);
+ x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]q-w]", "‚¦", 0, 2);
+ x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "f", 0, 1);
+ x2("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "g", 0, 1);
+ n("[^[^a-z‚ ‚¢‚¤]&&[^bcdefg‚¤‚¦‚¨]g-w]", "2");
+ x2("a<b>ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh<\\/b>", "a<b>ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh</b>", 0, 32);
+ x2(".<b>ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh<\\/b>", "a<b>ƒo[ƒWƒ‡ƒ“‚̃_ƒEƒ“ƒ[ƒh</b>", 0, 32);
+ fprintf(stdout,
+ "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
+ nsucc, nfail, nerror, onig_version());
+
+#ifndef POSIX_TEST
+ onig_region_free(region, 1);
+ onig_end();
+#endif
+
+ return ((nfail == 0 && nerror == 0) ? 0 : -1);
+}