summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Jörg Frings-Fürst <debian@jff-webhosting.net> 2018-03-20 06:14:49 +0100
committer Jörg Frings-Fürst <debian@jff-webhosting.net> 2018-03-20 06:14:49 +0100
commit 10abcf77cc24dfae451d96310b4391dad35906ed (patch)
tree b9812ad2b1b038fd121f1031e9ff87978af0f5ff /src
parent 98ab313fe496ae7c792db29c80bf6b23347484ff (diff)
New upstream version 6.8.1 (ref: upstream/6.8.1)
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.am 15
-rw-r--r-- src/Makefile.windows 197
-rw-r--r-- src/ascii.c 67
-rw-r--r-- src/big5.c 5
-rw-r--r-- src/config.h.cmake.in 6
-rw-r--r-- src/cp1251.c 5
-rw-r--r-- src/euc_jp.c 5
-rw-r--r-- src/euc_kr.c 5
-rw-r--r-- src/euc_tw.c 5
-rw-r--r-- src/gb18030.c 5
-rw-r--r-- src/iso8859_1.c 5
-rw-r--r-- src/iso8859_10.c 5
-rw-r--r-- src/iso8859_11.c 5
-rw-r--r-- src/iso8859_13.c 5
-rw-r--r-- src/iso8859_14.c 5
-rw-r--r-- src/iso8859_15.c 5
-rw-r--r-- src/iso8859_16.c 5
-rw-r--r-- src/iso8859_2.c 5
-rw-r--r-- src/iso8859_3.c 5
-rw-r--r-- src/iso8859_4.c 5
-rw-r--r-- src/iso8859_5.c 5
-rw-r--r-- src/iso8859_6.c 5
-rw-r--r-- src/iso8859_7.c 5
-rw-r--r-- src/iso8859_8.c 5
-rw-r--r-- src/iso8859_9.c 5
-rw-r--r-- src/koi8.c 5
-rw-r--r-- src/koi8_r.c 5
-rw-r--r-- src/onig_init.c 4
-rw-r--r-- src/oniggnu.h 10
-rw-r--r-- src/onigposix.h 4
-rw-r--r-- src/oniguruma.h 325
-rw-r--r-- src/regcomp.c 1458
-rw-r--r-- src/regenc.c 100
-rw-r--r-- src/regenc.h 103
-rw-r--r-- src/regerror.c 20
-rw-r--r-- src/regexec.c 2553
-rw-r--r-- src/reggnu.c 8
-rw-r--r-- src/regint.h 293
-rw-r--r-- src/regparse.c 1592
-rw-r--r-- src/regparse.h 40
-rw-r--r-- src/regposerr.c 9
-rw-r--r-- src/regposix.c 13
-rw-r--r-- src/regsyntax.c 6
-rw-r--r-- src/regversion.c 5
-rw-r--r-- src/sjis.c 6
-rw-r--r-- src/utf16_be.c 55
-rw-r--r-- src/utf16_le.c 53
-rw-r--r-- src/utf32_be.c 6
-rw-r--r-- src/utf32_le.c 6
-rw-r--r-- src/utf8.c 6
50 files changed, 4915 insertions, 2160 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index be35b24..911aecd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -4,13 +4,24 @@ libname = libonig.la
AM_CFLAGS = -Wall
AM_CPPFLAGS = -I$(top_srcdir) -I$(includedir)
-include_HEADERS = oniguruma.h oniggnu.h onigposix.h
+include_HEADERS = oniguruma.h oniggnu.h
+
+posix_headers = onigposix.h
+
+if ENABLE_POSIX_API
+posix_sources = regposix.c regposerr.c
+include_HEADERS += $(posix_headers)
+else
+posix_sources =
+endif
+
+
lib_LTLIBRARIES = $(libname)
libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \
regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \
regenc.c regsyntax.c regtrav.c regversion.c st.c \
- regposix.c regposerr.c \
+ $(posix_sources) \
unicode.c \
unicode_unfold_key.c \
unicode_fold1_key.c \
diff --git a/src/Makefile.windows b/src/Makefile.windows
index 046345a..1ce8ce2 100644
--- a/src/Makefile.windows
+++ b/src/Makefile.windows
@@ -15,8 +15,7 @@ LINKFLAGS = -link -incremental:no -pdb:none
INSTALL = install -c
CP = copy
CC = cl
-DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT
-RUBYDIR = ..
+DEFS = -DHAVE_CONFIG_H
subdirs =
@@ -25,44 +24,43 @@ libname = $(libbase)_s.lib
dllname = $(libbase).dll
dlllib = $(libbase).lib
-onigheaders = oniguruma.h regint.h regparse.h regenc.h st.h
-posixheaders = onigposix.h
+onigheaders = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h
+posixheaders = $(ONIG_DIR)/onigposix.h
headers = $(posixheaders) $(onigheaders)
-onigobjs = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \
- regexec.obj regenc.obj regsyntax.obj regtrav.obj \
- regversion.obj st.obj onig_init.obj
-posixobjs = regposix.obj regposerr.obj
+onigobjs = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \
+ $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \
+ $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj
+posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj
libobjs = $(onigobjs) $(posixobjs)
-jp_objs = euc_jp.obj sjis.obj
-iso8859_objs = iso8859_1.obj iso8859_2.obj \
- iso8859_3.obj iso8859_4.obj \
- iso8859_5.obj iso8859_6.obj \
- iso8859_7.obj iso8859_8.obj \
- iso8859_9.obj iso8859_10.obj \
- iso8859_11.obj iso8859_13.obj \
- iso8859_14.obj iso8859_15.obj \
- iso8859_16.obj
-
-encobjs = ascii.obj utf8.obj \
- unicode.obj \
- utf16_be.obj utf16_le.obj \
- utf32_be.obj utf32_le.obj \
+jp_objs = $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj
+iso8859_objs = $(BUILD_DIR)/iso8859_1.obj $(BUILD_DIR)/iso8859_2.obj \
+ $(BUILD_DIR)/iso8859_3.obj $(BUILD_DIR)/iso8859_4.obj \
+ $(BUILD_DIR)/iso8859_5.obj $(BUILD_DIR)/iso8859_6.obj \
+ $(BUILD_DIR)/iso8859_7.obj $(BUILD_DIR)/iso8859_8.obj \
+ $(BUILD_DIR)/iso8859_9.obj $(BUILD_DIR)/iso8859_10.obj \
+ $(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \
+ $(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \
+ $(BUILD_DIR)/iso8859_16.obj
+
+encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \
+ $(BUILD_DIR)/unicode.obj \
+ $(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \
+ $(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \
$(jp_objs) $(iso8859_objs) \
- euc_tw.obj euc_kr.obj big5.obj \
- gb18030.obj \
- koi8_r.obj \
- cp1251.obj \
- euc_jp_prop.obj sjis_prop.obj \
- unicode_unfold_key.obj unicode_fold1_key.obj \
- unicode_fold2_key.obj unicode_fold3_key.obj # koi8.obj
-
-onigsources = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \
- regsyntax.c regtrav.c regversion.c reggnu.c st.c
-posixsources = regposix.c regposerr.c
+ $(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \
+ $(BUILD_DIR)/gb18030.obj \
+ $(BUILD_DIR)/koi8_r.obj \
+ $(BUILD_DIR)/cp1251.obj \
+ $(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \
+ $(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \
+ $(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj # $(BUILD_DIR)/koi8.obj
+
+onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
+ $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c
+posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c
libsources = $(posixsources) $(onigsources)
-rubysources = $(onigsources)
patchfiles = re.c.168.patch re.c.181.patch
distfiles = README COPYING HISTORY \
@@ -77,7 +75,7 @@ makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
.SUFFIXES:
.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
-.c.obj:
+{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj:
$(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
# targets
@@ -96,58 +94,58 @@ $(libname): $(libobjs) $(encobjs)
$(dllname): $(libobjs) $(encobjs)
$(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
-regparse.obj: regparse.c $(onigheaders) config.h st.h
-regext.obj: regext.c $(onigheaders) config.h
-regtrav.obj: regtrav.c $(onigheaders) config.h
-regcomp.obj: regcomp.c $(onigheaders) config.h
-regexec.obj: regexec.c regint.h regenc.h oniguruma.h config.h
-reggnu.obj: reggnu.c regint.h regenc.h oniguruma.h config.h oniggnu.h
-regerror.obj: regerror.c regint.h regenc.h oniguruma.h config.h
-regenc.obj: regenc.c regenc.h oniguruma.h config.h
-regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h
-regversion.obj: regversion.c oniguruma.h config.h
-regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h
-regposerr.obj: regposerr.c $(posixheaders) config.h
-st.obj: st.c regint.h oniguruma.h config.h st.h
-onig_init.obj: onig_init.c oniguruma.h
-
-ascii.obj: ascii.c regenc.h config.h
-unicode.obj: unicode.c unicode_fold_data.c unicode_property_data.c regenc.h config.h
-utf8.obj: utf8.c regenc.h config.h
-utf16_be.obj: utf16_be.c regenc.h config.h
-utf16_le.obj: utf16_le.c regenc.h config.h
-utf32_be.obj: utf32_be.c regenc.h config.h
-utf32_le.obj: utf32_le.c regenc.h config.h
-euc_jp.obj: euc_jp.c regenc.h config.h
-euc_tw.obj: euc_tw.c regenc.h config.h
-euc_kr.obj: euc_kr.c regenc.h config.h
-sjis.obj: sjis.c regenc.h config.h
-iso8859_1.obj: iso8859_1.c regenc.h config.h
-iso8859_2.obj: iso8859_2.c regenc.h config.h
-iso8859_3.obj: iso8859_3.c regenc.h config.h
-iso8859_4.obj: iso8859_4.c regenc.h config.h
-iso8859_5.obj: iso8859_5.c regenc.h config.h
-iso8859_6.obj: iso8859_6.c regenc.h config.h
-iso8859_7.obj: iso8859_7.c regenc.h config.h
-iso8859_8.obj: iso8859_8.c regenc.h config.h
-iso8859_9.obj: iso8859_9.c regenc.h config.h
-iso8859_10.obj: iso8859_10.c regenc.h config.h
-iso8859_11.obj: iso8859_11.c regenc.h config.h
-iso8859_13.obj: iso8859_13.c regenc.h config.h
-iso8859_14.obj: iso8859_14.c regenc.h config.h
-iso8859_15.obj: iso8859_15.c regenc.h config.h
-iso8859_16.obj: iso8859_16.c regenc.h config.h
-koi8.obj: koi8.c regenc.h config.h
-koi8_r.obj: koi8_r.c regenc.h config.h
-cp1251.obj: cp1251.c regenc.h config.h
-big5.obj: big5.c regenc.h config.h
-gb18030.obj: gb18030.c regenc.h config.h
-euc_jp_prop.obj: euc_jp_prop.c regenc.h
-sjis_prop.obj: sjis_prop.c regenc.h
-unicode_unfold_key.obj: unicode_unfold_key.c regenc.h config.h
-unicode_fold1_key.obj: unicode_fold1_key.c regenc.h config.h
-unicode_fold2_key.obj: unicode_fold2_key.c regenc.h config.h
-unicode_fold3_key.obj: unicode_fold3_key.c regenc.h config.h
+$(BUILD_DIR)/regparse.obj: $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regext.obj: $(ONIG_DIR)/regext.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regtrav.obj: $(ONIG_DIR)/regtrav.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regcomp.obj: $(ONIG_DIR)/regcomp.c $(onigheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regexec.obj: $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/reggnu.obj: $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h
+$(BUILD_DIR)/regerror.obj: $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regenc.obj: $(ONIG_DIR)/regenc.c $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regposix.obj: $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h
+$(BUILD_DIR)/st.obj: $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h
+$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h
+
+$(BUILD_DIR)/ascii.obj: $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode.obj: $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf8.obj: $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf16_be.obj: $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf16_le.obj: $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf32_be.obj: $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/utf32_le.obj: $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_jp.obj: $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_tw.obj: $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_kr.obj: $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/sjis.obj: $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_1.obj: $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_2.obj: $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_3.obj: $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_4.obj: $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_5.obj: $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_6.obj: $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_7.obj: $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_8.obj: $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_9.obj: $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/koi8.obj: $(ONIG_DIR)/koi8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/koi8_r.obj: $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/cp1251.obj: $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/big5.obj: $(ONIG_DIR)/big5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/gb18030.obj: $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/euc_jp_prop.obj: $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h
+$(BUILD_DIR)/sjis_prop.obj: $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h
+$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
+$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
# C library test
ctest: $(testc)
@@ -158,26 +156,23 @@ ptest: $(testp)
.\$(testp)
$(testc): $(testc).c $(libname)
- $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
+ $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
$(testp): $(testc).c $(dlllib)
- $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib)
-
-#$(testc)u.c: test.rb testconvu.rb
-# ruby -Ke testconvu.rb test.rb > $@
+ $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib)
$(testc)u: $(testc)u.c $(libname)
- $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
+ $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
clean:
- del *.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj
+ del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj
samples: all
- $(CC) $(CFLAGS) -I. /Fe:simple sample\simple.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:posix sample\posix.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:names sample\names.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:listcap sample\listcap.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:sql sample\sql.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:encode sample\encode.c $(dlllib)
- $(CC) $(CFLAGS) -I. /Fe:syntax sample\syntax.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o simple $(ONIG_DIR)\sample\simple.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o posix $(ONIG_DIR)\sample\posix.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o names $(ONIG_DIR)\sample\names.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o listcap $(ONIG_DIR)\sample\listcap.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o sql $(ONIG_DIR)\sample\sql.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o encode $(ONIG_DIR)\sample\encode.c $(dlllib)
+ $(CC) $(CFLAGS) -I. -o syntax $(ONIG_DIR)\sample\syntax.c $(dlllib)
\ No newline at end of file
diff --git a/src/ascii.c b/src/ascii.c
index b21878d..7efaa26 100644
--- a/src/ascii.c
+++ b/src/ascii.c
@@ -2,7 +2,7 @@
ascii.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,59 @@
* SUCH DAMAGE.
*/
-#include "regenc.h"
+#include "regint.h" /* for USE_CALLOUT */
+
+static int
+init(void)
+{
+#ifdef USE_CALLOUT
+
+ int id;
+ OnigEncoding enc;
+ char* name;
+ unsigned int t_long;
+ unsigned int args[4];
+ OnigValue opts[4];
+
+ enc = ONIG_ENCODING_ASCII;
+ t_long = ONIG_TYPE_LONG;
+
+ name = "FAIL"; BC0_P(name, fail);
+ name = "MISMATCH"; BC0_P(name, mismatch);
+ name = "MAX"; BC_B(name, max, 1, &t_long);
+
+ name = "ERROR";
+ args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
+ BC_P_O(name, error, 1, args, 1, opts);
+
+ name = "COUNT";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, count, 1, args, 1, opts);
+
+ name = "TOTAL_COUNT";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, total_count, 1, args, 1, opts);
+
+ name = "CMP";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_STRING;
+ args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ BC_P(name, cmp, 3, args);
+
+#endif /* USE_CALLOUT */
+
+ return ONIG_NORMAL;
+}
+
+#if 0
+static int
+is_initialized(void)
+{
+ /* Don't use this function */
+ /* can't answer, because builtin callout entries removed in onig_end() */
+ return 0;
+}
+#endif
static int
ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -41,8 +93,8 @@ ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
OnigEncodingType OnigEncodingASCII = {
onigenc_single_byte_mbc_enc_len,
"US-ASCII", /* name */
- 1, /* max byte length */
- 1, /* min byte length */
+ 1, /* max enc length */
+ 1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
@@ -55,7 +107,8 @@ OnigEncodingType OnigEncodingASCII = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
- NULL, /* init */
- NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ init,
+ 0, /* is_initialized */
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/big5.c b/src/big5.c
index bc713ab..ff0c51b 100644
--- a/src/big5.c
+++ b/src/big5.c
@@ -2,7 +2,7 @@
big5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -186,5 +186,6 @@ OnigEncodingType OnigEncodingBIG5 = {
big5_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in
index e13fad1..b59cc8d 100644
--- a/src/config.h.cmake.in
+++ b/src/config.h.cmake.in
@@ -31,6 +31,9 @@
/* Define to 1 if you have the <string.h> header file. */
#cmakedefine HAVE_STRING_H ${HAVE_STRING_H}
+/* Define to 1 if you have the <limits.h> header file. */
+#cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H}
+
/* Define to 1 if you have the <sys/times.h> header file. */
#cmakedefine HAVE_SYS_TIMES_H ${HAVE_SYS_TIMES_H}
@@ -64,9 +67,6 @@
/* Define to 1 if you have the ANSI C header files. */
#cmakedefine STDC_HEADERS ${STDC_HEADERS}
-/* Define if combination explosion check */
-#cmakedefine USE_COMBINATION_EXPLOSION_CHECK ${USE_COMBINATION_EXPLOSION_CHECK}
-
/* Define if enable CR+NL as line terminator */
#cmakedefine USE_CRNL_AS_LINE_TERMINATOR ${USE_CRNL_AS_LINE_TERMINATOR}
diff --git a/src/cp1251.c b/src/cp1251.c
index 4d655bb..f7b43c3 100644
--- a/src/cp1251.c
+++ b/src/cp1251.c
@@ -2,7 +2,7 @@
cp1251.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2006-2016 Byte <byte AT mail DOT kna DOT ru>
+ * Copyright (c) 2006-2018 Byte <byte AT mail DOT kna DOT ru>
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
@@ -199,5 +199,6 @@ OnigEncodingType OnigEncodingCP1251 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/euc_jp.c b/src/euc_jp.c
index 42c3bce..8dd6ac1 100644
--- a/src/euc_jp.c
+++ b/src/euc_jp.c
@@ -2,7 +2,7 @@
euc_jp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -306,5 +306,6 @@ OnigEncodingType OnigEncodingEUC_JP = {
is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/euc_kr.c b/src/euc_kr.c
index 450caf1..08bfa1c 100644
--- a/src/euc_kr.c
+++ b/src/euc_kr.c
@@ -2,7 +2,7 @@
euc_kr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -184,5 +184,6 @@ OnigEncodingType OnigEncodingEUC_CN = {
euckr_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/euc_tw.c b/src/euc_tw.c
index b3ee628..dbf0eac 100644
--- a/src/euc_tw.c
+++ b/src/euc_tw.c
@@ -2,7 +2,7 @@
euc_tw.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -167,5 +167,6 @@ OnigEncodingType OnigEncodingEUC_TW = {
euctw_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/gb18030.c b/src/gb18030.c
index c8b5865..073c83b 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -2,7 +2,7 @@
gb18030.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2005-2016 KUBO Takehiro <kubo AT jiubao DOT org>
+ * Copyright (c) 2005-2018 KUBO Takehiro <kubo AT jiubao DOT org>
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
@@ -534,5 +534,6 @@ OnigEncodingType OnigEncodingGB18030 = {
gb18030_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_1.c b/src/iso8859_1.c
index 573931f..bcd7e26 100644
--- a/src/iso8859_1.c
+++ b/src/iso8859_1.c
@@ -2,7 +2,7 @@
iso8859_1.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -271,5 +271,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_10.c b/src/iso8859_10.c
index 91b18d4..a5946cc 100644
--- a/src/iso8859_10.c
+++ b/src/iso8859_10.c
@@ -2,7 +2,7 @@
iso8859_10.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -238,5 +238,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_11.c b/src/iso8859_11.c
index 518be25..ec94fd1 100644
--- a/src/iso8859_11.c
+++ b/src/iso8859_11.c
@@ -2,7 +2,7 @@
iso8859_11.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_13.c b/src/iso8859_13.c
index d1f39a2..fba7fd4 100644
--- a/src/iso8859_13.c
+++ b/src/iso8859_13.c
@@ -2,7 +2,7 @@
iso8859_13.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -227,5 +227,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_14.c b/src/iso8859_14.c
index 3361b0d..e1f71f5 100644
--- a/src/iso8859_14.c
+++ b/src/iso8859_14.c
@@ -2,7 +2,7 @@
iso8859_14.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -240,5 +240,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_15.c b/src/iso8859_15.c
index b09e876..236e9e7 100644
--- a/src/iso8859_15.c
+++ b/src/iso8859_15.c
@@ -2,7 +2,7 @@
iso8859_15.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_16.c b/src/iso8859_16.c
index 29a350d..42045bd 100644
--- a/src/iso8859_16.c
+++ b/src/iso8859_16.c
@@ -2,7 +2,7 @@
iso8859_16.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -236,5 +236,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_2.c b/src/iso8859_2.c
index 9eb3536..db93046 100644
--- a/src/iso8859_2.c
+++ b/src/iso8859_2.c
@@ -2,7 +2,7 @@
iso8859_2.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_3.c b/src/iso8859_3.c
index 862823a..6fe5e6f 100644
--- a/src/iso8859_3.c
+++ b/src/iso8859_3.c
@@ -2,7 +2,7 @@
iso8859_3.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_4.c b/src/iso8859_4.c
index db706da..ee1eb93 100644
--- a/src/iso8859_4.c
+++ b/src/iso8859_4.c
@@ -2,7 +2,7 @@
iso8859_4.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -236,5 +236,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_5.c b/src/iso8859_5.c
index 0e03e9c..7d828e1 100644
--- a/src/iso8859_5.c
+++ b/src/iso8859_5.c
@@ -2,7 +2,7 @@
iso8859_5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -225,5 +225,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_6.c b/src/iso8859_6.c
index 6289af5..a959e98 100644
--- a/src/iso8859_6.c
+++ b/src/iso8859_6.c
@@ -2,7 +2,7 @@
iso8859_6.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_7.c b/src/iso8859_7.c
index 75b520f..e695523 100644
--- a/src/iso8859_7.c
+++ b/src/iso8859_7.c
@@ -2,7 +2,7 @@
iso8859_7.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -221,5 +221,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_8.c b/src/iso8859_8.c
index 5f18345..66b63b8 100644
--- a/src/iso8859_8.c
+++ b/src/iso8859_8.c
@@ -2,7 +2,7 @@
iso8859_8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/iso8859_9.c b/src/iso8859_9.c
index d0c06bb..d780293 100644
--- a/src/iso8859_9.c
+++ b/src/iso8859_9.c
@@ -2,7 +2,7 @@
iso8859_9.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -227,5 +227,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/koi8.c b/src/koi8.c
index 80f89e9..8531825 100644
--- a/src/koi8.c
+++ b/src/koi8.c
@@ -2,7 +2,7 @@
koi8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -249,5 +249,6 @@ OnigEncodingType OnigEncodingKOI8 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/koi8_r.c b/src/koi8_r.c
index f8ef34f..e88cfe3 100644
--- a/src/koi8_r.c
+++ b/src/koi8_r.c
@@ -2,7 +2,7 @@
koi8_r.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -211,5 +211,6 @@ OnigEncodingType OnigEncodingKOI8_R = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string
+ onigenc_always_true_is_valid_mbc_string,
+ 0, 0, 0
};
diff --git a/src/onig_init.c b/src/onig_init.c
index 9f53568..7ad98b7 100644
--- a/src/onig_init.c
+++ b/src/onig_init.c
@@ -2,7 +2,7 @@
onig_init.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2016-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*/
-#include "oniguruma.h"
+#include "regint.h"
/* onig_init(): deprecated function */
extern int
diff --git a/src/oniggnu.h b/src/oniggnu.h
index 3da9f23..d688883 100644
--- a/src/oniggnu.h
+++ b/src/oniggnu.h
@@ -35,10 +35,12 @@
extern "C" {
#endif
-#define RE_MBCTYPE_ASCII 0
-#define RE_MBCTYPE_EUC 1
-#define RE_MBCTYPE_SJIS 2
-#define RE_MBCTYPE_UTF8 3
+enum {
+ RE_MBCTYPE_ASCII = 0,
+ RE_MBCTYPE_EUC = 1,
+ RE_MBCTYPE_SJIS = 2,
+ RE_MBCTYPE_UTF8 = 3
+};
/* GNU regex options */
#ifndef RE_NREGS
diff --git a/src/onigposix.h b/src/onigposix.h
index 22211e4..da0f919 100644
--- a/src/onigposix.h
+++ b/src/onigposix.h
@@ -4,7 +4,7 @@
onigposix.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -97,7 +97,7 @@ typedef struct {
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
-#if defined(EXPORT)
+#if defined(ONIGURUMA_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
#define ONIG_EXTERN extern __declspec(dllimport)
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 5ad4469..349c00e 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,30 +35,10 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
-#define ONIGURUMA_VERSION_MINOR 7
-#define ONIGURUMA_VERSION_TEENY 0
+#define ONIGURUMA_VERSION_MINOR 8
+#define ONIGURUMA_VERSION_TEENY 1
-#ifdef __cplusplus
-# ifndef HAVE_PROTOTYPES
-# define HAVE_PROTOTYPES 1
-# endif
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
-#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-#ifdef HAVE_STDARG_H
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
+#define ONIGURUMA_VERSION_INT 60801
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -69,16 +49,12 @@ extern "C" {
#endif
#ifndef PV_
-#ifdef HAVE_STDARG_PROTOTYPES
# define PV_(args) args
-#else
-# define PV_(args) ()
-#endif
#endif
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
-#if defined(EXPORT) || defined(RUBY_EXPORT)
+#if defined(ONIGURUMA_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
#define ONIG_EXTERN extern __declspec(dllimport)
@@ -96,10 +72,6 @@ extern "C" {
#define UChar OnigUChar
#endif
-#ifdef _WIN32
-#include <windows.h>
-#endif
-
typedef unsigned int OnigCodePoint;
typedef unsigned char OnigUChar;
typedef unsigned int OnigCtype;
@@ -166,6 +138,9 @@ typedef struct OnigEncodingTypeST {
int (*init)(void);
int (*is_initialized)(void);
int (*is_valid_mbc_string)(const OnigUChar* s, const OnigUChar* end);
+ unsigned int flag;
+ OnigCodePoint sb_range;
+ int index;
} OnigEncodingType;
typedef OnigEncodingType* OnigEncoding;
@@ -243,21 +218,24 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
/* 18: 6(max-byte) * 3(case-fold chars) */
/* character types */
-#define ONIGENC_CTYPE_NEWLINE 0
-#define ONIGENC_CTYPE_ALPHA 1
-#define ONIGENC_CTYPE_BLANK 2
-#define ONIGENC_CTYPE_CNTRL 3
-#define ONIGENC_CTYPE_DIGIT 4
-#define ONIGENC_CTYPE_GRAPH 5
-#define ONIGENC_CTYPE_LOWER 6
-#define ONIGENC_CTYPE_PRINT 7
-#define ONIGENC_CTYPE_PUNCT 8
-#define ONIGENC_CTYPE_SPACE 9
-#define ONIGENC_CTYPE_UPPER 10
-#define ONIGENC_CTYPE_XDIGIT 11
-#define ONIGENC_CTYPE_WORD 12
-#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
-#define ONIGENC_CTYPE_ASCII 14
+typedef enum {
+ ONIGENC_CTYPE_NEWLINE = 0,
+ ONIGENC_CTYPE_ALPHA = 1,
+ ONIGENC_CTYPE_BLANK = 2,
+ ONIGENC_CTYPE_CNTRL = 3,
+ ONIGENC_CTYPE_DIGIT = 4,
+ ONIGENC_CTYPE_GRAPH = 5,
+ ONIGENC_CTYPE_LOWER = 6,
+ ONIGENC_CTYPE_PRINT = 7,
+ ONIGENC_CTYPE_PUNCT = 8,
+ ONIGENC_CTYPE_SPACE = 9,
+ ONIGENC_CTYPE_UPPER = 10,
+ ONIGENC_CTYPE_XDIGIT = 11,
+ ONIGENC_CTYPE_WORD = 12,
+ ONIGENC_CTYPE_ALNUM = 13, /* alpha || digit */
+ ONIGENC_CTYPE_ASCII = 14
+} OnigEncCtype;
+
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
@@ -365,7 +343,8 @@ ONIG_EXTERN
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
-
+ONIG_EXTERN
+UChar* onigenc_strdup P_((OnigEncoding enc, const UChar* s, const UChar* end));
/* PART: regular expression */
@@ -513,6 +492,8 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) */
#define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* \X \y \Y */
#define ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL (1U<<27) /* (?R), (?&name)... */
+#define ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (1U<<28) /* (?{...}) (?{{...}}) */
+#define ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (1U<<29) /* (*name) (*name{a,..}) */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
@@ -552,6 +533,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_NORMAL 0
#define ONIG_MISMATCH -1
#define ONIG_NO_SUPPORT_CONFIG -2
+#define ONIG_ABORT -3
/* internal error */
#define ONIGERR_MEMORY -5
@@ -562,6 +544,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_UNEXPECTED_BYTECODE -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
+#define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER -17
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
#define ONIGERR_FAIL_TO_INITIALIZE -23
@@ -616,6 +599,12 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_INVALID_IF_ELSE_SYNTAX -224
#define ONIGERR_INVALID_ABSENT_GROUP_PATTERN -225
#define ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN -226
+#define ONIGERR_INVALID_CALLOUT_PATTERN -227
+#define ONIGERR_INVALID_CALLOUT_NAME -228
+#define ONIGERR_UNDEFINED_CALLOUT_NAME -229
+#define ONIGERR_INVALID_CALLOUT_BODY -230
+#define ONIGERR_INVALID_CALLOUT_TAG_NAME -231
+#define ONIGERR_INVALID_CALLOUT_ARG -232
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
@@ -681,49 +670,8 @@ extern void onig_null_warn P_((const char* s));
#define ONIG_CHAR_TABLE_SIZE 256
-typedef struct re_pattern_buffer {
- /* common members of BBuf(bytes-buffer) */
- unsigned char* p; /* compiled pattern */
- unsigned int used; /* used space for p */
- unsigned int alloc; /* allocated space for p */
-
- int num_mem; /* used memory(...) num counted from 1 */
- int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
- int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
- int num_comb_exp_check; /* combination explosion check */
- int num_call; /* number of subexp call */
- unsigned int capture_history; /* (?@...) flag (1-31) */
- unsigned int bt_mem_start; /* need backtrack flag */
- unsigned int bt_mem_end; /* need backtrack flag */
- int stack_pop_level;
- int repeat_range_alloc;
- OnigRepeatRange* repeat_range;
-
- OnigEncoding enc;
- OnigOptionType options;
- OnigSyntaxType* syntax;
- OnigCaseFoldType case_fold_flag;
- void* name_table;
-
- /* optimization info (string search, char-map and anchors) */
- int optimize; /* optimize flag */
- int threshold_len; /* search str-length for apply optimize */
- int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
- OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
- OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
- int sub_anchor; /* start-anchor for exact or map */
- unsigned char *exact;
- unsigned char *exact_end;
- unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
- int *int_map; /* BM skip for exact_len > 255 */
- int *int_map_backward; /* BM skip for backward search */
- OnigLen dmin; /* min-distance of exact or map */
- OnigLen dmax; /* max-distance of exact or map */
-
- /* regex_t link chain */
- struct re_pattern_buffer* chain; /* escape compile-conflict */
-} OnigRegexType;
-
+struct re_pattern_buffer;
+typedef struct re_pattern_buffer OnigRegexType;
typedef OnigRegexType* OnigRegex;
#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
@@ -740,10 +688,74 @@ typedef struct {
OnigCaseFoldType case_fold_flag;
} OnigCompileInfo;
+
+/* types for callout */
+typedef enum {
+ ONIG_CALLOUT_IN_PROGRESS = 1, /* 1<<0 */
+ ONIG_CALLOUT_IN_RETRACTION = 2 /* 1<<1 */
+} OnigCalloutIn;
+
+#define ONIG_CALLOUT_IN_BOTH (ONIG_CALLOUT_IN_PROGRESS | ONIG_CALLOUT_IN_RETRACTION)
+
+typedef enum {
+ ONIG_CALLOUT_OF_CONTENTS = 0,
+ ONIG_CALLOUT_OF_NAME = 1
+} OnigCalloutOf;
+
+typedef enum {
+ ONIG_CALLOUT_TYPE_SINGLE = 0,
+ ONIG_CALLOUT_TYPE_START_CALL = 1,
+ ONIG_CALLOUT_TYPE_BOTH_CALL = 2,
+ ONIG_CALLOUT_TYPE_START_MARK_END_CALL = 3,
+} OnigCalloutType;
+
+
+#define ONIG_NON_NAME_ID -1
+#define ONIG_NON_CALLOUT_NUM 0
+
+#define ONIG_CALLOUT_MAX_ARGS_NUM 4
+#define ONIG_CALLOUT_DATA_SLOT_NUM 5
+
+struct OnigCalloutArgsStruct;
+typedef struct OnigCalloutArgsStruct OnigCalloutArgs;
+
+typedef int (*OnigCalloutFunc)(OnigCalloutArgs* args, void* user_data);
+
+/* callout function return values (less than -1: error code) */
+typedef enum {
+ ONIG_CALLOUT_FAIL = 1,
+ ONIG_CALLOUT_SUCCESS = 0
+} OnigCalloutResult;
+
+typedef enum {
+ ONIG_TYPE_VOID = 0,
+ ONIG_TYPE_LONG = 1<<0,
+ ONIG_TYPE_CHAR = 1<<1,
+ ONIG_TYPE_STRING = 1<<2,
+ ONIG_TYPE_POINTER = 1<<3,
+ ONIG_TYPE_TAG = 1<<4,
+} OnigType;
+
+typedef union {
+ long l;
+ OnigCodePoint c;
+ struct {
+ OnigUChar* start;
+ OnigUChar* end;
+ } s;
+ void* p;
+ int tag; /* tag -> callout_num */
+} OnigValue;
+
+
+struct OnigMatchParamStruct;
+typedef struct OnigMatchParamStruct OnigMatchParam;
+
+
/* Oniguruma Native API */
ONIG_EXTERN
-int onig_initialize P_((OnigEncoding encodings[], int n));
+int onig_initialize P_((OnigEncoding encodings[], int number_of_encodings));
/* onig_init(): deprecated function. Use onig_initialize(). */
ONIG_EXTERN
int onig_init P_((void));
@@ -756,7 +768,7 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
+int onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
@@ -765,12 +777,16 @@ void onig_free P_((OnigRegex));
ONIG_EXTERN
void onig_free_body P_((OnigRegex));
ONIG_EXTERN
-int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg);
+int onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg);
ONIG_EXTERN
int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
+int onig_search_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp));
+ONIG_EXTERN
int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
+int onig_match_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp));
+ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
void onig_region_init P_((OnigRegion* region));
@@ -843,6 +859,10 @@ unsigned int onig_get_match_stack_limit_size P_((void));
ONIG_EXTERN
int onig_set_match_stack_limit_size P_((unsigned int size));
ONIG_EXTERN
+unsigned long onig_get_retry_limit_in_match P_((void));
+ONIG_EXTERN
+int onig_set_retry_limit_in_match P_((unsigned long n));
+ONIG_EXTERN
unsigned int onig_get_parse_depth_limit P_((void));
ONIG_EXTERN
int onig_set_capture_num_limit P_((int num));
@@ -857,6 +877,121 @@ const char* onig_version P_((void));
ONIG_EXTERN
const char* onig_copyright P_((void));
+/* for OnigMatchParam */
+ONIG_EXTERN
+OnigMatchParam* onig_new_match_param P_((void));
+ONIG_EXTERN
+void onig_free_match_param P_((OnigMatchParam* p));
+ONIG_EXTERN
+void onig_free_match_param_content P_((OnigMatchParam* p));
+ONIG_EXTERN
+int onig_initialize_match_param P_((OnigMatchParam* mp));
+ONIG_EXTERN
+int onig_set_match_stack_limit_size_of_match_param P_((OnigMatchParam* param, unsigned int limit));
+ONIG_EXTERN
+int onig_set_retry_limit_in_match_of_match_param P_((OnigMatchParam* param, unsigned long limit));
+ONIG_EXTERN
+int onig_set_progress_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f));
+ONIG_EXTERN
+int onig_set_retraction_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f));
+
+/* for callout functions */
+ONIG_EXTERN
+OnigCalloutFunc onig_get_progress_callout P_((void));
+ONIG_EXTERN
+int onig_set_progress_callout P_((OnigCalloutFunc f));
+ONIG_EXTERN
+OnigCalloutFunc onig_get_retraction_callout P_((void));
+ONIG_EXTERN
+int onig_set_retraction_callout P_((OnigCalloutFunc f));
+ONIG_EXTERN
+int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[])); /* name: single-byte string */
+ONIG_EXTERN
+OnigUChar* onig_get_callout_name_by_name_id P_((int id));
+ONIG_EXTERN
+int onig_get_callout_num_by_tag P_((OnigRegex reg, const UChar* tag, const UChar* tag_end));
+ONIG_EXTERN
+int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType* type, OnigValue* val));
+ONIG_EXTERN
+int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType type, OnigValue* val));
+
+/* used in callout functions */
+ONIG_EXTERN
+int onig_get_callout_num_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+OnigCalloutIn onig_get_callout_in_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+int onig_get_name_id_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+const OnigUChar* onig_get_contents_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+const OnigUChar* onig_get_contents_end_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+int onig_get_args_num_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+int onig_get_passed_args_num_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+int onig_get_arg_by_callout_args P_((OnigCalloutArgs* args, int index, OnigType* type, OnigValue* val));
+ONIG_EXTERN
+const OnigUChar* onig_get_string_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+const OnigUChar* onig_get_string_end_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+const OnigUChar* onig_get_start_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+const OnigUChar* onig_get_right_range_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+const OnigUChar* onig_get_current_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+OnigRegex onig_get_regex_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+unsigned long onig_get_retry_counter_by_callout_args P_((OnigCalloutArgs* args));
+ONIG_EXTERN
+int onig_callout_tag_is_exist_at_callout_num P_((OnigRegex reg, int callout_num));
+ONIG_EXTERN
+const OnigUChar* onig_get_callout_tag_start P_((OnigRegex reg, int callout_num));
+ONIG_EXTERN
+const OnigUChar* onig_get_callout_tag_end P_((OnigRegex reg, int callout_num));
+ONIG_EXTERN
+int onig_get_callout_data_dont_clear_old P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val));
+ONIG_EXTERN
+int onig_get_callout_data_by_callout_args_self_dont_clear_old P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val));
+ONIG_EXTERN
+int onig_get_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val));
+ONIG_EXTERN
+int onig_get_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType* type, OnigValue* val));
+ONIG_EXTERN
+int onig_get_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val));
+ONIG_EXTERN
+int onig_set_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType type, OnigValue* val));
+ONIG_EXTERN
+int onig_set_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType type, OnigValue* val));
+ONIG_EXTERN
+int onig_set_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType type, OnigValue* val));
+ONIG_EXTERN
+int onig_get_capture_range_in_callout P_((OnigCalloutArgs* args, int mem_num, int* begin, int* end));
+ONIG_EXTERN
+int onig_get_used_stack_size_in_callout P_((OnigCalloutArgs* args, int* used_num, int* used_bytes));
+
+/* builtin callout functions */
+ONIG_EXTERN
+int onig_builtin_fail P_((OnigCalloutArgs* args, void* user_data));
+ONIG_EXTERN
+int onig_builtin_mismatch P_((OnigCalloutArgs* args, void* user_data));
+ONIG_EXTERN
+int onig_builtin_error P_((OnigCalloutArgs* args, void* user_data));
+ONIG_EXTERN
+int onig_builtin_count P_((OnigCalloutArgs* args, void* user_data));
+ONIG_EXTERN
+int onig_builtin_total_count P_((OnigCalloutArgs* args, void* user_data));
+ONIG_EXTERN
+int onig_builtin_max P_((OnigCalloutArgs* args, void* user_data));
+ONIG_EXTERN
+int onig_builtin_cmp P_((OnigCalloutArgs* args, void* user_data));
+
+ONIG_EXTERN
+int onig_setup_builtin_monitors_by_ascii_encoded_name P_((void* fp));
+
#ifdef __cplusplus
}
#endif
diff --git a/src/regcomp.c b/src/regcomp.c
index 63df18b..a19109f 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -78,7 +78,7 @@ int_stack_push(int_stack* s, int v)
{
if (s->n >= s->alloc) {
int new_size = s->alloc * 2;
- int* nv = (int* )xrealloc(s->v, new_size);
+ int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);
if (IS_NULL(nv)) return ONIGERR_MEMORY;
s->alloc = new_size;
@@ -121,26 +121,28 @@ onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
return 0;
}
-
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
-static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
-#endif
-
-static UChar*
-str_dup(UChar* s, UChar* end)
+static int
+int_multiply_cmp(int x, int y, int v)
{
- int len = (int )(end - s);
+ if (x == 0 || y == 0) return -1;
- if (len > 0) {
- UChar* r = (UChar* )xmalloc(len + 1);
- CHECK_NULL_RETURN(r);
- xmemcpy(r, s, len);
- r[len] = (UChar )0;
- return r;
+ if (x < INT_MAX / y) {
+ int xy = x * y;
+ if (xy > v) return 1;
+ else {
+ if (xy == v) return 0;
+ else return -1;
+ }
}
- else return NULL;
+ else
+ return 1;
}
+
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
+#endif
+
static void
swap_node(Node* a, Node* b)
{
@@ -200,20 +202,6 @@ bitset_is_empty(BitSetRef bs)
return 1;
}
-#ifdef ONIG_DEBUG
-static int
-bitset_on_num(BitSetRef bs)
-{
- int i, n;
-
- n = 0;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(bs, i)) n++;
- }
- return n;
-}
-#endif
-
extern int
onig_bbuf_init(BBuf* buf, int size)
{
@@ -282,17 +270,6 @@ add_opcode(regex_t* reg, int opcode)
return 0;
}
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-static int
-add_state_check_num(regex_t* reg, int num)
-{
- StateCheckNumType n = (StateCheckNumType )num;
-
- BB_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
- return 0;
-}
-#endif
-
static int
add_rel_addr(regex_t* reg, int addr)
{
@@ -811,7 +788,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
}
static int
-is_anychar_star_quantifier(QuantNode* qn)
+is_anychar_infinite_greedy(QuantNode* qn)
{
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
@@ -823,254 +800,21 @@ is_anychar_star_quantifier(QuantNode* qn)
#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
#define CKN_ON (ckn > 0)
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-
-static int
-compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
-{
- int len, mod_tlen, cklen;
- int ckn;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->body_empty_info;
- int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
-
- if (tlen < 0) return tlen;
-
- ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
-
- cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
-
- /* anychar repeat */
- if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) {
- if (qn->greedy && infinite) {
- if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
- else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
- }
- }
-
- if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
-
- if (infinite && qn->lower <= 1) {
- if (qn->greedy) {
- if (qn->lower == 1)
- len = SIZE_OP_JUMP;
- else
- len = 0;
-
- len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
- }
- else {
- if (qn->lower == 0)
- len = SIZE_OP_JUMP;
- else
- len = 0;
-
- len += mod_tlen + SIZE_OP_PUSH + cklen;
- }
- }
- else if (qn->upper == 0) {
- if (qn->is_refered != 0) /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
- else
- len = 0;
- }
- else if (qn->upper == 1 && qn->greedy) {
- if (qn->lower == 0) {
- if (CKN_ON) {
- len = SIZE_OP_STATE_CHECK_PUSH + tlen;
- }
- else {
- len = SIZE_OP_PUSH + tlen;
- }
- }
- else {
- len = tlen;
- }
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
- }
- else {
- len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
- if (CKN_ON)
- len += SIZE_OP_STATE_CHECK;
- }
-
- return len;
-}
-
-static int
-compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
-{
- int r, mod_tlen;
- int ckn;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->body_empty_info;
- int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
-
- if (tlen < 0) return tlen;
-
- ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
-
- if (is_anychar_star_quantifier(qn)) {
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
- if (r != 0) return r;
- if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
- if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
- if (r != 0) return r;
- if (CKN_ON) {
- r = add_state_check_num(reg, ckn);
- if (r != 0) return r;
- }
-
- return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
- }
- else {
- if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) {
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_ML_STAR
- : OP_ANYCHAR_ML_STAR));
- }
- else {
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_STAR
- : OP_ANYCHAR_STAR));
- }
- if (r != 0) return r;
- if (CKN_ON)
- r = add_state_check_num(reg, ckn);
-
- return r;
- }
- }
-
- if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
-
- if (infinite && qn->lower <= 1) {
- if (qn->greedy) {
- if (qn->lower == 1) {
- r = add_opcode_rel_addr(reg, OP_JUMP,
- (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
- if (r != 0) return r;
- }
-
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r != 0) return r;
- r = add_state_check_num(reg, ckn);
- if (r != 0) return r;
- r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
- }
- if (r != 0) return r;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
- if (r != 0) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP
- + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
- }
- else {
- if (qn->lower == 0) {
- r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
- if (r != 0) return r;
- }
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
- if (r != 0) return r;
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
- if (r != 0) return r;
- r = add_state_check_num(reg, ckn);
- if (r != 0) return r;
- r = add_rel_addr(reg,
- -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
- }
- else
- r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
- }
- }
- else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r != 0) return r;
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
- }
- else
- r = 0;
- }
- else if (qn->upper == 1 && qn->greedy) {
- if (qn->lower == 0) {
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r != 0) return r;
- r = add_state_check_num(reg, ckn);
- if (r != 0) return r;
- r = add_rel_addr(reg, tlen);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
- }
- if (r != 0) return r;
- }
-
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r != 0) return r;
- r = add_state_check_num(reg, ckn);
- if (r != 0) return r;
- r = add_rel_addr(reg, SIZE_OP_JUMP);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
- }
-
- if (r != 0) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r != 0) return r;
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
- }
- else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
- if (CKN_ON) {
- if (r != 0) return r;
- r = add_opcode(reg, OP_STATE_CHECK);
- if (r != 0) return r;
- r = add_state_check_num(reg, ckn);
- }
- }
- return r;
-}
-
-#else /* USE_COMBINATION_EXPLOSION_CHECK */
-
static int
compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
{
int len, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->body_empty_info;
+ enum QuantBodyEmpty empty_info = qn->body_empty_info;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
+ if (tlen == 0) return 0;
/* anychar repeat */
- if (is_anychar_star_quantifier(qn)) {
- if (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE) {
+ if (is_anychar_infinite_greedy(qn)) {
+ if (qn->lower <= 1 ||
+ int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
if (IS_NOT_NULL(qn->next_head_exact))
return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
else
@@ -1084,7 +828,8 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ (qn->lower <= 1 ||
+ int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
len = SIZE_OP_JUMP;
}
@@ -1107,8 +852,9 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
len = SIZE_OP_JUMP + tlen;
}
else if (!infinite && qn->greedy &&
- (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
- <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ (qn->upper == 1 ||
+ int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
+ QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
len = tlen * qn->lower;
len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
}
@@ -1128,13 +874,15 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int i, r, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->body_empty_info;
+ enum QuantBodyEmpty empty_info = qn->body_empty_info;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
+ if (tlen == 0) return 0;
- if (is_anychar_star_quantifier(qn) &&
- (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ if (is_anychar_infinite_greedy(qn) &&
+ (qn->lower <= 1 ||
+ int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
if (IS_NOT_NULL(qn->next_head_exact)) {
@@ -1159,7 +907,8 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ (qn->lower <= 1 ||
+ int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
if (qn->greedy) {
if (IS_NOT_NULL(qn->head_exact))
@@ -1223,8 +972,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else if (! infinite && qn->greedy &&
- (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
- <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ (qn->upper == 1 ||
+ int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
+ QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
int n = qn->upper - qn->lower;
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
@@ -1250,7 +1000,6 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
return r;
}
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
static int
compile_length_option_node(EnclosureNode* node, regex_t* reg)
@@ -1358,7 +1107,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
if (tlen < 0) return tlen;
len = tlen * qn->lower
- + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
+ + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
}
else {
len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
@@ -1505,14 +1254,14 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (len < 0) return len;
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
+ r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);
if (r != 0) return r;
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_POP);
+ r = add_opcode(reg, OP_POP_OUT);
if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
- -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
+ -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));
}
else {
r = add_opcode(reg, OP_ATOMIC_START);
@@ -1762,6 +1511,30 @@ compile_gimmick_node(GimmickNode* node, regex_t* reg)
if (r != 0) return r;
r = add_mem_num(reg, node->id);
break;
+
+#ifdef USE_CALLOUT
+ case GIMMICK_CALLOUT:
+ switch (node->detail_type) {
+ case ONIG_CALLOUT_OF_CONTENTS:
+ case ONIG_CALLOUT_OF_NAME:
+ {
+ r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?
+ OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);
+ if (r != 0) return r;
+ if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
+ r = add_mem_num(reg, node->id);
+ if (r != 0) return r;
+ }
+ r = add_mem_num(reg, node->num);
+ if (r != 0) return r;
+ }
+ break;
+
+ default:
+ r = ONIGERR_TYPE_BUG;
+ break;
+ }
+#endif
}
return r;
@@ -1785,6 +1558,23 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
case GIMMICK_UPDATE_VAR:
len = SIZE_OP_UPDATE_VAR;
break;
+
+#ifdef USE_CALLOUT
+ case GIMMICK_CALLOUT:
+ switch (node->detail_type) {
+ case ONIG_CALLOUT_OF_CONTENTS:
+ len = SIZE_OP_CALLOUT_CONTENTS;
+ break;
+ case ONIG_CALLOUT_OF_NAME:
+ len = SIZE_OP_CALLOUT_NAME;
+ break;
+
+ default:
+ len = ONIGERR_TYPE_BUG;
+ break;
+ }
+ break;
+#endif
}
return len;
@@ -2337,7 +2127,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
#ifdef USE_CALL
static int
-unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
+fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
{
int i, offset;
EnclosureNode* en;
@@ -3725,11 +3515,12 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* re
}
static int
-expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
- UChar *p, int slen, UChar *end, regex_t* reg,
- Node **rnode)
+expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
+ int slen, UChar *end, regex_t* reg, Node **rnode)
{
- int r, i, j, len, varlen;
+ int r, i, j;
+ int len;
+ int varlen;
Node *anode, *var_anode, *snode, *xnode, *an;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
@@ -3972,145 +3763,8 @@ expand_case_fold_string(Node* node, regex_t* reg)
return r;
}
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-
-#define CEC_THRES_NUM_BIG_REPEAT 512
-#define CEC_INFINITE_NUM 0x7fffffff
-
-#define CEC_IN_INFINITE_REPEAT (1<<0)
-#define CEC_IN_FINITE_REPEAT (1<<1)
-#define CEC_CONT_BIG_REPEAT (1<<2)
-
-static int
-setup_comb_exp_check(Node* node, int state, ScanEnv* env)
-{
- int r = state;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- {
- do {
- r = setup_comb_exp_check(NODE_CAR(node), r, env);
- } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- }
- break;
-
- case NODE_ALT:
- {
- int ret;
- do {
- ret = setup_comb_exp_check(NODE_CAR(node), state, env);
- r |= ret;
- } while (ret >= 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- }
- break;
-
- case NODE_QUANT:
- {
- int var_num;
- int child_state = state;
- int add_state = 0;
- QuantNode* qn = QUANT_(node);
- Node* target = NODE_QUANT_BODY(qn);
-
- if (! IS_REPEAT_INFINITE(qn->upper)) {
- if (qn->upper > 1) {
- /* {0,1}, {1,1} are allowed */
- child_state |= CEC_IN_FINITE_REPEAT;
-
- /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
- if (env->backrefed_mem == 0) {
- if (NODE_TYPE(NODE_QUANT_BODY(qn)) == NODE_ENCLOSURE) {
- EnclosureNode* en = ENCLOSURE_(NODE_QUANT_BODY(qn));
- if (en->type == ENCLOSURE_MEMORY) {
- if (NODE_TYPE(NODE_ENCLOSURE_BODY(en)) == NODE_QUANT) {
- QuantNode* q = QUANT_(NODE_ENCLOSURE_BODY(en));
- if (IS_REPEAT_INFINITE(q->upper)
- && q->greedy == qn->greedy) {
- qn->upper = (qn->lower == 0 ? 1 : qn->lower);
- if (qn->upper == 1)
- child_state = state;
- }
- }
- }
- }
- }
- }
- }
-
- if (state & CEC_IN_FINITE_REPEAT) {
- qn->comb_exp_check_num = -1;
- }
- else {
- if (IS_REPEAT_INFINITE(qn->upper)) {
- var_num = CEC_INFINITE_NUM;
- child_state |= CEC_IN_INFINITE_REPEAT;
- }
- else {
- var_num = qn->upper - qn->lower;
- }
-
- if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
- add_state |= CEC_CONT_BIG_REPEAT;
-
- if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
- ((state & CEC_CONT_BIG_REPEAT) != 0 &&
- var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
- if (qn->comb_exp_check_num == 0) {
- env->num_comb_exp_check++;
- qn->comb_exp_check_num = env->num_comb_exp_check;
- if (env->curr_max_regnum > env->comb_exp_max_regnum)
- env->comb_exp_max_regnum = env->curr_max_regnum;
- }
- }
- }
-
- r = setup_comb_exp_check(target, child_state, env);
- r |= add_state;
- }
- break;
-
- case NODE_ENCLOSURE:
- {
- EnclosureNode* en = ENCLOSURE_(node);
-
- switch (en->type) {
- case ENCLOSURE_MEMORY:
- {
- if (env->curr_max_regnum < en->m.regnum)
- env->curr_max_regnum = en->m.regnum;
-
- r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env);
- }
- break;
-
- default:
- r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env);
- break;
- }
- }
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- if (NODE_IS_RECURSION(node))
- env->has_recursion = 1;
- else
- r = setup_comb_exp_check(NODE_BODY(node), state, env);
- break;
-#endif
-
- default:
- break;
- }
-
- return r;
-}
-#endif
-
#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
-static int
+static enum QuantBodyEmpty
quantifiers_memory_node_info(Node* node)
{
int r = QUANT_BODY_IS_EMPTY;
@@ -4638,7 +4292,7 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
#define ALLOWED_TYPE_IN_LB \
( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STRING | BIT_NODE_CCLASS \
| BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \
- | BIT_NODE_CALL )
+ | BIT_NODE_CALL | BIT_NODE_GIMMICK)
#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )
#define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)
@@ -4765,7 +4419,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- if (qn->greedy && (qn->body_empty_info != 0)) {
+ if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {
if (NODE_TYPE(body) == NODE_QUANT) {
QuantNode* tqn = QUANT_(body);
if (IS_NOT_NULL(tqn->head_exact)) {
@@ -4948,10 +4602,10 @@ set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
typedef struct {
OnigLen min; /* min byte length */
OnigLen max; /* max byte length */
-} MinMaxLen;
+} MinMax;
typedef struct {
- MinMaxLen mmd;
+ MinMax mmd;
OnigEncoding enc;
OnigOptionType options;
OnigCaseFoldType case_fold_flag;
@@ -4964,35 +4618,35 @@ typedef struct {
} OptAnc;
typedef struct {
- MinMaxLen mmd; /* info position */
+ MinMax mmd; /* position */
OptAnc anc;
- int reach_end;
- int ignore_case;
- int len;
- UChar s[OPT_EXACT_MAXLEN];
+ int reach_end;
+ int ignore_case;
+ int len;
+ UChar s[OPT_EXACT_MAXLEN];
} OptExact;
typedef struct {
- MinMaxLen mmd; /* info position */
- OptAnc anc;
- int value; /* weighted value */
- UChar map[ONIG_CHAR_TABLE_SIZE];
+ MinMax mmd; /* position */
+ OptAnc anc;
+ int value; /* weighted value */
+ UChar map[ONIG_CHAR_TABLE_SIZE];
} OptMap;
typedef struct {
- MinMaxLen len;
- OptAnc anc;
- OptExact exb; /* boundary */
- OptExact exm; /* middle */
- OptExact expr; /* prec read (?=...) */
- OptMap map; /* boundary */
+ MinMax len;
+ OptAnc anc;
+ OptExact exb; /* boundary */
+ OptExact exm; /* middle */
+ OptExact expr; /* prec read (?=...) */
+ OptMap map; /* boundary */
} NodeOpt;
static int
map_position_value(OnigEncoding enc, int i)
{
- static const short int ByteValTable[] = {
+ static const short int Vals[] = {
5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
@@ -5003,18 +4657,18 @@ map_position_value(OnigEncoding enc, int i)
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
- if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
+ if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
return 20;
else
- return (int )ByteValTable[i];
+ return (int )Vals[i];
}
else
return 4; /* Take it easy. */
}
static int
-distance_value(MinMaxLen* mm)
+distance_value(MinMax* mm)
{
/* 1000 / (min-max-dist + 1) */
static const short int dist_vals[] = {
@@ -5043,7 +4697,7 @@ distance_value(MinMaxLen* mm)
}
static int
-comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
+comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)
{
if (v2 <= 0) return -1;
if (v1 <= 0) return 1;
@@ -5060,40 +4714,40 @@ comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
}
static int
-is_equal_mml(MinMaxLen* a, MinMaxLen* b)
+is_equal_mml(MinMax* a, MinMax* b)
{
return (a->min == b->min && a->max == b->max) ? 1 : 0;
}
static void
-set_mml(MinMaxLen* mml, OnigLen min, OnigLen max)
+set_mml(MinMax* l, OnigLen min, OnigLen max)
{
- mml->min = min;
- mml->max = max;
+ l->min = min;
+ l->max = max;
}
static void
-clear_mml(MinMaxLen* mml)
+clear_mml(MinMax* l)
{
- mml->min = mml->max = 0;
+ l->min = l->max = 0;
}
static void
-copy_mml(MinMaxLen* to, MinMaxLen* from)
+copy_mml(MinMax* to, MinMax* from)
{
to->min = from->min;
to->max = from->max;
}
static void
-add_mml(MinMaxLen* to, MinMaxLen* from)
+add_mml(MinMax* to, MinMax* from)
{
to->min = distance_add(to->min, from->min);
to->max = distance_add(to->max, from->max);
}
static void
-alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
+alt_merge_mml(MinMax* to, MinMax* from)
{
if (to->min > from->min) to->min = from->min;
if (to->max < from->max) to->max = from->max;
@@ -5106,10 +4760,10 @@ copy_opt_env(OptEnv* to, OptEnv* from)
}
static void
-clear_opt_anc_info(OptAnc* anc)
+clear_opt_anc_info(OptAnc* a)
{
- anc->left = 0;
- anc->right = 0;
+ a->left = 0;
+ a->right = 0;
}
static void
@@ -5139,11 +4793,10 @@ concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
}
static int
-is_left(int anc)
+is_left(int a)
{
- if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
- anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
- anc == ANCHOR_PREC_READ_NOT)
+ if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF ||
+ a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)
return 0;
return 1;
@@ -5183,20 +4836,20 @@ alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
}
static int
-is_full_opt_exact(OptExact* ex)
+is_full_opt_exact(OptExact* e)
{
- return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
+ return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);
}
static void
-clear_opt_exact(OptExact* ex)
+clear_opt_exact(OptExact* e)
{
- clear_mml(&ex->mmd);
- clear_opt_anc_info(&ex->anc);
- ex->reach_end = 0;
- ex->ignore_case = 0;
- ex->len = 0;
- ex->s[0] = '\0';
+ clear_mml(&e->mmd);
+ clear_opt_anc_info(&e->anc);
+ e->reach_end = 0;
+ e->ignore_case = 0;
+ e->len = 0;
+ e->s[0] = '\0';
}
static void
@@ -5205,24 +4858,28 @@ copy_opt_exact(OptExact* to, OptExact* from)
*to = *from;
}
-static void
+static int
concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)
{
- int i, j, len;
+ int i, j, len, r;
UChar *p, *end;
OptAnc tanc;
if (! to->ignore_case && add->ignore_case) {
- if (to->len >= add->len) return ; /* avoid */
+ if (to->len >= add->len) return 0; /* avoid */
to->ignore_case = 1;
}
+ r = 0;
p = add->s;
end = p + add->len;
for (i = to->len; p < end; ) {
len = enclen(enc, p);
- if (i + len > OPT_EXACT_MAXLEN) break;
+ if (i + len > OPT_EXACT_MAXLEN) {
+ r = 1; /* 1:full */
+ break;
+ }
for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++;
}
@@ -5233,11 +4890,12 @@ concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)
concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
if (! to->reach_end) tanc.right = 0;
copy_opt_anc_info(&to->anc, &tanc);
+
+ return r;
}
static void
-concat_opt_exact_str(OptExact* to, UChar* s, UChar* end,
- int raw ARG_UNUSED, OnigEncoding enc)
+concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)
{
int i, j, len;
UChar *p;
@@ -5291,31 +4949,31 @@ alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)
static void
select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)
{
- int v1, v2;
+ int vn, va;
- v1 = now->len;
- v2 = alt->len;
+ vn = now->len;
+ va = alt->len;
- if (v2 == 0) {
+ if (va == 0) {
return ;
}
- else if (v1 == 0) {
+ else if (vn == 0) {
copy_opt_exact(now, alt);
return ;
}
- else if (v1 <= 2 && v2 <= 2) {
+ else if (vn <= 2 && va <= 2) {
/* ByteValTable[x] is big value --> low price */
- v2 = map_position_value(enc, now->s[0]);
- v1 = map_position_value(enc, alt->s[0]);
+ va = map_position_value(enc, now->s[0]);
+ vn = map_position_value(enc, alt->s[0]);
- if (now->len > 1) v1 += 5;
- if (alt->len > 1) v2 += 5;
+ if (now->len > 1) vn += 5;
+ if (alt->len > 1) va += 5;
}
- if (now->ignore_case == 0) v1 *= 2;
- if (alt->ignore_case == 0) v2 *= 2;
+ if (now->ignore_case == 0) vn *= 2;
+ if (alt->ignore_case == 0) va *= 2;
- if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
+ if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
copy_opt_exact(now, alt);
}
@@ -5354,17 +5012,17 @@ copy_opt_map(OptMap* to, OptMap* from)
}
static void
-add_char_opt_map(OptMap* map, UChar c, OnigEncoding enc)
+add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
{
- if (map->map[c] == 0) {
- map->map[c] = 1;
- map->value += map_position_value(enc, c);
+ if (m->map[c] == 0) {
+ m->map[c] = 1;
+ m->value += map_position_value(enc, c);
}
}
static int
add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,
- OnigEncoding enc, OnigCaseFoldType case_fold_flag)
+ OnigEncoding enc, OnigCaseFoldType fold_flag)
{
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
@@ -5372,8 +5030,8 @@ add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,
add_char_opt_map(map, p[0], enc);
- case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
+ fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);
if (n < 0) return n;
for (i = 0; i < n; i++) {
@@ -5389,7 +5047,7 @@ select_opt_map(OptMap* now, OptMap* alt)
{
static int z = 1<<15; /* 32768: something big value */
- int v1, v2;
+ int vn, va;
if (alt->value == 0) return ;
if (now->value == 0) {
@@ -5397,9 +5055,9 @@ select_opt_map(OptMap* now, OptMap* alt)
return ;
}
- v1 = z / now->value;
- v2 = z / alt->value;
- if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
+ vn = z / now->value;
+ va = z / alt->value;
+ if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
copy_opt_map(now, alt);
}
@@ -5407,13 +5065,13 @@ static int
comp_opt_exact_or_map(OptExact* e, OptMap* m)
{
#define COMP_EM_BASE 20
- int ve, vm;
+ int ae, am;
if (m->value <= 0) return -1;
- ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
- vm = COMP_EM_BASE * 5 * 2 / m->value;
- return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
+ ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
+ am = COMP_EM_BASE * 5 * 2 / m->value;
+ return comp_distance_value(&e->mmd, &m->mmd, ae, am);
}
static void
@@ -5444,11 +5102,11 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
}
static void
-set_bound_node_opt_info(NodeOpt* opt, MinMaxLen* mmd)
+set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)
{
- copy_mml(&(opt->exb.mmd), mmd);
- copy_mml(&(opt->expr.mmd), mmd);
- copy_mml(&(opt->map.mmd), mmd);
+ copy_mml(&(opt->exb.mmd), plen);
+ copy_mml(&(opt->expr.mmd), plen);
+ copy_mml(&(opt->map.mmd), plen);
}
static void
@@ -5543,10 +5201,12 @@ alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)
static int
optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
{
- OnigEncoding enc;
int i;
- int r = 0;
+ int r;
+ NodeOpt xo;
+ OnigEncoding enc;
+ r = 0;
enc = env->enc;
clear_node_opt_info(opt);
set_bound_node_opt_info(opt, &env->mmd);
@@ -5555,15 +5215,14 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
case NODE_LIST:
{
OptEnv nenv;
- NodeOpt nopt;
Node* nd = node;
copy_opt_env(&nenv, env);
do {
- r = optimize_nodes(NODE_CAR(nd), &nopt, &nenv);
+ r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
if (r == 0) {
- add_mml(&nenv.mmd, &nopt.len);
- concat_left_node_opt_info(enc, opt, &nopt);
+ add_mml(&nenv.mmd, &xo.len);
+ concat_left_node_opt_info(enc, opt, &xo);
}
} while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
}
@@ -5571,14 +5230,13 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
case NODE_ALT:
{
- NodeOpt nopt;
Node* nd = node;
do {
- r = optimize_nodes(NODE_CAR(nd), &nopt, env);
+ r = optimize_nodes(NODE_CAR(nd), &xo, env);
if (r == 0) {
- if (nd == node) copy_node_opt_info(opt, &nopt);
- else alt_merge_node_opt_info(opt, &nopt, env);
+ if (nd == node) copy_node_opt_info(opt, &xo);
+ else alt_merge_node_opt_info(opt, &xo, env);
}
} while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));
}
@@ -5588,11 +5246,10 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
{
StrNode* sn = STR_(node);
int slen = (int )(sn->end - sn->s);
- int is_raw = NODE_STRING_IS_RAW(node);
+ /* int is_raw = NODE_STRING_IS_RAW(node); */
if (! NODE_STRING_IS_AMBIG(node)) {
- concat_opt_exact_str(&opt->exb, sn->s, sn->end,
- NODE_STRING_IS_RAW(node), enc);
+ concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
if (slen > 0) {
add_char_opt_map(&opt->map, *(sn->s), enc);
}
@@ -5606,7 +5263,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
}
else {
- concat_opt_exact_str(&opt->exb, sn->s, sn->end, is_raw, enc);
+ concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
opt->exb.ignore_case = 1;
if (slen > 0) {
@@ -5709,19 +5366,17 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
case ANCHOR_PREC_READ:
{
- NodeOpt nopt;
-
- r = optimize_nodes(NODE_BODY(node), &nopt, env);
+ r = optimize_nodes(NODE_BODY(node), &xo, env);
if (r == 0) {
- if (nopt.exb.len > 0)
- copy_opt_exact(&opt->expr, &nopt.exb);
- else if (nopt.exm.len > 0)
- copy_opt_exact(&opt->expr, &nopt.exm);
+ if (xo.exb.len > 0)
+ copy_opt_exact(&opt->expr, &xo.exb);
+ else if (xo.exm.len > 0)
+ copy_opt_exact(&opt->expr, &xo.exm);
opt->expr.reach_end = 0;
- if (nopt.map.value > 0)
- copy_opt_map(&opt->map, &nopt.map);
+ if (xo.map.value > 0)
+ copy_opt_map(&opt->map, &xo.map);
}
}
break;
@@ -5771,48 +5426,47 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
case NODE_QUANT:
{
OnigLen min, max;
- NodeOpt nopt;
QuantNode* qn = QUANT_(node);
- r = optimize_nodes(NODE_BODY(node), &nopt, env);
+ r = optimize_nodes(NODE_BODY(node), &xo, env);
if (r != 0) break;
- if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->lower > 0) {
+ copy_node_opt_info(opt, &xo);
+ if (xo.exb.len > 0) {
+ if (xo.exb.reach_end) {
+ for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {
+ int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);
+ if (rc > 0) break;
+ }
+ if (i < qn->lower) opt->exb.reach_end = 0;
+ }
+ }
+
+ if (qn->lower != qn->upper) {
+ opt->exb.reach_end = 0;
+ opt->exm.reach_end = 0;
+ }
+ if (qn->lower > 1)
+ opt->exm.reach_end = 0;
+ }
+
+ if (IS_REPEAT_INFINITE(qn->upper)) {
if (env->mmd.max == 0 &&
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);
else
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);
}
+
+ max = (xo.len.max > 0 ? INFINITE_LEN : 0);
}
else {
- if (qn->lower > 0) {
- copy_node_opt_info(opt, &nopt);
- if (nopt.exb.len > 0) {
- if (nopt.exb.reach_end) {
- for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {
- concat_opt_exact(&opt->exb, &nopt.exb, enc);
- }
- if (i < qn->lower) opt->exb.reach_end = 0;
- }
- }
-
- if (qn->lower != qn->upper) {
- opt->exb.reach_end = 0;
- opt->exm.reach_end = 0;
- }
- if (qn->lower > 1)
- opt->exm.reach_end = 0;
- }
+ max = distance_multiply(xo.len.max, qn->upper);
}
- min = distance_multiply(nopt.len.min, qn->lower);
- if (IS_REPEAT_INFINITE(qn->upper))
- max = (nopt.len.max > 0 ? INFINITE_LEN : 0);
- else
- max = distance_multiply(nopt.len.max, qn->upper);
-
+ min = distance_multiply(xo.len.min, qn->lower);
set_mml(&opt->len, min, max);
}
break;
@@ -5848,9 +5502,9 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
#endif
{
r = optimize_nodes(NODE_BODY(node), opt, env);
- if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
+ if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {
if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
- remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
+ remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);
}
}
break;
@@ -5862,24 +5516,23 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
case ENCLOSURE_IF_ELSE:
{
OptEnv nenv;
- NodeOpt nopt;
copy_opt_env(&nenv, env);
- r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &nopt, &nenv);
+ r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);
if (r == 0) {
- add_mml(&nenv.mmd, &nopt.len);
- concat_left_node_opt_info(enc, opt, &nopt);
+ add_mml(&nenv.mmd, &xo.len);
+ concat_left_node_opt_info(enc, opt, &xo);
if (IS_NOT_NULL(en->te.Then)) {
- r = optimize_nodes(en->te.Then, &nopt, &nenv);
+ r = optimize_nodes(en->te.Then, &xo, &nenv);
if (r == 0) {
- concat_left_node_opt_info(enc, opt, &nopt);
+ concat_left_node_opt_info(enc, opt, &xo);
}
}
if (IS_NOT_NULL(en->te.Else)) {
- r = optimize_nodes(en->te.Else, &nopt, env);
+ r = optimize_nodes(en->te.Else, &xo, env);
if (r == 0)
- alt_merge_node_opt_info(opt, &nopt, env);
+ alt_merge_node_opt_info(opt, &xo, env);
}
}
}
@@ -5914,12 +5567,12 @@ set_optimize_exact(regex_t* reg, OptExact* e)
CHECK_NULL_RETURN_MEMERR(reg->exact);
xmemcpy(reg->exact, e->s, e->len);
reg->exact_end = reg->exact + e->len;
- reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
+ reg->optimize = OPTIMIZE_EXACT_IC;
}
else {
int allow_reverse;
- reg->exact = str_dup(e->s, e->s + e->len);
+ reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);
CHECK_NULL_RETURN_MEMERR(reg->exact);
reg->exact_end = reg->exact + e->len;
@@ -5932,10 +5585,10 @@ set_optimize_exact(regex_t* reg, OptExact* e)
if (r != 0) return r;
reg->optimize = (allow_reverse != 0
- ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
+ ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);
}
else {
- reg->optimize = ONIG_OPTIMIZE_EXACT;
+ reg->optimize = OPTIMIZE_EXACT;
}
}
@@ -5957,7 +5610,7 @@ set_optimize_map(regex_t* reg, OptMap* m)
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
reg->map[i] = m->map[i];
- reg->optimize = ONIG_OPTIMIZE_MAP;
+ reg->optimize = OPTIMIZE_MAP;
reg->dmin = m->mmd.min;
reg->dmax = m->mmd.max;
@@ -5994,11 +5647,11 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
if (r != 0) return r;
reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |
- ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
+ ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |
ANCHOR_LOOK_BEHIND);
if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
- reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
+ reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;
reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
ANCHOR_PREC_READ_NOT);
@@ -6038,7 +5691,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
static void
clear_optimize_info(regex_t* reg)
{
- reg->optimize = ONIG_OPTIMIZE_NONE;
+ reg->optimize = OPTIMIZE_NONE;
reg->anchor = 0;
reg->anchor_dmin = 0;
reg->anchor_dmax = 0;
@@ -6141,14 +5794,14 @@ print_anchor(FILE* f, int anchor)
q = 1;
fprintf(f, "end-line");
}
- if (anchor & ANCHOR_ANYCHAR_STAR) {
+ if (anchor & ANCHOR_ANYCHAR_INF) {
if (q) fprintf(f, ", ");
q = 1;
- fprintf(f, "anychar-star");
+ fprintf(f, "anychar-inf");
}
- if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
+ if (anchor & ANCHOR_ANYCHAR_INF_ML) {
if (q) fprintf(f, ", ");
- fprintf(f, "anychar-star-ml");
+ fprintf(f, "anychar-inf-ml");
}
fprintf(f, "]");
@@ -6180,7 +5833,7 @@ print_optimize_info(FILE* f, regex_t* reg)
}
fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
}
- else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
+ else if (reg->optimize & OPTIMIZE_MAP) {
int c, i, n = 0;
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
@@ -6208,6 +5861,66 @@ print_optimize_info(FILE* f, regex_t* reg)
#endif
+extern RegexExt*
+onig_get_regex_ext(regex_t* reg)
+{
+ if (IS_NULL(REG_EXTP(reg))) {
+ RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
+ if (IS_NULL(ext)) return 0;
+
+ ext->pattern = 0;
+ ext->pattern_end = 0;
+#ifdef USE_CALLOUT
+ ext->tag_table = 0;
+ ext->callout_num = 0;
+ ext->callout_list_alloc = 0;
+ ext->callout_list = 0;
+#endif
+
+ REG_EXTPL(reg) = (void* )ext;
+ }
+
+ return REG_EXTP(reg);
+}
+
+static void
+free_regex_ext(RegexExt* ext)
+{
+ if (IS_NOT_NULL(ext)) {
+ if (IS_NOT_NULL(ext->pattern))
+ xfree((void* )ext->pattern);
+
+#ifdef USE_CALLOUT
+ if (IS_NOT_NULL(ext->tag_table))
+ onig_callout_tag_table_free(ext->tag_table);
+
+ if (IS_NOT_NULL(ext->callout_list))
+ onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
+#endif
+
+ xfree(ext);
+ }
+}
+
+extern int
+onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
+{
+ RegexExt* ext;
+ UChar* s;
+
+ ext = onig_get_regex_ext(reg);
+ CHECK_NULL_RETURN_MEMERR(ext);
+
+ s = onigenc_strdup(reg->enc, pattern, pattern_end);
+ CHECK_NULL_RETURN_MEMERR(s);
+
+ ext->pattern = s;
+ ext->pattern_end = s + (pattern_end - pattern);
+
+ return ONIG_NORMAL;
+}
+
+
extern void
onig_free_body(regex_t* reg)
{
@@ -6217,7 +5930,10 @@ onig_free_body(regex_t* reg)
if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(REG_EXTP(reg))) xfree(REG_EXTP(reg));
+ if (IS_NOT_NULL(REG_EXTP(reg))) {
+ free_regex_ext(REG_EXTP(reg));
+ REG_EXTPL(reg) = 0;
+ }
onig_names_free(reg);
}
@@ -6245,9 +5961,6 @@ onig_transfer(regex_t* to, regex_t* from)
}
-#ifdef ONIG_DEBUG_COMPILE
-static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
-#endif
#ifdef ONIG_DEBUG_PARSE
static void print_tree P_((FILE* f, Node* node));
#endif
@@ -6286,9 +5999,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_null_check = 0;
reg->repeat_range_alloc = 0;
reg->repeat_range = (OnigRepeatRange* )NULL;
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- reg->num_comb_exp_check = 0;
-#endif
r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
@@ -6346,33 +6056,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
reg->bt_mem_start |= reg->bt_mem_end;
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- if (scan_env.backrefed_mem == 0
-#ifdef USE_CALL
- || scan_env.num_call == 0
-#endif
- ) {
- setup_comb_exp_check(root, 0, &scan_env);
-#ifdef USE_CALL
- if (scan_env.has_recursion != 0) {
- scan_env.num_comb_exp_check = 0;
- }
- else
-#endif
- if (scan_env.comb_exp_max_regnum > 0) {
- int i;
- for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
- if (MEM_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
- scan_env.num_comb_exp_check = 0;
- break;
- }
- }
- }
- }
-
- reg->num_comb_exp_check = scan_env.num_comb_exp_check;
-#endif
-
clear_optimize_info(reg);
#ifndef ONIG_DONT_OPTIMIZE
r = set_optimize_info_from_tree(root, reg, &scan_env);
@@ -6398,13 +6081,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = add_opcode(reg, OP_END);
#ifdef USE_CALL
if (scan_env.num_call > 0) {
- r = unset_addr_list_fix(&uslist, reg);
+ r = fix_unset_addr_list(&uslist, reg);
unset_addr_list_end(&uslist);
if (r != 0) goto err;
}
#endif
- if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
+ if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
+#ifdef USE_CALLOUT
+ || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)
+#endif
+ )
reg->stack_pop_level = STACK_POP_LEVEL_ALL;
else {
if (reg->bt_mem_start != 0)
@@ -6422,7 +6109,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
#ifdef ONIG_DEBUG_COMPILE
onig_print_names(stderr, reg);
- print_compiled_byte_code_list(stderr, reg);
+ onig_print_compiled_byte_code_list(stderr, reg);
#endif
end:
@@ -6464,11 +6151,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl
#if 0
return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
#else
- r = onig_initialize(NULL, 0);
- if (r != 0)
- return ONIGERR_FAIL_TO_INITIALIZE;
-
- r = onig_initialize_encoding(enc);
+ r = onig_initialize(&enc, 1);
if (r != 0)
return ONIGERR_FAIL_TO_INITIALIZE;
@@ -6569,16 +6252,21 @@ onig_initialize(OnigEncoding encodings[], int n)
return r;
}
- return 0;
+ return ONIG_NORMAL;
}
-static OnigEndCallListItemType* EndCallTop;
+/* List node holding one `void (*)(void)` callback; nodes are chained
+ * through `next`. onig_add_end_call() allocates one node per call. */
+typedef struct EndCallListItem {
+ /* following node in the chain */
+ struct EndCallListItem* next;
+ /* callback stored in this node */
+ void (*func)(void);
+} EndCallListItemType;
+
+static EndCallListItemType* EndCallTop;
extern void onig_add_end_call(void (*func)(void))
{
- OnigEndCallListItemType* item;
+ EndCallListItemType* item;
- item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
+ item = (EndCallListItemType* )xmalloc(sizeof(*item));
if (item == 0) return ;
item->next = EndCallTop;
@@ -6590,7 +6278,7 @@ extern void onig_add_end_call(void (*func)(void))
static void
exec_end_call_list(void)
{
- OnigEndCallListItemType* prev;
+ EndCallListItemType* prev;
void (*func)(void);
while (EndCallTop != 0) {
@@ -6608,6 +6296,12 @@ onig_end(void)
{
exec_end_call_list();
+#ifdef USE_CALLOUT
+ onig_global_callout_names_free();
+#endif
+
+ onigenc_end();
+
onig_inited = 0;
return 0;
@@ -6673,144 +6367,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
}
-#ifdef ONIG_DEBUG
-
-/* arguments type */
-#define ARG_SPECIAL -1
-#define ARG_NON 0
-#define ARG_RELADDR 1
-#define ARG_ABSADDR 2
-#define ARG_LENGTH 3
-#define ARG_MEMNUM 4
-#define ARG_OPTION 5
-#define ARG_STATE_CHECK 6
-#define ARG_MODE 7
-
-OnigOpInfoType OnigOpInfo[] = {
- { OP_FINISH, "finish", ARG_NON },
- { OP_END, "end", ARG_NON },
- { OP_EXACT1, "exact1", ARG_SPECIAL },
- { OP_EXACT2, "exact2", ARG_SPECIAL },
- { OP_EXACT3, "exact3", ARG_SPECIAL },
- { OP_EXACT4, "exact4", ARG_SPECIAL },
- { OP_EXACT5, "exact5", ARG_SPECIAL },
- { OP_EXACTN, "exactn", ARG_SPECIAL },
- { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
- { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
- { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
- { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
- { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
- { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
- { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
- { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
- { OP_CCLASS, "cclass", ARG_SPECIAL },
- { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
- { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
- { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
- { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
- { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
-#ifdef USE_OP_CCLASS_NODE
- { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
-#endif
- { OP_ANYCHAR, "anychar", ARG_NON },
- { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
- { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
- { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
- { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
- { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
- { OP_WORD, "word", ARG_NON },
- { OP_WORD_ASCII, "word-ascii", ARG_NON },
- { OP_NO_WORD, "not-word", ARG_NON },
- { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON },
- { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE },
- { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE },
- { OP_WORD_BEGIN, "word-begin", ARG_MODE },
- { OP_WORD_END, "word-end", ARG_MODE },
- { OP_BEGIN_BUF, "begin-buf", ARG_NON },
- { OP_END_BUF, "end-buf", ARG_NON },
- { OP_BEGIN_LINE, "begin-line", ARG_NON },
- { OP_END_LINE, "end-line", ARG_NON },
- { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
- { OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BACKREF1, "backref1", ARG_NON },
- { OP_BACKREF2, "backref2", ARG_NON },
- { OP_BACKREF_N, "backref-n", ARG_MEMNUM },
- { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL },
- { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
- { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL },
- { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL },
- { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL },
- { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
- { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
- { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
- { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
- { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
- { OP_SET_OPTION, "set-option", ARG_OPTION },
- { OP_FAIL, "fail", ARG_NON },
- { OP_JUMP, "jump", ARG_RELADDR },
- { OP_PUSH, "push", ARG_RELADDR },
- { OP_PUSH_SUPER, "push_SUPER", ARG_RELADDR },
- { OP_POP, "pop", ARG_NON },
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
- { OP_REPEAT, "repeat", ARG_SPECIAL },
- { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
- { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
- { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
- { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
- { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
- { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM },
- { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM },
- { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM },
- { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM },
- { OP_PREC_READ_START, "push-pos", ARG_NON },
- { OP_PREC_READ_END, "pop-pos", ARG_NON },
- { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR },
- { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON },
- { OP_ATOMIC_START, "atomic-start", ARG_NON },
- { OP_ATOMIC_END, "atomic-end", ARG_NON },
- { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
- { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL },
- { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON },
- { OP_CALL, "call", ARG_ABSADDR },
- { OP_RETURN, "return", ARG_NON },
- { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL },
- { OP_UPDATE_VAR, "update-var", ARG_SPECIAL },
- { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
- { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
- { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
- { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
- { OP_STATE_CHECK_ANYCHAR_ML_STAR,
- "state-check-anychar-ml*", ARG_STATE_CHECK },
- { -1, "", ARG_NON }
-};
-
-static char*
-op2name(int opcode)
-{
- int i;
-
- for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
- if (opcode == OnigOpInfo[i].opcode)
- return OnigOpInfo[i].name;
- }
- return "";
-}
-
-static int
-op2arg_type(int opcode)
-{
- int i;
-
- for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
- if (opcode == OnigOpInfo[i].opcode)
- return OnigOpInfo[i].arg_type;
- }
- return ARG_SPECIAL;
-}
+#ifdef ONIG_DEBUG_PARSE
static void
p_string(FILE* f, int len, UChar* s)
@@ -6820,326 +6377,6 @@ p_string(FILE* f, int len, UChar* s)
}
static void
-p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
-{
- int x = len * mb_len;
-
- fprintf(f, ":%d:", len);
- while (x-- > 0) { fputc(*s++, f); }
-}
-
-static void
-p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start)
-{
- RelAddrType curr = (RelAddrType )(p - start);
-
- fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
-}
-
-extern void
-onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
- OnigEncoding enc)
-{
- int i, n, arg_type;
- RelAddrType addr;
- LengthType len;
- MemNumType mem;
- StateCheckNumType scn;
- OnigCodePoint code;
- OnigOptionType option;
- ModeType mode;
- UChar *q;
-
- fprintf(f, "%s", op2name(*bp));
- arg_type = op2arg_type(*bp);
- if (arg_type != ARG_SPECIAL) {
- bp++;
- switch (arg_type) {
- case ARG_NON:
- break;
- case ARG_RELADDR:
- GET_RELADDR_INC(addr, bp);
- fputc(':', f);
- p_rel_addr(f, addr, bp, start);
- break;
- case ARG_ABSADDR:
- GET_ABSADDR_INC(addr, bp);
- fprintf(f, ":{/%d}", addr);
- break;
- case ARG_LENGTH:
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d", len);
- break;
- case ARG_MEMNUM:
- mem = *((MemNumType* )bp);
- bp += SIZE_MEMNUM;
- fprintf(f, ":%d", mem);
- break;
- case ARG_OPTION:
- {
- OnigOptionType option = *((OnigOptionType* )bp);
- bp += SIZE_OPTION;
- fprintf(f, ":%d", option);
- }
- break;
-
- case ARG_STATE_CHECK:
- scn = *((StateCheckNumType* )bp);
- bp += SIZE_STATE_CHECK_NUM;
- fprintf(f, ":%d", scn);
- break;
-
- case ARG_MODE:
- mode = *((ModeType* )bp);
- bp += SIZE_MODE;
- fprintf(f, ":%d", mode);
- break;
- }
- }
- else {
- switch (*bp++) {
- case OP_EXACT1:
- case OP_ANYCHAR_STAR_PEEK_NEXT:
- case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
- p_string(f, 1, bp++); break;
- case OP_EXACT2:
- p_string(f, 2, bp); bp += 2; break;
- case OP_EXACT3:
- p_string(f, 3, bp); bp += 3; break;
- case OP_EXACT4:
- p_string(f, 4, bp); bp += 4; break;
- case OP_EXACT5:
- p_string(f, 5, bp); bp += 5; break;
- case OP_EXACTN:
- GET_LENGTH_INC(len, bp);
- p_len_string(f, len, 1, bp);
- bp += len;
- break;
-
- case OP_EXACTMB2N1:
- p_string(f, 2, bp); bp += 2; break;
- case OP_EXACTMB2N2:
- p_string(f, 4, bp); bp += 4; break;
- case OP_EXACTMB2N3:
- p_string(f, 6, bp); bp += 6; break;
- case OP_EXACTMB2N:
- GET_LENGTH_INC(len, bp);
- p_len_string(f, len, 2, bp);
- bp += len * 2;
- break;
- case OP_EXACTMB3N:
- GET_LENGTH_INC(len, bp);
- p_len_string(f, len, 3, bp);
- bp += len * 3;
- break;
- case OP_EXACTMBN:
- {
- int mb_len;
-
- GET_LENGTH_INC(mb_len, bp);
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:%d:", mb_len, len);
- n = len * mb_len;
- while (n-- > 0) { fputc(*bp++, f); }
- }
- break;
-
- case OP_EXACT1_IC:
- len = enclen(enc, bp);
- p_string(f, len, bp);
- bp += len;
- break;
- case OP_EXACTN_IC:
- GET_LENGTH_INC(len, bp);
- p_len_string(f, len, 1, bp);
- bp += len;
- break;
-
- case OP_CCLASS:
- n = bitset_on_num((BitSetRef )bp);
- bp += SIZE_BITSET;
- fprintf(f, ":%d", n);
- break;
-
- case OP_CCLASS_NOT:
- n = bitset_on_num((BitSetRef )bp);
- bp += SIZE_BITSET;
- fprintf(f, ":%d", n);
- break;
-
- case OP_CCLASS_MB:
- case OP_CCLASS_MB_NOT:
- GET_LENGTH_INC(len, bp);
- q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
- ALIGNMENT_RIGHT(q);
-#endif
- GET_CODE_POINT(code, q);
- bp += len;
- fprintf(f, ":%d:%d", (int )code, len);
- break;
-
- case OP_CCLASS_MIX:
- case OP_CCLASS_MIX_NOT:
- n = bitset_on_num((BitSetRef )bp);
- bp += SIZE_BITSET;
- GET_LENGTH_INC(len, bp);
- q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
- ALIGNMENT_RIGHT(q);
-#endif
- GET_CODE_POINT(code, q);
- bp += len;
- fprintf(f, ":%d:%d:%d", n, (int )code, len);
- break;
-
-#ifdef USE_OP_CCLASS_NODE
- case OP_CCLASS_NODE:
- {
- CClassNode *cc;
-
- GET_POINTER_INC(cc, bp);
- n = bitset_on_num(cc->bs);
- fprintf(f, ":%p:%d", cc, n);
- }
- break;
-#endif
-
- case OP_BACKREF_N_IC:
- mem = *((MemNumType* )bp);
- bp += SIZE_MEMNUM;
- fprintf(f, ":%d", mem);
- break;
-
- case OP_BACKREF_MULTI_IC:
- case OP_BACKREF_MULTI:
- case OP_BACKREF_CHECK:
- fputs(" ", f);
- GET_LENGTH_INC(len, bp);
- for (i = 0; i < len; i++) {
- GET_MEMNUM_INC(mem, bp);
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
- }
- break;
-
- case OP_BACKREF_WITH_LEVEL:
- GET_OPTION_INC(option, bp);
- fprintf(f, ":%d", option);
- /* fall */
- case OP_BACKREF_CHECK_WITH_LEVEL:
- {
- LengthType level;
-
- GET_LENGTH_INC(level, bp);
- fprintf(f, ":%d", level);
-
- fputs(" ", f);
- GET_LENGTH_INC(len, bp);
- for (i = 0; i < len; i++) {
- GET_MEMNUM_INC(mem, bp);
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
- }
- }
- break;
-
- case OP_REPEAT:
- case OP_REPEAT_NG:
- {
- mem = *((MemNumType* )bp);
- bp += SIZE_MEMNUM;
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fprintf(f, ":%d:%d", mem, addr);
- }
- break;
-
- case OP_PUSH_OR_JUMP_EXACT1:
- case OP_PUSH_IF_PEEK_NEXT:
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fputc(':', f);
- p_rel_addr(f, addr, bp, start);
- p_string(f, 1, bp);
- bp += 1;
- break;
-
- case OP_LOOK_BEHIND:
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d", len);
- break;
-
- case OP_LOOK_BEHIND_NOT_START:
- GET_RELADDR_INC(addr, bp);
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:", len);
- p_rel_addr(f, addr, bp, start);
- break;
-
- case OP_STATE_CHECK_PUSH:
- case OP_STATE_CHECK_PUSH_OR_JUMP:
- scn = *((StateCheckNumType* )bp);
- bp += SIZE_STATE_CHECK_NUM;
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fprintf(f, ":%d:", scn);
- p_rel_addr(f, addr, bp, start);
- break;
-
- case OP_PUSH_SAVE_VAL:
- {
- SaveType type;
- GET_SAVE_TYPE_INC(type, bp);
- GET_MEMNUM_INC(mem, bp);
- fprintf(f, ":%d:%d", type, mem);
- }
- break;
-
- case OP_UPDATE_VAR:
- {
- UpdateVarType type;
- GET_UPDATE_VAR_TYPE_INC(type, bp);
- GET_MEMNUM_INC(mem, bp);
- fprintf(f, ":%d:%d", type, mem);
- }
- break;
-
- default:
- fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp);
- }
- }
- if (nextp) *nextp = bp;
-}
-#endif /* ONIG_DEBUG */
-
-#ifdef ONIG_DEBUG_COMPILE
-static void
-print_compiled_byte_code_list(FILE* f, regex_t* reg)
-{
- UChar* bp;
- UChar* start = reg->p;
- UChar* end = reg->p + reg->used;
-
- fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
- reg->bt_mem_start, reg->bt_mem_end);
- fprintf(f, "code-length: %d\n", reg->used);
-
- bp = start;
- while (bp < end) {
- int pos = bp - start;
-
- fprintf(f, "%4d: ", pos);
- onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc);
- fprintf(f, "\n");
- }
- fprintf(f, "\n");
-}
-#endif
-
-#ifdef ONIG_DEBUG_PARSE
-
-static void
Indent(FILE* f, int indent)
{
int i;
@@ -7334,6 +6571,17 @@ print_indent_tree(FILE* f, Node* node, int indent)
case GIMMICK_UPDATE_VAR:
fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
break;
+#ifdef USE_CALLOUT
+ case GIMMICK_CALLOUT:
+ switch (GIMMICK_(node)->detail_type) {
+ case ONIG_CALLOUT_OF_CONTENTS:
+ fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
+ break;
+ case ONIG_CALLOUT_OF_NAME:
+ fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
+ break;
+ }
+#endif
}
break;
diff --git a/src/regenc.c b/src/regenc.c
index 7ded5a8..21f3536 100644
--- a/src/regenc.c
+++ b/src/regenc.c
@@ -2,7 +2,7 @@
regenc.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,66 @@
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
+/* Number of elements in the InitedList array. */
+#define INITED_LIST_SIZE 20
+
+/* Count of leading InitedList elements currently in use. */
+static int InitedListNum;
+
+/* Table pairing an encoding with an "initialized" flag. The flag is set
+ * to 1 by enc_inited_entry() and cleared by onigenc_end(). */
+static struct {
+ OnigEncoding enc;
+ int inited;
+} InitedList[INITED_LIST_SIZE];
+
+/*
+ * Record `enc` as initialized in InitedList.
+ *
+ * If `enc` already has an entry, its flag is set to 1 again; otherwise a
+ * new entry is appended after the ones in use.
+ *
+ * Returns the index of the entry, or -1 when the table has no free slot.
+ */
+static int
+enc_inited_entry(OnigEncoding enc)
+{
+  int i;
+
+  for (i = 0; i < InitedListNum; i++) {
+    if (InitedList[i].enc == enc) {
+      InitedList[i].inited = 1;
+      return i;
+    }
+  }
+
+  /* Valid indices are 0 .. INITED_LIST_SIZE - 1. The former bound of
+   * `INITED_LIST_SIZE - 1` left the last element permanently unused. */
+  i = InitedListNum;
+  if (i < INITED_LIST_SIZE) {
+    InitedList[i].enc    = enc;
+    InitedList[i].inited = 1;
+    InitedListNum++;
+    return i;
+  }
+
+  return -1;
+}
+
+/*
+ * Look `enc` up in InitedList.
+ *
+ * Returns the `inited` flag stored in the first matching entry, or 0
+ * when none of the entries in use refers to `enc`.
+ */
+static int
+enc_is_inited(OnigEncoding enc)
+{
+  int idx = 0;
+
+  while (idx < InitedListNum) {
+    if (InitedList[idx].enc == enc)
+      return InitedList[idx].inited;
+    idx++;
+  }
+
+  return 0;
+}
+
+/*
+ * Reset the InitedList table: every entry in use gets a null encoding
+ * and a cleared flag, then the in-use count drops to zero.
+ * Always returns ONIG_NORMAL.
+ */
+extern int
+onigenc_end(void)
+{
+  int idx;
+
+  /* Wipe the used entries, last one first; the end state is the same. */
+  for (idx = InitedListNum - 1; idx >= 0; idx--) {
+    InitedList[idx].inited = 0;
+    InitedList[idx].enc    = 0;
+  }
+
+  InitedListNum = 0;
+  return ONIG_NORMAL;
+}
+
extern int
onigenc_init(void)
{
@@ -40,8 +100,23 @@ onigenc_init(void)
extern int
onig_initialize_encoding(OnigEncoding enc)
{
- if (enc->init != 0 && (enc->is_initialized() == 0)) {
- int r = (enc->init)();
+ int r;
+
+ if (enc != ONIG_ENCODING_ASCII &&
+ ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
+ OnigEncoding ascii = ONIG_ENCODING_ASCII;
+ if (ascii->init != 0 && enc_is_inited(ascii) == 0) {
+ r = ascii->init();
+ if (r != ONIG_NORMAL) return r;
+ enc_inited_entry(ascii);
+ }
+ }
+
+ if (enc->init != 0 &&
+ enc_is_inited(enc) == 0) {
+ r = (enc->init)();
+ if (r == ONIG_NORMAL)
+ enc_inited_entry(enc);
return r;
}
@@ -62,6 +137,25 @@ onigenc_set_default_encoding(OnigEncoding enc)
}
+/*
+ * Duplicate the byte range [s, end) into a freshly xmalloc'ed buffer and
+ * append ONIGENC_MBC_MINLEN(enc) zero bytes as terminator.
+ *
+ * Returns the new buffer, or the value produced by CHECK_NULL_RETURN when
+ * the allocation fails. Releasing the buffer is left to the caller.
+ */
extern UChar*
+onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end)
+{
+  int body_len = (int )(end - s);
+  int pad_len  = ONIGENC_MBC_MINLEN(enc);
+  UChar* copy;
+  UChar* tail;
+
+  copy = (UChar* )xmalloc(body_len + pad_len);
+  CHECK_NULL_RETURN(copy);
+
+  xmemcpy(copy, s, body_len);
+
+  /* zero-fill the terminator bytes that follow the copied text */
+  tail = copy + body_len;
+  while (pad_len-- > 0)
+    *tail++ = (UChar )0;
+
+  return copy;
+}
+
+extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
diff --git a/src/regenc.h b/src/regenc.h
index cda3bcd..4dd89ba 100644
--- a/src/regenc.h
+++ b/src/regenc.h
@@ -4,7 +4,7 @@
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,11 +29,12 @@
* SUCH DAMAGE.
*/
-#ifndef PACKAGE
-/* PACKAGE is defined in config.h */
-#include "config.h"
+#ifndef ONIGURUMA_EXPORT
+#define ONIGURUMA_EXPORT
#endif
+#include "config.h"
+
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
@@ -118,51 +119,53 @@ struct PropertyNameCtype {
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
+
/* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
-ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
-ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
-ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
-ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
+extern int onigenc_end(void);
+extern int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
+extern int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
+extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
+extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
/* methods for single byte encoding */
-ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
-ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
-ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
-ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
-ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
-ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
-ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
-ONIG_EXTERN int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end));
-ONIG_EXTERN int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end));
+extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
+extern int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
+extern OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
+extern int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
+extern int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
+extern UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
+extern int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
+extern int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
+extern int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end));
+extern int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end));
/* methods for multi byte encoding */
-ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
-ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
-ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
-ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
-ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
-ONIG_EXTERN int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end));
-ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
-ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len));
-ONIG_EXTERN struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len));
-//ONIG_EXTERN const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len));
+extern OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
+extern int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
+extern int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
+extern int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+extern int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
+extern int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
+extern int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end));
+extern int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+extern int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
+extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+extern struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len));
+extern struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len));
+//extern const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len));
/* in enc/unicode.c */
-ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
-ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[]));
-ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
-ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
-ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
-ONIG_EXTERN int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));
+extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
+extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
+extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[]));
+extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
+extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
+extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
@@ -218,21 +221,21 @@ extern int onig_codes_byte_at(OnigCodePoint code[], int at);
#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
OnigEncISO_8859_1_ToUpperCaseTable[c]
-ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
-ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
+extern const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
+extern const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
-ONIG_EXTERN int
+extern int
onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
-ONIG_EXTERN UChar*
+extern UChar*
onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
/* defined in regexec.c, but used in enc/xxx.c */
extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
-ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
-ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
-ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
-ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
+extern OnigEncoding OnigEncDefaultCharEncoding;
+extern const UChar OnigEncAsciiToLowerCaseTable[];
+extern const UChar OnigEncAsciiToUpperCaseTable[];
+extern const unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
@@ -249,4 +252,6 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_IS_UNICODE_ENCODING(enc) \
((enc)->is_code_ctype == onigenc_unicode_is_code_ctype)
+/* Non-zero when the encoding's `min_enc_len` member equals 1; this is the
+ * only criterion the macro applies. */
+#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc) ((enc)->min_enc_len == 1)
+
#endif /* REGENC_H */
diff --git a/src/regerror.c b/src/regerror.c
index e7d2570..70efe9a 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -2,7 +2,7 @@
regerror.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -43,19 +43,21 @@ onig_error_code_to_format(int code)
{
char *p;
- if (code >= 0) return (UChar* )0;
-
switch (code) {
case ONIG_MISMATCH:
p = "mismatch"; break;
case ONIG_NO_SUPPORT_CONFIG:
p = "no support in this configuration"; break;
+ case ONIG_ABORT:
+ p = "abort"; break;
case ONIGERR_MEMORY:
p = "fail to memory allocation"; break;
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
case ONIGERR_PARSE_DEPTH_LIMIT_OVER:
p = "parse depth limit over"; break;
+ case ONIGERR_RETRY_LIMIT_IN_MATCH_OVER:
+ p = "retry-limit-in-match over"; break;
case ONIGERR_TYPE_BUG:
p = "undefined type (bug)"; break;
case ONIGERR_PARSER_BUG:
@@ -172,6 +174,18 @@ onig_error_code_to_format(int code)
p = "invalid absent group pattern"; break;
case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN:
p = "invalid absent group generator pattern"; break;
+ case ONIGERR_INVALID_CALLOUT_PATTERN:
+ p = "invalid callout pattern"; break;
+ case ONIGERR_INVALID_CALLOUT_NAME:
+ p = "invalid callout name"; break;
+ case ONIGERR_UNDEFINED_CALLOUT_NAME:
+ p = "undefined callout name"; break;
+ case ONIGERR_INVALID_CALLOUT_BODY:
+ p = "invalid callout body"; break;
+ case ONIGERR_INVALID_CALLOUT_TAG_NAME:
+ p = "invalid callout tag name"; break;
+ case ONIGERR_INVALID_CALLOUT_ARG:
+ p = "invalid callout arg"; break;
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
p = "not supported encoding combination"; break;
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
diff --git a/src/regexec.c b/src/regexec.c
index 53f42ee..35e3698 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,11 +26,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-
#include "regint.h"
-#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
-
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))
@@ -40,6 +37,565 @@
ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
#endif
+#define CHECK_INTERRUPT_IN_MATCH
+
+#ifdef USE_CALLOUT
+/* Per-callout data record; OnigMatchParamStruct.callout_data points to an array of these. */
+typedef struct {
+ int last_match_at_call_counter; /* compared with OnigMatchParamStruct.match_at_call_counter to detect stale slot values */
+ struct {
+ OnigType type;
+ OnigValue val;
+ } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
+} CalloutData;
+#endif
+
+/* Per-match parameters, accessed in this file through OnigMatchParam pointers; defaults are filled in by onig_initialize_match_param(). */
+struct OnigMatchParamStruct {
+ unsigned int match_stack_limit; /* copied into MatchArg by MATCH_ARG_INIT */
+ unsigned long retry_limit_in_match; /* copied into MatchArg by MATCH_ARG_INIT */
+ OnigCalloutFunc progress_callout_of_contents;
+ OnigCalloutFunc retraction_callout_of_contents;
+#ifdef USE_CALLOUT
+ int match_at_call_counter; /* compared with CalloutData.last_match_at_call_counter */
+ void* callout_user_data;
+ CalloutData* callout_data; /* heap array grown by adjust_match_param(), released by onig_free_match_param_content() */
+ int callout_data_alloc_num; /* number of CalloutData entries allocated in callout_data */
+#endif
+};
+
+/*
+ * Store a match-stack limit in a match parameter object.
+ *
+ * param  match parameter object to update
+ * limit  new value of param->match_stack_limit
+ *
+ * Returns ONIG_NORMAL, or ONIGERR_INVALID_ARGUMENT when param is NULL
+ * (a NULL param used to be dereferenced).
+ */
+extern int
+onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
+                                               unsigned int limit)
+{
+  if (IS_NULL(param)) return ONIGERR_INVALID_ARGUMENT;
+
+  param->match_stack_limit = limit;
+  return ONIG_NORMAL;
+}
+
+extern int
+onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
+ unsigned long limit)
+{
+ param->retry_limit_in_match = limit;
+ return ONIG_NORMAL;
+}
+
+/*
+ * Store the progress callout function in a match parameter object.
+ *
+ * param  match parameter object to update
+ * f      new value of param->progress_callout_of_contents
+ *
+ * Returns ONIG_NORMAL, or ONIGERR_INVALID_ARGUMENT when param is NULL
+ * (a NULL param used to be dereferenced).
+ */
+extern int
+onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
+{
+  if (IS_NULL(param)) return ONIGERR_INVALID_ARGUMENT;
+
+  param->progress_callout_of_contents = f;
+  return ONIG_NORMAL;
+}
+
+/*
+ * Store the retraction callout function in a match parameter object.
+ *
+ * param  match parameter object to update
+ * f      new value of param->retraction_callout_of_contents
+ *
+ * Returns ONIG_NORMAL, or ONIGERR_INVALID_ARGUMENT when param is NULL
+ * (a NULL param used to be dereferenced).
+ */
+extern int
+onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
+{
+  if (IS_NULL(param)) return ONIGERR_INVALID_ARGUMENT;
+
+  param->retraction_callout_of_contents = f;
+  return ONIG_NORMAL;
+}
+
+
+
+/* Working state for one match/search call; initialized by MATCH_ARG_INIT and released by MATCH_ARG_FREE. */
+typedef struct {
+ void* stack_p; /* set to NULL by MATCH_ARG_INIT; xfree()d by MATCH_ARG_FREE when non-NULL */
+ int stack_n;
+ OnigOptionType options;
+ OnigRegion* region;
+ int ptr_num; /* num_repeat + (num_mem + 1) * 2, computed by MATCH_ARG_INIT */
+ const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+ unsigned int match_stack_limit; /* copied from the match param; checked in stack_double() */
+ unsigned long retry_limit_in_match; /* copied from the match param */
+ OnigMatchParam* mp; /* match param this state was initialized from */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ int best_len; /* for ONIG_OPTION_FIND_LONGEST */
+ UChar* best_s;
+#endif
+} MatchArg;
+
+
+#ifdef ONIG_DEBUG
+
+/* argument types: how onig_print_compiled_byte_code() decodes the operand following an opcode */
+#define ARG_SPECIAL -1
+#define ARG_NON 0
+#define ARG_RELADDR 1
+#define ARG_ABSADDR 2
+#define ARG_LENGTH 3
+#define ARG_MEMNUM 4
+#define ARG_OPTION 5
+#define ARG_MODE 6
+
+/* One row of the OpInfo table: an opcode, its printable name and its ARG_* operand kind. */
+typedef struct {
+ short int opcode;
+ char* name;
+ short int arg_type;
+} OpInfoType;
+
+/* Debug table mapping each opcode to a printable name and an ARG_* operand kind. */
+/* op2name() and op2arg_type() scan it linearly; the row with opcode -1 terminates the scan. */
+static OpInfoType OpInfo[] = {
+ { OP_FINISH, "finish", ARG_NON },
+ { OP_END, "end", ARG_NON },
+ { OP_EXACT1, "exact1", ARG_SPECIAL },
+ { OP_EXACT2, "exact2", ARG_SPECIAL },
+ { OP_EXACT3, "exact3", ARG_SPECIAL },
+ { OP_EXACT4, "exact4", ARG_SPECIAL },
+ { OP_EXACT5, "exact5", ARG_SPECIAL },
+ { OP_EXACTN, "exactn", ARG_SPECIAL },
+ { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
+ { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
+ { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
+ { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
+ { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
+ { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
+ { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
+ { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
+ { OP_CCLASS, "cclass", ARG_SPECIAL },
+ { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
+ { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
+ { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
+ { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
+ { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
+#ifdef USE_OP_CCLASS_NODE
+ { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
+#endif
+ { OP_ANYCHAR, "anychar", ARG_NON },
+ { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
+ { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
+ { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
+ { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
+ { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_WORD_ASCII, "word-ascii", ARG_NON },
+ { OP_NO_WORD, "not-word", ARG_NON },
+ { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON },
+ { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE },
+ { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE },
+ { OP_WORD_BEGIN, "word-begin", ARG_MODE },
+ { OP_WORD_END, "word-end", ARG_MODE },
+ { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "extended-grapheme-cluster-boundary", ARG_NON },
+ { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREF_N, "backref-n", ARG_MEMNUM },
+ { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
+ { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL },
+ { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL },
+ { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_PUSH_SUPER, "push-super", ARG_RELADDR },
+ { OP_POP_OUT, "pop-out", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM },
+ { OP_PREC_READ_START, "push-pos", ARG_NON },
+ { OP_PREC_READ_END, "pop-pos", ARG_NON },
+ { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR },
+ { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON },
+ { OP_ATOMIC_START, "atomic-start", ARG_NON },
+ { OP_ATOMIC_END, "atomic-end", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL },
+ { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL },
+ { OP_UPDATE_VAR, "update-var", ARG_SPECIAL },
+#ifdef USE_CALLOUT
+ { OP_CALLOUT_CONTENTS, "callout-contents", ARG_SPECIAL },
+ { OP_CALLOUT_NAME, "callout-name", ARG_SPECIAL },
+#endif
+ { -1, "", ARG_NON }
+};
+
+/* Return the printable name registered in OpInfo for opcode, or "" when the opcode is not listed. */
+static char*
+op2name(int opcode)
+{
+  OpInfoType* row = OpInfo;
+
+  /* the table ends with a row whose opcode is negative */
+  while (row->opcode >= 0) {
+    if (row->opcode == opcode) return row->name;
+    row++;
+  }
+
+  return "";
+}
+
+/* Return the ARG_* operand kind registered in OpInfo for opcode; unknown opcodes yield ARG_SPECIAL. */
+static int
+op2arg_type(int opcode)
+{
+  OpInfoType* row;
+
+  /* the table ends with a row whose opcode is negative */
+  for (row = OpInfo; row->opcode >= 0; row++) {
+    if (row->opcode != opcode) continue;
+    return row->arg_type;
+  }
+
+  return ARG_SPECIAL;
+}
+
+/* Write ":" followed by the first len bytes of s to f (nothing after the colon when len <= 0). */
+static void
+p_string(FILE* f, int len, UChar* s)
+{
+  int i;
+
+  fputs(":", f);
+  for (i = 0; i < len; i++) {
+    fputc(s[i], f);
+  }
+}
+
+/* Write ":<len>:" to f, followed by len * mb_len bytes taken from s. */
+static void
+p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
+{
+  int remaining;
+
+  fprintf(f, ":%d:", len);
+  for (remaining = len * mb_len; remaining > 0; remaining--) {
+    fputc(*s, f);
+    s++;
+  }
+}
+
+/* Write "{rel/abs}" to f, where abs is rel_addr added to the offset of p from start. */
+static void
+p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start)
+{
+  RelAddrType here;
+  RelAddrType target;
+
+  here   = (RelAddrType )(p - start);
+  target = here + rel_addr;
+  fprintf(f, "{%d/%d}", rel_addr, target);
+}
+
+/* Count the positions in [0, SINGLE_BYTE_SIZE) for which BITSET_AT(bs, pos) is non-zero. */
+static int
+bitset_on_num(BitSetRef bs)
+{
+  int pos   = 0;
+  int count = 0;
+
+  while (pos < SINGLE_BYTE_SIZE) {
+    if (BITSET_AT(bs, pos)) count++;
+    pos++;
+  }
+
+  return count;
+}
+
+/*
+ * Print one compiled byte-code instruction in readable form.
+ *
+ * f      stream that receives the formatted instruction
+ * bp     address of the opcode byte of the instruction to print
+ * nextp  when not NULL, receives the address just past the instruction
+ * start  base address used when relative addresses are shown as offsets
+ * enc    encoding, used to size the character operand of OP_EXACT1_IC
+ *
+ * Opcodes whose operand is one of the simple ARG_* kinds are decoded
+ * generically; ARG_SPECIAL opcodes are decoded case by case.
+ *
+ * An unrecognized opcode is reported on stderr and skipped as one byte.
+ * The previous code rewound bp onto the unknown opcode, so *nextp did not
+ * advance and onig_print_compiled_byte_code_list() never terminated.
+ */
+extern void
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
+                              OnigEncoding enc)
+{
+  int i, n, arg_type;
+  RelAddrType addr;
+  LengthType len;
+  MemNumType mem;
+  OnigCodePoint code;
+  OnigOptionType option;
+  ModeType mode;
+  UChar *q;
+
+  fprintf(f, "%s", op2name(*bp));
+  arg_type = op2arg_type(*bp);
+  if (arg_type != ARG_SPECIAL) {
+    bp++;
+    switch (arg_type) {
+    case ARG_NON:
+      break;
+    case ARG_RELADDR:
+      GET_RELADDR_INC(addr, bp);
+      fputc(':', f);
+      p_rel_addr(f, addr, bp, start);
+      break;
+    case ARG_ABSADDR:
+      GET_ABSADDR_INC(addr, bp);
+      fprintf(f, ":{/%d}", addr);
+      break;
+    case ARG_LENGTH:
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d", len);
+      break;
+    case ARG_MEMNUM:
+      mem = *((MemNumType* )bp);
+      bp += SIZE_MEMNUM;
+      fprintf(f, ":%d", mem);
+      break;
+    case ARG_OPTION:
+      /* uses the function-level variable; the old inner declaration shadowed it */
+      option = *((OnigOptionType* )bp);
+      bp += SIZE_OPTION;
+      fprintf(f, ":%d", option);
+      break;
+
+    case ARG_MODE:
+      mode = *((ModeType* )bp);
+      bp += SIZE_MODE;
+      fprintf(f, ":%d", mode);
+      break;
+    }
+  }
+  else {
+    switch (*bp++) {
+    case OP_EXACT1:
+    case OP_ANYCHAR_STAR_PEEK_NEXT:
+    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
+      p_string(f, 1, bp++); break;
+    case OP_EXACT2:
+      p_string(f, 2, bp); bp += 2; break;
+    case OP_EXACT3:
+      p_string(f, 3, bp); bp += 3; break;
+    case OP_EXACT4:
+      p_string(f, 4, bp); bp += 4; break;
+    case OP_EXACT5:
+      p_string(f, 5, bp); bp += 5; break;
+    case OP_EXACTN:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 1, bp);
+      bp += len;
+      break;
+
+    case OP_EXACTMB2N1:
+      p_string(f, 2, bp); bp += 2; break;
+    case OP_EXACTMB2N2:
+      p_string(f, 4, bp); bp += 4; break;
+    case OP_EXACTMB2N3:
+      p_string(f, 6, bp); bp += 6; break;
+    case OP_EXACTMB2N:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 2, bp);
+      bp += len * 2;
+      break;
+    case OP_EXACTMB3N:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 3, bp);
+      bp += len * 3;
+      break;
+    case OP_EXACTMBN:
+      {
+        int mb_len;
+
+        GET_LENGTH_INC(mb_len, bp);
+        GET_LENGTH_INC(len, bp);
+        fprintf(f, ":%d:%d:", mb_len, len);
+        n = len * mb_len;
+        while (n-- > 0) { fputc(*bp++, f); }
+      }
+      break;
+
+    case OP_EXACT1_IC:
+      len = enclen(enc, bp);
+      p_string(f, len, bp);
+      bp += len;
+      break;
+    case OP_EXACTN_IC:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 1, bp);
+      bp += len;
+      break;
+
+    case OP_CCLASS:
+      n = bitset_on_num((BitSetRef )bp);
+      bp += SIZE_BITSET;
+      fprintf(f, ":%d", n);
+      break;
+
+    case OP_CCLASS_NOT:
+      n = bitset_on_num((BitSetRef )bp);
+      bp += SIZE_BITSET;
+      fprintf(f, ":%d", n);
+      break;
+
+    case OP_CCLASS_MB:
+    case OP_CCLASS_MB_NOT:
+      GET_LENGTH_INC(len, bp);
+      q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+      ALIGNMENT_RIGHT(q);
+#endif
+      GET_CODE_POINT(code, q);
+      bp += len;
+      fprintf(f, ":%d:%d", (int )code, len);
+      break;
+
+    case OP_CCLASS_MIX:
+    case OP_CCLASS_MIX_NOT:
+      n = bitset_on_num((BitSetRef )bp);
+      bp += SIZE_BITSET;
+      GET_LENGTH_INC(len, bp);
+      q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+      ALIGNMENT_RIGHT(q);
+#endif
+      GET_CODE_POINT(code, q);
+      bp += len;
+      fprintf(f, ":%d:%d:%d", n, (int )code, len);
+      break;
+
+#ifdef USE_OP_CCLASS_NODE
+    case OP_CCLASS_NODE:
+      {
+        CClassNode *cc;
+
+        GET_POINTER_INC(cc, bp);
+        n = bitset_on_num(cc->bs);
+        /* %p requires a void pointer argument */
+        fprintf(f, ":%p:%d", (void* )cc, n);
+      }
+      break;
+#endif
+
+    case OP_BACKREF_N_IC:
+      mem = *((MemNumType* )bp);
+      bp += SIZE_MEMNUM;
+      fprintf(f, ":%d", mem);
+      break;
+
+    case OP_BACKREF_MULTI_IC:
+    case OP_BACKREF_MULTI:
+    case OP_BACKREF_CHECK:
+      fputs(" ", f);
+      GET_LENGTH_INC(len, bp);
+      for (i = 0; i < len; i++) {
+        GET_MEMNUM_INC(mem, bp);
+        if (i > 0) fputs(", ", f);
+        fprintf(f, "%d", mem);
+      }
+      break;
+
+    case OP_BACKREF_WITH_LEVEL:
+      GET_OPTION_INC(option, bp);
+      fprintf(f, ":%d", option);
+      /* fall through: the rest of the operand has the CHECK_WITH_LEVEL layout */
+    case OP_BACKREF_CHECK_WITH_LEVEL:
+      {
+        LengthType level;
+
+        GET_LENGTH_INC(level, bp);
+        fprintf(f, ":%d", level);
+
+        fputs(" ", f);
+        GET_LENGTH_INC(len, bp);
+        for (i = 0; i < len; i++) {
+          GET_MEMNUM_INC(mem, bp);
+          if (i > 0) fputs(", ", f);
+          fprintf(f, "%d", mem);
+        }
+      }
+      break;
+
+    case OP_REPEAT:
+    case OP_REPEAT_NG:
+      {
+        mem = *((MemNumType* )bp);
+        bp += SIZE_MEMNUM;
+        addr = *((RelAddrType* )bp);
+        bp += SIZE_RELADDR;
+        fprintf(f, ":%d:%d", mem, addr);
+      }
+      break;
+
+    case OP_PUSH_OR_JUMP_EXACT1:
+    case OP_PUSH_IF_PEEK_NEXT:
+      addr = *((RelAddrType* )bp);
+      bp += SIZE_RELADDR;
+      fputc(':', f);
+      p_rel_addr(f, addr, bp, start);
+      p_string(f, 1, bp);
+      bp += 1;
+      break;
+
+    case OP_LOOK_BEHIND:
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d", len);
+      break;
+
+    case OP_LOOK_BEHIND_NOT_START:
+      GET_RELADDR_INC(addr, bp);
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d:", len);
+      p_rel_addr(f, addr, bp, start);
+      break;
+
+    case OP_PUSH_SAVE_VAL:
+      {
+        SaveType type;
+        GET_SAVE_TYPE_INC(type, bp);
+        GET_MEMNUM_INC(mem, bp);
+        fprintf(f, ":%d:%d", type, mem);
+      }
+      break;
+
+    case OP_UPDATE_VAR:
+      {
+        UpdateVarType type;
+        GET_UPDATE_VAR_TYPE_INC(type, bp);
+        GET_MEMNUM_INC(mem, bp);
+        fprintf(f, ":%d:%d", type, mem);
+      }
+      break;
+
+#ifdef USE_CALLOUT
+    case OP_CALLOUT_CONTENTS:
+      {
+        GET_MEMNUM_INC(mem, bp); /* number */
+        fprintf(f, ":%d", mem);
+      }
+      break;
+
+    case OP_CALLOUT_NAME:
+      {
+        int id;
+
+        GET_MEMNUM_INC(id, bp);  /* id */
+        GET_MEMNUM_INC(mem, bp); /* number */
+
+        fprintf(f, ":%d:%d", id, mem);
+      }
+      break;
+#endif
+
+    default:
+      /* bp is already past the opcode byte; leave it there so callers make progress */
+      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", bp[-1]);
+      break;
+    }
+  }
+  if (nextp) *nextp = bp;
+}
+#endif /* ONIG_DEBUG */
+
+#ifdef ONIG_DEBUG_COMPILE
+/*
+ * Dump the whole compiled program of reg to f: a header showing
+ * bt_mem_start, bt_mem_end and the code length, then one line per
+ * instruction prefixed with its offset from the start of the code.
+ */
+extern void
+onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
+{
+  UChar* code_start;
+  UChar* code_end;
+  UChar* cur;
+
+  code_start = reg->p;
+  code_end   = reg->p + reg->used;
+
+  fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
+          reg->bt_mem_start, reg->bt_mem_end);
+  fprintf(f, "code-length: %d\n", reg->used);
+
+  /* the printer itself moves cur to the next instruction */
+  for (cur = code_start; cur < code_end; ) {
+    int offset = cur - code_start;
+
+    fprintf(f, "%4d: ", offset);
+    onig_print_compiled_byte_code(f, cur, &cur, code_start, reg->enc);
+    fprintf(f, "\n");
+  }
+  fprintf(f, "\n");
+}
+#endif
+
+
#ifdef USE_CAPTURE_HISTORY
static void history_tree_free(OnigCaptureTreeNode* node);
@@ -304,6 +860,45 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#endif
}
+#ifdef USE_CALLOUT
+/* Fill the OnigCalloutArgs object `args`, call func(&args, user) and store the return value in `result`. */
+/* Besides its parameters the macro reads reg, str, end, sstart, right_range, s, retry_in_match_counter, */
+/* msa, stk_base, stk, mem_start_stk and mem_end_stk from the scope in which it is expanded. */
+#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
+ args.in = (ain);\
+ args.name_id = (aname_id);\
+ args.num = anum;\
+ args.regex = reg;\
+ args.string = str;\
+ args.string_end = end;\
+ args.start = sstart;\
+ args.right_range = right_range;\
+ args.current = s;\
+ args.retry_in_match_counter = retry_in_match_counter;\
+ args.msa = msa;\
+ args.stk_base = stk_base;\
+ args.stk = stk;\
+ args.mem_start_stk = mem_start_stk;\
+ args.mem_end_stk = mem_end_stk;\
+ result = (func)(&args, user);\
+} while (0)
+
+/* Call func through CALLOUT_BODY with in == ONIG_CALLOUT_IN_RETRACTION. */
+/* ONIG_CALLOUT_FAIL and ONIG_CALLOUT_SUCCESS let execution continue after the macro. */
+/* Any other result is stored in best_len (a positive one is first replaced by ONIGERR_INVALID_ARGUMENT) */
+/* and control jumps to the `finish` label of the expansion site. */
+#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
+ int result;\
+ OnigCalloutArgs args;\
+ CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
+ switch (result) {\
+ case ONIG_CALLOUT_FAIL:\
+ case ONIG_CALLOUT_SUCCESS:\
+ break;\
+ default:\
+ if (result > 0) {\
+ result = ONIGERR_INVALID_ARGUMENT;\
+ }\
+ best_len = result;\
+ goto finish;\
+ break;\
+ }\
+} while(0)
+#endif
+
/** stack **/
#define INVALID_STACK_INDEX -1
@@ -316,40 +911,43 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_ALT (0x0002 | STK_ALT_FLAG)
#define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG)
#define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG)
+
/* handled by normal-POP */
-#define STK_MEM_START 0x0100
-#define STK_MEM_END 0x8200
-#define STK_REPEAT_INC 0x0300
-#define STK_STATE_CHECK_MARK 0x1000
+#define STK_MEM_START 0x0010
+#define STK_MEM_END 0x8030
+#define STK_REPEAT_INC 0x0050
+#ifdef USE_CALLOUT
+#define STK_CALLOUT 0x0070
+#endif
+
/* avoided by normal-POP */
#define STK_VOID 0x0000 /* for fill a blank */
#define STK_EMPTY_CHECK_START 0x3000
#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
-#define STK_MEM_END_MARK 0x8400
-#define STK_TO_VOID_START 0x0500 /* mark for "(?>...)" */
-#define STK_REPEAT 0x0600
-#define STK_CALL_FRAME 0x0700
-#define STK_RETURN 0x0800
-#define STK_SAVE_VAL 0x0900
+#define STK_MEM_END_MARK 0x8100
+#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
+#define STK_REPEAT 0x0300
+#define STK_CALL_FRAME 0x0400
+#define STK_RETURN 0x0500
+#define STK_SAVE_VAL 0x0600
/* stack type check mask */
#define STK_MASK_POP_USED STK_ALT_FLAG
-#define STK_MASK_TO_VOID_TARGET 0x10fe
+#define STK_MASK_POP_HANDLED 0x0010
+#define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004)
+#define STK_MASK_TO_VOID_TARGET 0x100e
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
typedef intptr_t StackIndex;
typedef struct _StackType {
unsigned int type;
- int id;
+ int zid;
union {
struct {
UChar *pcode; /* byte code position */
UChar *pstr; /* string position */
UChar *pstr_prev; /* previous char position of pstr */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- unsigned int state_check;
-#endif
} state;
struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
@@ -378,67 +976,66 @@ typedef struct _StackType {
UChar* v;
UChar* v2;
} val;
+#ifdef USE_CALLOUT
+ struct {
+ int num;
+ OnigCalloutFunc func;
+ } callout;
+#endif
} u;
} StackType;
+#ifdef USE_CALLOUT
+
+/* Argument block handed to a callout function; every field is filled in by the CALLOUT_BODY macro. */
+struct OnigCalloutArgsStruct {
+ OnigCalloutIn in;
+ int name_id; /* name id or ONIG_NON_NAME_ID */
+ int num; /* callout number; used with CALLOUT_DATA_AT_NUM to locate this callout's data */
+ OnigRegex regex;
+ const OnigUChar* string;
+ const OnigUChar* string_end;
+ const OnigUChar* start;
+ const OnigUChar* right_range;
+ const OnigUChar* current; // current matching position
+ unsigned long retry_in_match_counter;
+
+ /* invisible to users */
+ MatchArg* msa;
+ StackType* stk_base;
+ StackType* stk;
+ StackIndex* mem_start_stk;
+ StackIndex* mem_end_stk;
+};
+
+#endif
+
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
+ (msa).match_stack_limit = (mp)->match_stack_limit;\
+ (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
+ (msa).mp = mp;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#else
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
+ (msa).match_stack_limit = (mp)->match_stack_limit;\
+ (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
+ (msa).mp = mp;\
(msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#endif
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-
-#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
-
-#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\
- if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
- unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
- offset = ((offset) * (state_num)) >> 3;\
- if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
- if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
- (msa).state_check_buff = (void* )xmalloc(size);\
- else \
- (msa).state_check_buff = (void* )xalloca(size);\
- xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
- (size_t )(size - (offset))); \
- (msa).state_check_buff_size = size;\
- }\
- else {\
- (msa).state_check_buff = (void* )0;\
- (msa).state_check_buff_size = 0;\
- }\
- }\
- else {\
- (msa).state_check_buff = (void* )0;\
- (msa).state_check_buff_size = 0;\
- }\
-} while(0)
-
-#define MATCH_ARG_FREE(msa) do {\
- if ((msa).stack_p) xfree((msa).stack_p);\
- if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
- if ((msa).state_check_buff) xfree((msa).state_check_buff);\
- }\
-} while(0)
-#else
-#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
-#endif
#define ALLOCA_PTR_NUM_LIMIT 50
@@ -495,25 +1092,303 @@ typedef struct _StackType {
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
-static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
+static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
extern unsigned int
onig_get_match_stack_limit_size(void)
{
- return MatchStackLimitSize;
+ return MatchStackLimit;
}
extern int
onig_set_match_stack_limit_size(unsigned int size)
{
- MatchStackLimitSize = size;
+ MatchStackLimit = size;
return 0;
}
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+
+/* process-wide default; onig_initialize_match_param() copies it into each match param */
+static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;
+
+/* Post-increments retry_in_match_counter and jumps to the retry_limit_in_match_over label */
+/* when the value before the increment exceeds retry_limit_in_match. */
+/* All three names are taken from the scope where the macro is expanded. */
+#define CHECK_RETRY_LIMIT_IN_MATCH do {\
+ if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
+} while (0)
+
+#else
+
+/* feature compiled out: the check expands to nothing */
+#define CHECK_RETRY_LIMIT_IN_MATCH
+
+#endif /* USE_RETRY_LIMIT_IN_MATCH */
+
+/*
+ * Return the process-wide default retry limit, or 0 when the library is
+ * built without USE_RETRY_LIMIT_IN_MATCH.
+ */
+extern unsigned long
+onig_get_retry_limit_in_match(void)
+{
+  unsigned long limit = 0; /* value reported when the feature is compiled out */
+
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+  limit = RetryLimitInMatch;
+#endif
+
+  return limit;
+}
+
+/*
+ * Replace the process-wide default retry limit.
+ * Returns 0 on success, or ONIG_NO_SUPPORT_CONFIG when the library is
+ * built without USE_RETRY_LIMIT_IN_MATCH.
+ */
+extern int
+onig_set_retry_limit_in_match(unsigned long size)
+{
+#ifndef USE_RETRY_LIMIT_IN_MATCH
+  return ONIG_NO_SUPPORT_CONFIG;
+#else
+  RetryLimitInMatch = size;
+  return 0;
+#endif
+}
+
+/* default callout functions copied into every match param by onig_initialize_match_param() */
+static OnigCalloutFunc DefaultProgressCallout;
+static OnigCalloutFunc DefaultRetractionCallout;
+
+/*
+ * Allocate a match parameter object and fill it with the current defaults.
+ * Returns NULL when the allocation fails.
+ */
+extern OnigMatchParam*
+onig_new_match_param(void)
+{
+  OnigMatchParam* mp = (OnigMatchParam* )xmalloc(sizeof(*mp));
+
+  if (IS_NULL(mp)) return mp;
+
+  onig_initialize_match_param(mp);
+  return mp;
+}
+
+/*
+ * Release the memory owned by a match parameter object without freeing the
+ * object itself.
+ *
+ * The allocation count is reset together with the pointer.  Leaving the old
+ * count behind made adjust_match_param() skip the allocation on a reused
+ * object and then clear memory through a NULL callout_data pointer.
+ */
+extern void
+onig_free_match_param_content(OnigMatchParam* p)
+{
+#ifdef USE_CALLOUT
+  if (IS_NOT_NULL(p->callout_data)) {
+    xfree(p->callout_data);
+    p->callout_data = 0;
+  }
+  p->callout_data_alloc_num = 0;
+#endif
+}
+
+/* Free a match parameter object together with the memory it owns; a NULL pointer is ignored. */
+extern void
+onig_free_match_param(OnigMatchParam* p)
+{
+  if (IS_NULL(p)) return;
+
+  onig_free_match_param_content(p);
+  xfree(p);
+}
+
+/*
+ * Fill a match parameter object with the current process-wide defaults.
+ *
+ * mp  object to initialize; earlier contents are overwritten, not freed
+ *
+ * Always returns ONIG_NORMAL.
+ */
+extern int
+onig_initialize_match_param(OnigMatchParam* mp)
+{
+  mp->match_stack_limit = MatchStackLimit;
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+  mp->retry_limit_in_match = RetryLimitInMatch;
+#else
+  /* MATCH_ARG_INIT reads this field in every configuration, so it must not
+     stay uninitialized; 0 is what onig_get_retry_limit_in_match() reports here */
+  mp->retry_limit_in_match = 0;
+#endif
+  mp->progress_callout_of_contents   = DefaultProgressCallout;
+  mp->retraction_callout_of_contents = DefaultRetractionCallout;
+
+#ifdef USE_CALLOUT
+  mp->match_at_call_counter  = 0;
+  mp->callout_user_data      = 0;
+  mp->callout_data           = 0;
+  mp->callout_data_alloc_num = 0;
+#endif
+
+  return ONIG_NORMAL;
+}
+
+#ifdef USE_CALLOUT
+
+/*
+ * Prepare mp for matching with reg: reset the call counter, make the
+ * callout data array at least as large as the regex's callout count, and
+ * zero the whole allocated array.  Returns ONIG_NORMAL, or the error
+ * produced by CHECK_NULL_RETURN_MEMERR when the allocation fails.
+ */
+static int
+adjust_match_param(regex_t* reg, OnigMatchParam* mp)
+{
+  RegexExt* ext = REG_EXTP(reg);
+
+  mp->match_at_call_counter = 0;
+
+  /* a regex without callouts needs no callout data */
+  if (IS_NULL(ext)) return ONIG_NORMAL;
+  if (ext->callout_num == 0) return ONIG_NORMAL;
+
+  if (mp->callout_data_alloc_num < ext->callout_num) {
+    CalloutData* grown;
+    size_t bytes = ext->callout_num * sizeof(*grown);
+
+    if (IS_NULL(mp->callout_data))
+      grown = (CalloutData* )xmalloc(bytes);
+    else
+      grown = (CalloutData* )xrealloc(mp->callout_data, bytes);
+    CHECK_NULL_RETURN_MEMERR(grown);
+
+    mp->callout_data_alloc_num = ext->callout_num;
+    mp->callout_data = grown;
+  }
+
+  xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
+  return ONIG_NORMAL;
+}
+
+/* Run adjust_match_param() and return its result from the enclosing function unless it is ONIG_NORMAL. */
+/* Expands to two statements and assigns a variable named r from the expansion site, */
+/* so it must not be used as the unbraced body of an if/else. */
+#define ADJUST_MATCH_PARAM(reg, mp) \
+ r = adjust_match_param(reg, mp);\
+ if (r != ONIG_NORMAL) return r;
+
+/* Address of the CalloutData entry for callout number num (numbers start at 1); performs no range check. */
+#define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1))
+
+/*
+ * Clear the data of the calling callout when it was last touched under a
+ * different match_at_call_counter value.  Returns the counter stored in
+ * the entry when it was cleared, 0 when the entry was already current.
+ */
+extern int
+onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
+{
+  OnigMatchParam* mp = args->msa->mp;
+  CalloutData* entry = CALLOUT_DATA_AT_NUM(mp, args->num);
+
+  if (entry->last_match_at_call_counter == mp->match_at_call_counter)
+    return 0;
+
+  xmemset(entry, 0, sizeof(*entry));
+  entry->last_match_at_call_counter = mp->match_at_call_counter;
+  return entry->last_match_at_call_counter;
+}
+
+/*
+ * Read one data slot of a callout without clearing stale values first.
+ *
+ * reg          not used by this function
+ * mp           match parameter object that holds the callout data
+ * callout_num  callout number, starting at 1
+ * slot         slot index, 0 .. ONIG_CALLOUT_DATA_SLOT_NUM - 1
+ * type         when not NULL, receives the slot's type
+ * val          when not NULL, receives the slot's value
+ *
+ * Returns ONIG_NORMAL when the slot holds a value, 1 when its type is
+ * ONIG_TYPE_VOID, and ONIGERR_INVALID_ARGUMENT when callout_num or slot
+ * falls outside the allocated data.  Previously only callout_num <= 0 was
+ * rejected, so other bad arguments read out of bounds.
+ */
+extern int
+onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
+                                     int callout_num, int slot,
+                                     OnigType* type, OnigValue* val)
+{
+  OnigType t;
+  CalloutData* d;
+
+  if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
+  /* stay inside the array sized by adjust_match_param() */
+  if (IS_NULL(mp->callout_data) || callout_num > mp->callout_data_alloc_num)
+    return ONIGERR_INVALID_ARGUMENT;
+  /* stay inside the fixed-size slot table of the entry */
+  if (slot < 0 || slot >= ONIG_CALLOUT_DATA_SLOT_NUM)
+    return ONIGERR_INVALID_ARGUMENT;
+
+  d = CALLOUT_DATA_AT_NUM(mp, callout_num);
+  t = d->slot[slot].type;
+  if (IS_NOT_NULL(type)) *type = t;
+  if (IS_NOT_NULL(val))  *val  = d->slot[slot].val;
+  return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
+}
+
+/* Read a slot of the calling callout's own data (args->num) without clearing stale values. */
+extern int
+onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
+                                                          int slot, OnigType* type,
+                                                          OnigValue* val)
+{
+  int r;
+
+  r = onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
+                                           args->num, slot, type, val);
+  return r;
+}
+
+/*
+ * Read one data slot of a callout.  An entry last touched under a
+ * different match_at_call_counter value is cleared before it is read.
+ *
+ * reg          not used by this function
+ * mp           match parameter object that holds the callout data
+ * callout_num  callout number, starting at 1
+ * slot         slot index, 0 .. ONIG_CALLOUT_DATA_SLOT_NUM - 1
+ * type         when not NULL, receives the slot's type
+ * val          when not NULL, receives the slot's value
+ *
+ * Returns ONIG_NORMAL when the slot holds a value, 1 when its type is
+ * ONIG_TYPE_VOID, and ONIGERR_INVALID_ARGUMENT when callout_num or slot
+ * falls outside the allocated data.  Previously only callout_num <= 0 was
+ * rejected, so other bad arguments accessed memory out of bounds.
+ */
+extern int
+onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
+                      int callout_num, int slot,
+                      OnigType* type, OnigValue* val)
+{
+  OnigType t;
+  CalloutData* d;
+
+  if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
+  /* stay inside the array sized by adjust_match_param() */
+  if (IS_NULL(mp->callout_data) || callout_num > mp->callout_data_alloc_num)
+    return ONIGERR_INVALID_ARGUMENT;
+  /* stay inside the fixed-size slot table of the entry */
+  if (slot < 0 || slot >= ONIG_CALLOUT_DATA_SLOT_NUM)
+    return ONIGERR_INVALID_ARGUMENT;
+
+  d = CALLOUT_DATA_AT_NUM(mp, callout_num);
+  if (d->last_match_at_call_counter != mp->match_at_call_counter) {
+    /* the entry is stale: drop its old values */
+    xmemset(d, 0, sizeof(*d));
+    d->last_match_at_call_counter = mp->match_at_call_counter;
+  }
+
+  t = d->slot[slot].type;
+  if (IS_NOT_NULL(type)) *type = t;
+  if (IS_NOT_NULL(val))  *val  = d->slot[slot].val;
+  return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
+}
+
+/*
+ * Read a callout data slot, selecting the callout by tag name.
+ * A negative lookup result is returned unchanged; a result of 0 becomes
+ * ONIGERR_INVALID_CALLOUT_TAG_NAME.
+ */
+extern int
+onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
+                             const UChar* tag, const UChar* tag_end, int slot,
+                             OnigType* type, OnigValue* val)
+{
+  int num = onig_get_callout_num_by_tag(reg, tag, tag_end);
+
+  if (num > 0)
+    return onig_get_callout_data(reg, mp, num, slot, type, val);
+
+  return (num == 0) ? ONIGERR_INVALID_CALLOUT_TAG_NAME : num;
+}
+
+/* Read a data slot of callout callout_num, taking the regex and match param from args. */
+extern int
+onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
+                                      int callout_num, int slot,
+                                      OnigType* type, OnigValue* val)
+{
+  int r;
+
+  r = onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
+                            type, val);
+  return r;
+}
+
+/* Read a data slot of the calling callout itself (args->num). */
+extern int
+onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
+                                           int slot, OnigType* type, OnigValue* val)
+{
+  int r;
+
+  r = onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
+                            type, val);
+  return r;
+}
+
+/*
+ * Store a typed value in one data slot of a callout and mark the entry as
+ * current for the running match.
+ *
+ * reg          not used by this function
+ * mp           match parameter object that holds the callout data
+ * callout_num  callout number, starting at 1
+ * slot         slot index, 0 .. ONIG_CALLOUT_DATA_SLOT_NUM - 1
+ * type         type tag to store
+ * val          value to store; must not be NULL
+ *
+ * Returns ONIG_NORMAL, or ONIGERR_INVALID_ARGUMENT when val is NULL or when
+ * callout_num or slot falls outside the allocated data.  Previously only
+ * callout_num <= 0 was rejected, so other bad arguments wrote out of bounds
+ * or dereferenced NULL.
+ */
+extern int
+onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
+                      int callout_num, int slot,
+                      OnigType type, OnigValue* val)
+{
+  CalloutData* d;
+
+  if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
+  if (IS_NULL(val)) return ONIGERR_INVALID_ARGUMENT;
+  /* stay inside the array sized by adjust_match_param() */
+  if (IS_NULL(mp->callout_data) || callout_num > mp->callout_data_alloc_num)
+    return ONIGERR_INVALID_ARGUMENT;
+  /* stay inside the fixed-size slot table of the entry */
+  if (slot < 0 || slot >= ONIG_CALLOUT_DATA_SLOT_NUM)
+    return ONIGERR_INVALID_ARGUMENT;
+
+  d = CALLOUT_DATA_AT_NUM(mp, callout_num);
+  d->slot[slot].type = type;
+  d->slot[slot].val  = *val;
+  d->last_match_at_call_counter = mp->match_at_call_counter;
+
+  return ONIG_NORMAL;
+}
+
+/*
+ * Store a callout data slot, selecting the callout by tag name.
+ * A negative lookup result is returned unchanged; a result of 0 becomes
+ * ONIGERR_INVALID_CALLOUT_TAG_NAME.
+ */
+extern int
+onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
+                             const UChar* tag, const UChar* tag_end, int slot,
+                             OnigType type, OnigValue* val)
+{
+  int num = onig_get_callout_num_by_tag(reg, tag, tag_end);
+
+  if (num > 0)
+    return onig_set_callout_data(reg, mp, num, slot, type, val);
+
+  return (num == 0) ? ONIGERR_INVALID_CALLOUT_TAG_NAME : num;
+}
+
+/* Store a data slot of callout callout_num, taking the regex and match param from args. */
+extern int
+onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
+                                      int callout_num, int slot,
+                                      OnigType type, OnigValue* val)
+{
+  int r;
+
+  r = onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
+                            type, val);
+  return r;
+}
+
+/* Store a data slot of the calling callout itself (args->num). */
+extern int
+onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
+                                           int slot, OnigType type, OnigValue* val)
+{
+  int r;
+
+  r = onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
+                            type, val);
+  return r;
+}
+
+#else
+#define ADJUST_MATCH_PARAM(reg, mp)
+#endif /* USE_CALLOUT */
+
+
static int
stack_double(int is_alloca, char** arg_alloc_base,
StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk,
- OnigMatchArg* msa)
+ MatchArg* msa)
{
unsigned int n;
int used;
@@ -541,11 +1416,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
xmemcpy(new_alloc_base, alloc_base, size);
}
else {
- if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
- if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
+ if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
+ if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit)
return ONIGERR_MATCH_STACK_LIMIT_OVER;
else
- n = MatchStackLimitSize;
+ n = msa->match_stack_limit;
}
new_alloc_base = (char* )xrealloc(alloc_base, new_size);
if (IS_NULL(new_alloc_base)) {
@@ -584,80 +1459,36 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define STATE_CHECK_POS(s,snum) \
- (((s) - str) * num_comb_exp_check + ((snum) - 1))
-#define STATE_CHECK_VAL(v,snum) do {\
- if (IS_NOT_NULL(state_check_buff)) {\
- int x = STATE_CHECK_POS(s,snum);\
- (v) = state_check_buff[x/8] & (1<<(x%8));\
- }\
- else (v) = 0;\
-} while(0)
-
-
-#define ELSE_IF_STATE_CHECK_MARK(stk) \
- else if ((stk)->type == STK_STATE_CHECK_MARK) { \
- int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
- state_check_buff[x/8] |= (1<<(x%8));\
- }
-
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
- stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
- stk->u.state.state_check = 0;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
- STACK_ENSURE(1);\
- stk->type = STK_ALT;\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
- stk->u.state.state_check = (IS_NOT_NULL(state_check_buff) ? (snum) : 0);\
STACK_INC;\
} while(0)
-#define STACK_PUSH_STATE_CHECK(s,snum) do {\
- if (IS_NOT_NULL(state_check_buff)) { \
- STACK_ENSURE(1);\
- stk->type = STK_STATE_CHECK_MARK;\
- stk->u.state.pstr = (s);\
- stk->u.state.state_check = (snum);\
- STACK_INC;\
- }\
-} while(0)
-
-#else /* USE_COMBINATION_EXPLOSION_CHECK */
-
-#define ELSE_IF_STATE_CHECK_MARK(stk)
-
-#define STACK_PUSH(stack_type,pat,s,sprev) do {\
- STACK_ENSURE(1);\
+#ifdef ONIG_DEBUG_MATCH
+#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = s;\
+ stk->u.state.pstr_prev = sprev;\
STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+} while (0)
+#else
+#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
STACK_INC;\
-} while(0)
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+} while (0)
+#endif
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
@@ -672,7 +1503,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_REPEAT(sid, pat) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT;\
- stk->id = (sid);\
+ stk->zid = (sid);\
stk->u.repeat.pcode = (pat);\
stk->u.repeat.count = 0;\
STACK_INC;\
@@ -688,7 +1519,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_MEM_START(mnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_START;\
- stk->id = (mnum);\
+ stk->zid = (mnum);\
stk->u.mem.pstr = (s);\
stk->u.mem.start = mem_start_stk[mnum];\
stk->u.mem.end = mem_end_stk[mnum];\
@@ -700,7 +1531,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_MEM_END(mnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_END;\
- stk->id = (mnum);\
+ stk->zid = (mnum);\
stk->u.mem.pstr = (s);\
stk->u.mem.start = mem_start_stk[mnum];\
stk->u.mem.end = mem_end_stk[mnum];\
@@ -711,7 +1542,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
STACK_ENSURE(1);\
stk->type = STK_MEM_END_MARK;\
- stk->id = (mnum);\
+ stk->zid = (mnum);\
STACK_INC;\
} while(0)
@@ -721,10 +1552,10 @@ stack_double(int is_alloca, char** arg_alloc_base,
while (k > stk_base) {\
k--;\
if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
- && k->id == (mnum)) {\
+ && k->zid == (mnum)) {\
level++;\
}\
- else if (k->type == STK_MEM_START && k->id == (mnum)) {\
+ else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
if (level == 0) break;\
level--;\
}\
@@ -752,7 +1583,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
STACK_ENSURE(1);\
stk->type = STK_EMPTY_CHECK_START;\
- stk->id = (cnum);\
+ stk->zid = (cnum);\
stk->u.empty_check.pstr = (s);\
STACK_INC;\
} while(0)
@@ -760,7 +1591,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
STACK_ENSURE(1);\
stk->type = STK_EMPTY_CHECK_END;\
- stk->id = (cnum);\
+ stk->zid = (cnum);\
STACK_INC;\
} while(0)
@@ -780,7 +1611,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
STACK_ENSURE(1);\
stk->type = STK_SAVE_VAL;\
- stk->id = (sid);\
+ stk->zid = (sid);\
stk->u.val.type = (stype);\
stk->u.val.v = (UChar* )(sval);\
STACK_INC;\
@@ -789,7 +1620,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
STACK_ENSURE(1);\
stk->type = STK_SAVE_VAL;\
- stk->id = (sid);\
+ stk->zid = (sid);\
stk->u.val.type = (stype);\
stk->u.val.v = (UChar* )(sval);\
stk->u.val.v2 = sprev;\
@@ -815,7 +1646,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
- && k->id == (sid)) {\
+ && k->zid == (sid)) {\
if (level == 0) {\
(sval) = k->u.val.v;\
break;\
@@ -835,7 +1666,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
- && k->id == (sid)) {\
+ && k->zid == (sid)) {\
if (level == 0) {\
(sval) = k->u.val.v;\
sprev = k->u.val.v2;\
@@ -869,6 +1700,24 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while (0)
+#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_CALLOUT;\
+ stk->zid = ONIG_NON_NAME_ID;\
+ stk->u.callout.num = (anum);\
+ stk->u.callout.func = (func);\
+ STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_CALLOUT;\
+ stk->zid = (aid);\
+ stk->u.callout.num = (anum);\
+ stk->u.callout.func = (func);\
+ STACK_INC;\
+} while(0)
+
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
if ((p) < stk_base) {\
@@ -884,6 +1733,16 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
+
+#ifdef USE_CALLOUT
+#define POP_CALLOUT_CASE \
+ else if (stk->type == STK_CALLOUT) {\
+ RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
+ }
+#else
+#define POP_CALLOUT_CASE
+#endif
+
#define STACK_POP do {\
switch (pop_level) {\
case STACK_POP_LEVEL_FREE:\
@@ -891,7 +1750,6 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk--;\
STACK_BASE_CHECK(stk, "STACK_POP"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
case STACK_POP_LEVEL_MEM_START:\
@@ -900,10 +1758,9 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_BASE_CHECK(stk, "STACK_POP 2"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
}\
- ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
default:\
@@ -911,75 +1768,70 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk--;\
STACK_BASE_CHECK(stk, "STACK_POP 3"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
+ else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
+ if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
+ }\
+ POP_CALLOUT_CASE\
}\
- ELSE_IF_STATE_CHECK_MARK(stk);\
}\
break;\
}\
} while(0)
-#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
+#define POP_TIL_BODY(aname, til_type) do {\
while (1) {\
stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \
- if (stk->type == STK_ALT_PREC_READ_NOT) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
+ STACK_BASE_CHECK(stk, (aname));\
+ if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
+ if (stk->type == (til_type)) break;\
+ else {\
+ if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->zid] = stk->u.mem.start;\
+ mem_end_stk[stk->zid] = stk->u.mem.end;\
+ }\
+ /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
+ }\
}\
- ELSE_IF_STATE_CHECK_MARK(stk);\
}\
} while(0)
+#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
+ POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\
+} while(0)
+
#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \
- if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->id] = stk->u.mem.start;\
- mem_end_stk[stk->id] = stk->u.mem.end;\
- }\
- ELSE_IF_STATE_CHECK_MARK(stk);\
- }\
+ POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\
} while(0)
+
#define STACK_EXEC_TO_VOID(k) do {\
k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
if (IS_TO_VOID_TARGET(k)) {\
+ if (k->type == STK_TO_VOID_START) {\
+ k->type = STK_VOID;\
+ break;\
+ }\
k->type = STK_VOID;\
}\
- else if (k->type == STK_TO_VOID_START) {\
- k->type = STK_VOID;\
- break;\
- }\
}\
} while(0)
@@ -989,7 +1841,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->id == (sid)) {\
+ if (k->zid == (sid)) {\
(isnull) = (k->u.empty_check.pstr == (s));\
break;\
}\
@@ -1004,7 +1856,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \
if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->id == (sid)) {\
+ if (k->zid == (sid)) {\
if (k->u.empty_check.pstr != (s)) {\
(isnull) = 0;\
break;\
@@ -1017,7 +1869,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
if (k->u.mem.end == INVALID_STACK_INDEX) {\
(isnull) = 0; break;\
}\
- if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\
+ if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\
endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
else\
endp = (UChar* )k->u.mem.end;\
@@ -1045,7 +1897,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
k--;\
STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \
if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->id == (sid)) {\
+ if (k->zid == (sid)) {\
if (level == 0) {\
if (k->u.empty_check.pstr != (s)) {\
(isnull) = 0;\
@@ -1059,7 +1911,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
if (k->u.mem.end == INVALID_STACK_INDEX) {\
(isnull) = 0; break;\
}\
- if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\
+ if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\
endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
else\
endp = (UChar* )k->u.mem.end;\
@@ -1081,7 +1933,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
}\
else if (k->type == STK_EMPTY_CHECK_END) {\
- if (k->id == (sid)) level++;\
+ if (k->zid == (sid)) level++;\
}\
}\
} while(0)
@@ -1116,7 +1968,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
if (k->type == STK_REPEAT) {\
if (level == 0) {\
- if (k->id == (sid)) {\
+ if (k->zid == (sid)) {\
break;\
}\
}\
@@ -1208,11 +2060,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
-#else
-#define INIT_RIGHT_RANGE right_range = (UChar* )end
-#endif
#ifdef USE_CAPTURE_HISTORY
static int
@@ -1225,7 +2073,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
while (k < stk_top) {
if (k->type == STK_MEM_START) {
- n = k->id;
+ n = k->zid;
if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
MEM_STATUS_AT(reg->capture_history, n) != 0) {
child = history_node_new();
@@ -1243,7 +2091,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
}
}
else if (k->type == STK_MEM_END) {
- if (k->id == node->group) {
+ if (k->zid == node->group) {
node->end = (int )(k->u.mem.pstr - str);
*kp = k;
return 0;
@@ -1292,7 +2140,7 @@ backref_match_at_nested_level(regex_t* reg,
}
else if (level == nest) {
if (k->type == STK_MEM_START) {
- if (mem_is_in_memp(k->id, mem_num, memp)) {
+ if (mem_is_in_memp(k->zid, mem_num, memp)) {
pstart = k->u.mem.pstr;
if (IS_NOT_NULL(pend)) {
if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
@@ -1316,7 +2164,7 @@ backref_match_at_nested_level(regex_t* reg,
}
}
else if (k->type == STK_MEM_END) {
- if (mem_is_in_memp(k->id, mem_num, memp)) {
+ if (mem_is_in_memp(k->zid, mem_num, memp)) {
pend = k->u.mem.pstr;
}
}
@@ -1347,7 +2195,7 @@ backref_check_at_nested_level(regex_t* reg,
}
else if (level == nest) {
if (k->type == STK_MEM_END) {
- if (mem_is_in_memp(k->id, mem_num, memp)) {
+ if (mem_is_in_memp(k->zid, mem_num, memp)) {
return 1;
}
}
@@ -1391,14 +2239,14 @@ static int OpCurr = OP_FINISH;
static int OpPrevTarget = OP_FAIL;
static int MaxStackDepth = 0;
-#define MOP_IN(opcode) do {\
+#define SOP_IN(opcode) do {\
if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
OpCurr = opcode;\
OpCounter[opcode]++;\
GETTIME(ts);\
} while(0)
-#define MOP_OUT do {\
+#define SOP_OUT do {\
GETTIME(te);\
OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
@@ -1422,9 +2270,9 @@ onig_print_statistics(FILE* f)
r = fprintf(f, " count prev time\n");
if (r < 0) return -1;
- for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
+ for (i = 0; OpInfo[i].opcode >= 0; i++) {
r = fprintf(f, "%8d: %8d: %10ld: %s\n",
- OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
+ OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
if (r < 0) return -1;
}
r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
@@ -1442,8 +2290,8 @@ onig_print_statistics(FILE* f)
#else
#define STACK_INC stk++
-#define MOP_IN(opcode)
-#define MOP_OUT
+#define SOP_IN(opcode)
+#define SOP_OUT
#endif
@@ -1459,10 +2307,8 @@ typedef struct {
/* if sstart == str then set sprev to NULL. */
static int
match_at(regex_t* reg, const UChar* str, const UChar* end,
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- const UChar* in_right_range,
-#endif
- const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
+ const UChar* in_right_range, const UChar* sstart, UChar* sprev,
+ MatchArg* msa)
{
static UChar FinishCode[] = { OP_FINISH };
@@ -1480,16 +2326,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
UChar* keep;
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- int scv;
- unsigned char* state_check_buff = msa->state_check_buff;
- int num_comb_exp_check = reg->num_comb_exp_check;
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ unsigned long retry_limit_in_match;
+ unsigned long retry_in_match_counter;
#endif
+
+#ifdef USE_CALLOUT
+ int of;
+#endif
+
UChar *p = reg->p;
OnigOptionType option = reg->options;
OnigEncoding encode = reg->enc;
OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
+#ifdef USE_CALLOUT
+ msa->mp->match_at_call_counter++;
+#endif
+
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ retry_limit_in_match = msa->retry_limit_in_match;
+#endif
+
//n = reg->num_repeat + reg->num_mem * 2;
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
@@ -1506,11 +2364,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(sstart - str));
#endif
- STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
best_len = ONIG_MISMATCH;
keep = s = (UChar* )sstart;
+ STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */
INIT_RIGHT_RANGE;
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ retry_in_match_counter = 0;
+#endif
+
while (1) {
#ifdef ONIG_DEBUG_MATCH
{
@@ -1533,7 +2395,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
fputs((char* )buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
- fprintf(stderr, "%4d: ", (int )(p - reg->p));
+ if (p == FinishCode)
+ fprintf(stderr, "----: ");
+ else
+ fprintf(stderr, "%4d: ", (int )(p - reg->p));
onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode);
fprintf(stderr, "\n");
}
@@ -1541,7 +2406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sbegin = s;
switch (*p++) {
- case OP_END: MOP_IN(OP_END);
+ case OP_END: SOP_IN(OP_END);
n = (int )(s - sstart);
if (n > best_len) {
OnigRegion* region;
@@ -1639,7 +2504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
end_best_len:
#endif
- MOP_OUT;
+ SOP_OUT;
if (IS_FIND_CONDITION(option)) {
if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
@@ -1655,14 +2520,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto finish;
break;
- case OP_EXACT1: MOP_IN(OP_EXACT1);
+ case OP_EXACT1: SOP_IN(OP_EXACT1);
DATA_ENSURE(1);
if (*p != *s) goto fail;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
+ case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC);
{
int len;
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -1681,21 +2546,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; q++;
}
}
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_EXACT2: MOP_IN(OP_EXACT2);
+ case OP_EXACT2: SOP_IN(OP_EXACT2);
DATA_ENSURE(2);
if (*p != *s) goto fail;
p++; s++;
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACT3: MOP_IN(OP_EXACT3);
+ case OP_EXACT3: SOP_IN(OP_EXACT3);
DATA_ENSURE(3);
if (*p != *s) goto fail;
p++; s++;
@@ -1704,11 +2569,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACT4: MOP_IN(OP_EXACT4);
+ case OP_EXACT4: SOP_IN(OP_EXACT4);
DATA_ENSURE(4);
if (*p != *s) goto fail;
p++; s++;
@@ -1719,11 +2584,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACT5: MOP_IN(OP_EXACT5);
+ case OP_EXACT5: SOP_IN(OP_EXACT5);
DATA_ENSURE(5);
if (*p != *s) goto fail;
p++; s++;
@@ -1736,22 +2601,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p != *s) goto fail;
sprev = s;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTN: MOP_IN(OP_EXACTN);
+ case OP_EXACTN: SOP_IN(OP_EXACTN);
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen);
while (tlen-- > 0) {
if (*p++ != *s++) goto fail;
}
sprev = s - 1;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
+ case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC);
{
int len;
UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -1775,20 +2640,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
+ case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1);
DATA_ENSURE(2);
if (*p != *s) goto fail;
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
+ case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2);
DATA_ENSURE(4);
if (*p != *s) goto fail;
p++; s++;
@@ -1799,11 +2664,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
+ case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3);
DATA_ENSURE(6);
if (*p != *s) goto fail;
p++; s++;
@@ -1818,11 +2683,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
if (*p != *s) goto fail;
p++; s++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
+ case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N);
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 2);
while (tlen-- > 0) {
@@ -1832,11 +2697,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - 2;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
+ case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N);
GET_LENGTH_INC(tlen, p);
DATA_ENSURE(tlen * 3);
while (tlen-- > 0) {
@@ -1848,11 +2713,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - 3;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
+ case OP_EXACTMBN: SOP_IN(OP_EXACTMBN);
GET_LENGTH_INC(tlen, p); /* mb-len */
GET_LENGTH_INC(tlen2, p); /* string len */
tlen2 *= tlen;
@@ -1862,19 +2727,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p++; s++;
}
sprev = s - tlen;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_CCLASS: MOP_IN(OP_CCLASS);
+ case OP_CCLASS: SOP_IN(OP_CCLASS);
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
p += SIZE_BITSET;
s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
+ case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
@@ -1900,10 +2765,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
}
p += tlen;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
+ case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
@@ -1918,18 +2783,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
s++;
}
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
+ case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT);
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
p += SIZE_BITSET;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
+ case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
s++;
@@ -1967,10 +2832,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
cc_mb_not_success:
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
+ case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
@@ -1985,11 +2850,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += tlen;
s++;
}
- MOP_OUT;
+ SOP_OUT;
break;
#ifdef USE_OP_CCLASS_NODE
- case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
+ case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE);
{
OnigCodePoint code;
void *node;
@@ -2005,28 +2870,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
code = ONIGENC_MBC_TO_CODE(encode, ss, s);
if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
}
- MOP_OUT;
+ SOP_OUT;
break;
#endif
- case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
+ case OP_ANYCHAR: SOP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
s += n;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
+ case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML);
DATA_ENSURE(1);
n = enclen(encode, s);
DATA_ENSURE(n);
s += n;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
+ case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR);
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enclen(encode, s);
@@ -2035,11 +2900,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sprev = s;
s += n;
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
+ case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR);
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enclen(encode, s);
@@ -2053,11 +2918,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s++;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
+ case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
@@ -2069,10 +2934,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s += n;
}
p++;
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
@@ -2089,87 +2954,46 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
p++;
- MOP_OUT;
- break;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
- GET_STATE_CHECK_NUM_INC(mem, p);
- while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enclen(encode, s);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
- s += n;
- }
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_STATE_CHECK_ANYCHAR_ML_STAR:
- MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
-
- GET_STATE_CHECK_NUM_INC(mem, p);
- while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enclen(encode, s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
- }
- MOP_OUT;
- break;
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
-
- case OP_WORD: MOP_IN(OP_WORD);
+ case OP_WORD: SOP_IN(OP_WORD);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_WORD_ASCII: MOP_IN(OP_WORD_ASCII);
+ case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII);
DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
goto fail;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_NO_WORD: MOP_IN(OP_NO_WORD);
+ case OP_NO_WORD: SOP_IN(OP_NO_WORD);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_NO_WORD_ASCII: MOP_IN(OP_NO_WORD_ASCII);
+ case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII);
DATA_ENSURE(1);
if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
goto fail;
s += enclen(encode, s);
- MOP_OUT;
+ SOP_OUT;
break;
- case OP_WORD_BOUNDARY: MOP_IN(OP_WORD_BOUNDARY);
+ case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY);
{
ModeType mode;
GET_MODE_INC(mode, p); // ascii_mode
@@ -2189,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_NO_WORD_BOUNDARY: MOP_IN(OP_NO_WORD_BOUNDARY);
+ case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY);
{
ModeType mode;
GET_MODE_INC(mode, p); // ascii_mode
@@ -2212,20 +3036,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#ifdef USE_WORD_BEGIN_END
- case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
+ case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN);
{
ModeType mode;
GET_MODE_INC(mode, p); // ascii_mode
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- if (ON_STR_BEGIN(s) ||
- ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
- MOP_OUT;
+ if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+ SOP_OUT;
continue;
}
}
@@ -2233,14 +3056,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
break;
- case OP_WORD_END: MOP_IN(OP_WORD_END);
+ case OP_WORD_END: SOP_IN(OP_WORD_END);
{
ModeType mode;
GET_MODE_INC(mode, p); // ascii_mode
if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
}
@@ -2250,82 +3073,82 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
goto fail;
break;
case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))
goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
+ case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF);
if (! ON_STR_BEGIN(s)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_END_BUF: MOP_IN(OP_END_BUF);
+ case OP_END_BUF: SOP_IN(OP_END_BUF);
if (! ON_STR_END(s)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
+ case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE);
if (ON_STR_BEGIN(s)) {
if (IS_NOTBOL(msa->options)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
goto fail;
break;
- case OP_END_LINE: MOP_IN(OP_END_LINE);
+ case OP_END_LINE: SOP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
}
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
#endif
goto fail;
break;
- case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
+ case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
}
@@ -2333,7 +3156,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
ON_STR_END(s + enclen(encode, s))) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
@@ -2341,7 +3164,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar* ss = s + enclen(encode, s);
ss += enclen(encode, ss);
if (ON_STR_END(ss)) {
- MOP_OUT;
+ SOP_OUT;
continue;
}
}
@@ -2349,53 +3172,53 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
break;
- case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
+ case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION);
if (s != msa->start)
goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
+ case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH);
GET_MEMNUM_INC(mem, p);
STACK_PUSH_MEM_START(mem, s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
+ case OP_MEMORY_START: SOP_IN(OP_MEMORY_START);
GET_MEMNUM_INC(mem, p);
mem_start_stk[mem] = (StackIndex )((void* )s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
+ case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH);
GET_MEMNUM_INC(mem, p);
STACK_PUSH_MEM_END(mem, s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
+ case OP_MEMORY_END: SOP_IN(OP_MEMORY_END);
GET_MEMNUM_INC(mem, p);
mem_end_stk[mem] = (StackIndex )((void* )s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#ifdef USE_CALL
- case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
+ case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC);
GET_MEMNUM_INC(mem, p);
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
STACK_PUSH_MEM_END(mem, s);
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
+ case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC);
GET_MEMNUM_INC(mem, p);
mem_end_stk[mem] = (StackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
@@ -2406,22 +3229,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
STACK_PUSH_MEM_END_MARK(mem);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
- case OP_BACKREF1: MOP_IN(OP_BACKREF1);
+ case OP_BACKREF1: SOP_IN(OP_BACKREF1);
mem = 1;
goto backref;
break;
- case OP_BACKREF2: MOP_IN(OP_BACKREF2);
+ case OP_BACKREF2: SOP_IN(OP_BACKREF2);
mem = 2;
goto backref;
break;
- case OP_BACKREF_N: MOP_IN(OP_BACKREF_N);
+ case OP_BACKREF_N: SOP_IN(OP_BACKREF_N);
GET_MEMNUM_INC(mem, p);
backref:
{
@@ -2446,12 +3269,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
- case OP_BACKREF_N_IC: MOP_IN(OP_BACKREF_N_IC);
+ case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC);
GET_MEMNUM_INC(mem, p);
{
int len;
@@ -2475,12 +3298,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
- case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
+ case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI);
{
int len, is_fail;
UChar *pstart, *pend, *swork;
@@ -2514,12 +3337,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
- case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
+ case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC);
{
int len, is_fail;
UChar *pstart, *pend, *swork;
@@ -2553,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
@@ -2580,13 +3403,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else
goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
#endif
- case OP_BACKREF_CHECK: MOP_IN(OP_BACKREF_CHECK);
+ case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK);
{
GET_LENGTH_INC(tlen, p);
for (i = 0; i < tlen; i++) {
@@ -2599,7 +3422,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break; /* success */
}
if (i == tlen) goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
@@ -2619,36 +3442,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else
goto fail;
- MOP_OUT;
+ SOP_OUT;
continue;
}
break;
#endif
#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
- case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
+ case OP_SET_OPTION_PUSH: SOP_IN(OP_SET_OPTION_PUSH);
GET_OPTION_INC(option, p);
STACK_PUSH_ALT(p, s, sprev);
p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
+ case OP_SET_OPTION: SOP_IN(OP_SET_OPTION);
GET_OPTION_INC(option, p);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
- case OP_EMPTY_CHECK_START: MOP_IN(OP_EMPTY_CHECK_START);
+ case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START);
GET_MEMNUM_INC(mem, p); /* mem: null check id */
STACK_PUSH_EMPTY_CHECK_START(mem, s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_EMPTY_CHECK_END: MOP_IN(OP_EMPTY_CHECK_END);
+ case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END);
{
int is_empty;
@@ -2677,12 +3500,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
- case OP_EMPTY_CHECK_END_MEMST: MOP_IN(OP_EMPTY_CHECK_END_MEMST);
+ case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST);
{
int is_empty;
@@ -2696,14 +3519,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto empty_check_found;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
#ifdef USE_CALL
case OP_EMPTY_CHECK_END_MEMST_PUSH:
- MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);
+ SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);
{
int is_empty;
@@ -2725,103 +3548,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_EMPTY_CHECK_END(mem);
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
- case OP_JUMP: MOP_IN(OP_JUMP);
+ case OP_JUMP: SOP_IN(OP_JUMP);
GET_RELADDR_INC(addr, p);
p += addr;
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
+ SOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH;
continue;
break;
- case OP_PUSH: MOP_IN(OP_PUSH);
+ case OP_PUSH: SOP_IN(OP_PUSH);
GET_RELADDR_INC(addr, p);
STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PUSH_SUPER: MOP_IN(OP_PUSH_SUPER);
+ case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER);
GET_RELADDR_INC(addr, p);
STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
- MOP_OUT;
- continue;
- break;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
- GET_STATE_CHECK_NUM_INC(mem, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- GET_RELADDR_INC(addr, p);
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
- MOP_OUT;
- continue;
- break;
-
- case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
- GET_STATE_CHECK_NUM_INC(mem, p);
- GET_RELADDR_INC(addr, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) {
- p += addr;
- }
- else {
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
- }
- MOP_OUT;
- continue;
- break;
-
- case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
- GET_STATE_CHECK_NUM_INC(mem, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_STATE_CHECK(s, mem);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
- case OP_POP: MOP_IN(OP_POP);
+ case OP_POP_OUT: SOP_IN(OP_POP_OUT);
STACK_POP_ONE;
- MOP_OUT;
+ // for stop backtrack
+ //CHECK_RETRY_LIMIT_IN_MATCH;
+ SOP_OUT;
continue;
break;
- case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
+ case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1);
GET_RELADDR_INC(addr, p);
if (*p == *s && DATA_ENSURE_CHECK1) {
p++;
STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
}
p += (addr + 1);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
+ case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT);
GET_RELADDR_INC(addr, p);
if (*p == *s) {
p++;
STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
}
p++;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_REPEAT: MOP_IN(OP_REPEAT);
+ case OP_REPEAT: SOP_IN(OP_REPEAT);
{
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
GET_RELADDR_INC(addr, p);
@@ -2834,11 +3622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_ALT(p + addr, s, sprev);
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
+ case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG);
{
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
GET_RELADDR_INC(addr, p);
@@ -2852,11 +3640,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += addr;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
+ case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
si = repeat_stk[mem];
stkp = STACK_AT(si);
@@ -2874,19 +3662,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p = stkp->u.repeat.pcode;
}
STACK_PUSH_REPEAT_INC(si);
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
+ SOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH;
continue;
break;
- case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
+ case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc;
break;
- case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
+ case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
si = repeat_stk[mem];
stkp = STACK_AT(si);
@@ -2908,68 +3696,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
STACK_PUSH_REPEAT_INC(si);
}
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
+ SOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH;
continue;
break;
- case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
+ case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc_ng;
break;
- case OP_PREC_READ_START: MOP_IN(OP_PREC_READ_START);
+ case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START);
STACK_PUSH_POS(s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PREC_READ_END: MOP_IN(OP_PREC_READ_END);
+ case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END);
{
STACK_EXEC_TO_VOID(stkp);
s = stkp->u.state.pstr;
sprev = stkp->u.state.pstr_prev;
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PREC_READ_NOT_START: MOP_IN(OP_PREC_READ_NOT_START);
+ case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START);
GET_RELADDR_INC(addr, p);
STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_PREC_READ_NOT_END: MOP_IN(OP_PREC_READ_NOT_END);
+ case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END);
STACK_POP_TIL_ALT_PREC_READ_NOT;
goto fail;
break;
- case OP_ATOMIC_START: MOP_IN(OP_ATOMIC_START);
+ case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START);
STACK_PUSH_TO_VOID_START;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_ATOMIC_END: MOP_IN(OP_ATOMIC_END);
+ case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END);
STACK_EXEC_TO_VOID(stkp);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
+ case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND);
GET_LENGTH_INC(tlen, p);
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_LOOK_BEHIND_NOT_START: MOP_IN(OP_LOOK_BEHIND_NOT_START);
+ case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START);
GET_RELADDR_INC(addr, p);
GET_LENGTH_INC(tlen, p);
q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
@@ -2984,33 +3772,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
s = q;
sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_LOOK_BEHIND_NOT_END: MOP_IN(OP_LOOK_BEHIND_NOT_END);
+ case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END);
STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
goto fail;
break;
#ifdef USE_CALL
- case OP_CALL: MOP_IN(OP_CALL);
+ case OP_CALL: SOP_IN(OP_CALL);
GET_ABSADDR_INC(addr, p);
STACK_PUSH_CALL_FRAME(p);
p = reg->p + addr;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_RETURN: MOP_IN(OP_RETURN);
+ case OP_RETURN: SOP_IN(OP_RETURN);
STACK_RETURN(p);
STACK_PUSH_RETURN;
- MOP_OUT;
+ SOP_OUT;
continue;
break;
#endif
- case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL);
+ case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL);
{
SaveType type;
GET_SAVE_TYPE_INC(type, p);
@@ -3029,11 +3817,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
}
}
- MOP_OUT;
+ SOP_OUT;
continue;
break;
- case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR);
+ case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR);
{
UpdateVarType type;
enum SaveType save_type;
@@ -3061,31 +3849,99 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
}
}
- MOP_OUT;
+ SOP_OUT;
+ continue;
+ break;
+
+#ifdef USE_CALLOUT
+ case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS);
+ of = ONIG_CALLOUT_OF_CONTENTS;
+ goto callout_common_entry;
+
+ SOP_OUT;
continue;
break;
+ case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME);
+ {
+ int call_result;
+ int name_id;
+ int num;
+ int in;
+ CalloutListEntry* e;
+ OnigCalloutFunc func;
+ OnigCalloutArgs args;
+
+ of = ONIG_CALLOUT_OF_NAME;
+ GET_MEMNUM_INC(name_id, p);
+
+ callout_common_entry:
+ GET_MEMNUM_INC(num, p);
+ e = onig_reg_callout_list_at(reg, num);
+ in = e->in;
+ if (of == ONIG_CALLOUT_OF_NAME) {
+ func = onig_get_callout_start_func(reg, num);
+ }
+ else {
+ name_id = ONIG_NON_NAME_ID;
+ func = msa->mp->progress_callout_of_contents;
+ }
+
+ if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
+ CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
+ num, msa->mp->callout_user_data, args, call_result);
+ switch (call_result) {
+ case ONIG_CALLOUT_FAIL:
+ goto fail;
+ break;
+ case ONIG_CALLOUT_SUCCESS:
+ goto retraction_callout2;
+ break;
+ default: /* error code */
+ if (call_result > 0) {
+ call_result = ONIGERR_INVALID_ARGUMENT;
+ }
+ best_len = call_result;
+ goto finish;
+ break;
+ }
+ }
+ else {
+ retraction_callout2:
+ if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
+ if (of == ONIG_CALLOUT_OF_NAME) {
+ if (IS_NOT_NULL(func)) {
+ STACK_PUSH_CALLOUT_NAME(name_id, num, func);
+ }
+ }
+ else {
+ func = msa->mp->retraction_callout_of_contents;
+ if (IS_NOT_NULL(func)) {
+ STACK_PUSH_CALLOUT_CONTENTS(num, func);
+ }
+ }
+ }
+ }
+ }
+ SOP_OUT;
+ continue;
+ break;
+#endif
+
case OP_FINISH:
goto finish;
break;
fail:
- MOP_OUT;
+ SOP_OUT;
/* fall */
- case OP_FAIL: MOP_IN(OP_FAIL);
+ case OP_FAIL: SOP_IN(OP_FAIL);
STACK_POP;
p = stk->u.state.pcode;
s = stk->u.state.pstr;
sprev = stk->u.state.pstr_prev;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- if (stk->u.state.state_check != 0) {
- stk->type = STK_STATE_CHECK_MARK;
- stk++;
- }
-#endif
-
- MOP_OUT;
+ CHECK_RETRY_LIMIT_IN_MATCH;
+ SOP_OUT;
continue;
break;
@@ -3113,6 +3969,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
unexpected_bytecode_error:
STACK_SAVE;
return ONIGERR_UNEXPECTED_BYTECODE;
+
+#ifdef USE_RETRY_LIMIT_IN_MATCH
+ retry_limit_in_match_over:
+ STACK_SAVE;
+ return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
+#endif
}
@@ -3423,23 +4285,30 @@ map_search_backward(OnigEncoding enc, UChar map[],
}
return (UChar* )NULL;
}
-
extern int
onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
OnigRegion* region, OnigOptionType option)
{
int r;
- UChar *prev;
- OnigMatchArg msa;
+ OnigMatchParam mp;
- MATCH_ARG_INIT(msa, reg, option, region, at);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- {
- int offset = at - str;
- STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
- }
-#endif
+ onig_initialize_match_param(&mp);
+ r = onig_match_with_param(reg, str, end, at, region, option, &mp);
+ onig_free_match_param_content(&mp);
+ return r;
+}
+
+extern int
+onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* at, OnigRegion* region, OnigOptionType option,
+ OnigMatchParam* mp)
+{
+ int r;
+ UChar *prev;
+ MatchArg msa;
+ ADJUST_MATCH_PARAM(reg, mp);
+ MATCH_ARG_INIT(msa, reg, option, region, at, mp);
if (region
#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
@@ -3459,11 +4328,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
}
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
- r = match_at(reg, str, end,
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- end,
-#endif
- at, prev, &msa);
+ r = match_at(reg, str, end, end, at, prev, &msa);
}
end:
@@ -3497,23 +4362,23 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
retry:
switch (reg->optimize) {
- case ONIG_OPTIMIZE_EXACT:
+ case OPTIMIZE_EXACT:
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
- case ONIG_OPTIMIZE_EXACT_IC:
+ case OPTIMIZE_EXACT_IC:
p = slow_search_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end, p, end, range);
break;
- case ONIG_OPTIMIZE_EXACT_BM:
+ case OPTIMIZE_EXACT_BM:
p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
break;
- case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+ case OPTIMIZE_EXACT_BM_NO_REV:
p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
break;
- case ONIG_OPTIMIZE_MAP:
+ case OPTIMIZE_MAP:
p = map_search(reg->enc, reg->map, p, range);
break;
}
@@ -3621,20 +4486,20 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
retry:
switch (reg->optimize) {
- case ONIG_OPTIMIZE_EXACT:
+ case OPTIMIZE_EXACT:
exact_method:
p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
- case ONIG_OPTIMIZE_EXACT_IC:
+ case OPTIMIZE_EXACT_IC:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
- case ONIG_OPTIMIZE_EXACT_BM:
- case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+ case OPTIMIZE_EXACT_BM:
+ case OPTIMIZE_EXACT_BM_NO_REV:
#ifdef USE_INT_MAP_BACKWARD
if (IS_NULL(reg->int_map_backward)) {
int r;
@@ -3653,7 +4518,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
#endif
break;
- case ONIG_OPTIMIZE_MAP:
+ case OPTIMIZE_MAP:
p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
break;
}
@@ -3725,12 +4590,25 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
OnigOptionType option)
{
int r;
+ OnigMatchParam mp;
+
+ onig_initialize_match_param(&mp);
+ r = onig_search_with_param(reg, str, end, start, range, region, option, &mp);
+ onig_free_match_param_content(&mp);
+ return r;
+
+}
+
+extern int
+onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, OnigRegion* region,
+ OnigOptionType option, OnigMatchParam* mp)
+{
+ int r;
UChar *s, *prev;
- OnigMatchArg msa;
+ MatchArg msa;
const UChar *orig_start = start;
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
const UChar *orig_range = range;
-#endif
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
@@ -3738,6 +4616,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif
+ ADJUST_MATCH_PARAM(reg, mp);
+
if (region
#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
@@ -3757,7 +4637,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
@@ -3779,29 +4658,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
else goto finish; /* error */ \
}
#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
-#else
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_AND_RETURN_CHECK(none) \
- r = match_at(reg, str, end, s, prev, &msa);\
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! IS_FIND_LONGEST(reg->options)) {\
- goto match;\
- }\
- }\
- else goto finish; /* error */ \
- }
-#else
-#define MATCH_AND_RETURN_CHECK(none) \
- r = match_at(reg, str, end, s, prev, &msa);\
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- goto match;\
- }\
- else goto finish; /* error */ \
- }
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
-#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
/* anchor optimize: resume search range */
@@ -3886,7 +4742,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
goto end_buf;
}
}
- else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
+ else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) {
goto begin_position;
}
}
@@ -3902,11 +4758,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
s = (UChar* )start;
prev = (UChar* )NULL;
- MATCH_ARG_INIT(msa, reg, option, region, start);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- msa.state_check_buff = (void* )0;
- msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
-#endif
+ MATCH_ARG_INIT(msa, reg, option, region, start, mp);
MATCH_AND_RETURN_CHECK(end);
goto mismatch;
}
@@ -3918,13 +4770,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(start - str), (int )(range - str));
#endif
- MATCH_ARG_INIT(msa, reg, option, region, orig_start);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- {
- int offset = (MIN(start, range) - str);
- STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
- }
-#endif
+ MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);
s = (UChar* )start;
if (range > start) { /* forward search */
@@ -3933,7 +4779,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
else
prev = (UChar* )NULL;
- if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ if (reg->optimize != OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
sch_range = (UChar* )range;
@@ -3969,7 +4815,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (! forward_search_range(reg, str, end, s, sch_range,
&low, &high, (UChar** )NULL)) goto mismatch;
- if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+ if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) {
do {
MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
@@ -3998,12 +4844,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
}
else { /* backward search */
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
if (orig_start < end)
orig_start += enclen(reg->enc, orig_start); /* is upper range */
-#endif
- if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ if (reg->optimize != OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
if (range < end)
@@ -4204,3 +5048,600 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from)
*to = *from;
}
+
+/* for callout functions */
+
+#ifdef USE_CALLOUT
+
+extern OnigCalloutFunc
+onig_get_progress_callout(void)
+{ /* Returns the current value of the file-level DefaultProgressCallout. */
+ return DefaultProgressCallout;
+}
+
+extern int
+onig_set_progress_callout(OnigCalloutFunc f)
+{ /* Stores f in the file-level DefaultProgressCallout; always returns ONIG_NORMAL. */
+ DefaultProgressCallout = f;
+ return ONIG_NORMAL;
+}
+
+extern OnigCalloutFunc
+onig_get_retraction_callout(void)
+{ /* Returns the current value of the file-level DefaultRetractionCallout. */
+ return DefaultRetractionCallout;
+}
+
+extern int
+onig_set_retraction_callout(OnigCalloutFunc f)
+{ /* Stores f in the file-level DefaultRetractionCallout; always returns ONIG_NORMAL. */
+ DefaultRetractionCallout = f;
+ return ONIG_NORMAL;
+}
+
+extern int
+onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns args->num, the value the other accessors pass to onig_reg_callout_list_at(). */
+ return args->num;
+}
+
+extern OnigCalloutIn
+onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns args->in; the builtins compare it with ONIG_CALLOUT_IN_PROGRESS / ONIG_CALLOUT_IN_RETRACTION. */
+ return args->in;
+}
+
+extern int
+onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns the name_id field of args unchanged. */
+ return args->name_id;
+}
+
+/*
+ * Return the start of the contents string of the callout that `args`
+ * refers to.
+ *
+ * Returns 0 (null) when the callout list has no entry for args->num or
+ * when the entry is not an ONIG_CALLOUT_OF_CONTENTS callout.
+ */
+extern const UChar*
+onig_get_contents_by_callout_args(OnigCalloutArgs* args)
+{
+  CalloutListEntry* entry = onig_reg_callout_list_at(args->regex, args->num);
+
+  if (IS_NULL(entry) || entry->of != ONIG_CALLOUT_OF_CONTENTS)
+    return 0;
+
+  return entry->u.content.start;
+}
+
+/*
+ * Return the end of the contents string of the callout that `args`
+ * refers to.
+ *
+ * Returns 0 (null) when the callout list has no entry for args->num or
+ * when the entry is not an ONIG_CALLOUT_OF_CONTENTS callout.
+ */
+extern const UChar*
+onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
+{
+  CalloutListEntry* entry = onig_reg_callout_list_at(args->regex, args->num);
+
+  if (IS_NULL(entry) || entry->of != ONIG_CALLOUT_OF_CONTENTS)
+    return 0;
+
+  return entry->u.content.end;
+}
+
+/*
+ * Return the argument count (u.arg.num) stored in the callout list entry
+ * that `args` refers to.
+ *
+ * Returns ONIGERR_INVALID_ARGUMENT when no list entry exists for args->num
+ * or when the entry is not an ONIG_CALLOUT_OF_NAME callout.  A missing
+ * entry used to yield 0, which callers could not tell apart from a callout
+ * that legitimately has zero arguments.
+ */
+extern int
+onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
+{
+  int num;
+  CalloutListEntry* e;
+
+  num = args->num;
+  e = onig_reg_callout_list_at(args->regex, num);
+  if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
+  if (e->of == ONIG_CALLOUT_OF_NAME) {
+    return e->u.arg.num;
+  }
+
+  return ONIGERR_INVALID_ARGUMENT;
+}
+
+/*
+ * Return the passed-argument count (u.arg.passed_num) stored in the
+ * callout list entry that `args` refers to.
+ *
+ * Returns ONIGERR_INVALID_ARGUMENT when no list entry exists for args->num
+ * or when the entry is not an ONIG_CALLOUT_OF_NAME callout.  A missing
+ * entry used to yield 0, which is indistinguishable from "no arguments
+ * were passed".
+ */
+extern int
+onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
+{
+  int num;
+  CalloutListEntry* e;
+
+  num = args->num;
+  e = onig_reg_callout_list_at(args->regex, num);
+  if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
+  if (e->of == ONIG_CALLOUT_OF_NAME) {
+    return e->u.arg.passed_num;
+  }
+
+  return ONIGERR_INVALID_ARGUMENT;
+}
+
+/*
+ * Fetch argument number `index` of the named callout that `args` refers to.
+ *
+ *   type  receives the argument's type; may be null to skip it.
+ *   val   receives the argument's value; may be null to skip it.
+ *
+ * Returns ONIG_NORMAL on success.  Returns ONIGERR_INVALID_ARGUMENT when
+ * there is no list entry for args->num, when the entry is not an
+ * ONIG_CALLOUT_OF_NAME callout, or when `index` is outside
+ * [0, e->u.arg.num).
+ *
+ * A missing entry used to return 0 (== ONIG_NORMAL) without writing the
+ * outputs, so callers went on to read an uninitialised value; `index` was
+ * also used unchecked to index the types[]/vals[] arrays.
+ */
+extern int
+onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
+                             OnigType* type, OnigValue* val)
+{
+  int num;
+  CalloutListEntry* e;
+
+  num = args->num;
+  e = onig_reg_callout_list_at(args->regex, num);
+  if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
+  if (e->of != ONIG_CALLOUT_OF_NAME) return ONIGERR_INVALID_ARGUMENT;
+
+  /* never index the argument arrays outside the recorded argument count */
+  if (index < 0 || index >= e->u.arg.num) return ONIGERR_INVALID_ARGUMENT;
+
+  if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
+  if (IS_NOT_NULL(val))  *val  = e->u.arg.vals[index];
+
+  return ONIG_NORMAL;
+}
+
+extern const UChar*
+onig_get_string_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns the string field of args unchanged. */
+ return args->string;
+}
+
+extern const UChar*
+onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns the string_end field of args unchanged. */
+ return args->string_end;
+}
+
+extern const UChar*
+onig_get_start_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns the start field of args unchanged. */
+ return args->start;
+}
+
+extern const UChar*
+onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns the right_range field of args unchanged. */
+ return args->right_range;
+}
+
+extern const UChar*
+onig_get_current_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns the current field of args unchanged. */
+ return args->current;
+}
+
+extern OnigRegex
+onig_get_regex_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns the regex field of args unchanged. */
+ return args->regex;
+}
+
+extern unsigned long
+onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
+{ /* Returns the retry_in_match_counter field of args unchanged. */
+ return args->retry_in_match_counter;
+}
+
+
+extern int
+onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
+{ /* Reports the offsets, relative to a->string, recorded for capture
+   * group mem_num in the callout's match state.
+   *
+   * mem_num > 0: when the group's end slot is INVALID_STACK_INDEX both
+   * outputs are set to ONIG_REGION_NOTPOS; otherwise they receive the
+   * recorded start and end positions minus a->string.  Returns ONIG_NORMAL.
+   * mem_num <= 0: returns ONIGERR_INVALID_ARGUMENT and leaves the outputs
+   * untouched (the group-0 branch is compiled out with "#if 0").
+   *
+   * NOTE(review): mem_num has no upper-bound check here, and begin/end are
+   * dereferenced without a null check -- confirm callers guarantee both.
+   */
+ OnigRegex reg;
+ const UChar* str;
+ StackType* stk_base;
+ int i;
+
+ i = mem_num;
+ reg = a->regex;
+ str = a->string;
+ stk_base = a->stk_base; /* presumably read implicitly by STACK_AT() -- verify against the macro */
+
+ if (i > 0) {
+ if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (MEM_STATUS_AT(reg->bt_mem_start, i)) /* start slot is resolved through STACK_AT() */
+ *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str);
+ else /* start slot value is cast directly to a string pointer */
+ *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str);
+
+ *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )a->mem_end_stk[i])) - str);
+ }
+ else {
+ *begin = *end = ONIG_REGION_NOTPOS;
+ }
+ }
+ else if (i == 0) {
+#if 0
+ *begin = a->start - str;
+ *end = a->current - str;
+#else
+ return ONIGERR_INVALID_ARGUMENT;
+#endif
+ }
+ else
+ return ONIGERR_INVALID_ARGUMENT;
+
+ return ONIG_NORMAL;
+}
+
+/*
+ * Report how much of the match stack is in use at the time of the callout.
+ *
+ *   used_num    if not null, receives the distance a->stk - a->stk_base,
+ *               counted in StackType elements.
+ *   used_bytes  if not null, receives that count times sizeof(StackType).
+ *
+ * Always returns ONIG_NORMAL.
+ */
+extern int
+onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
+{
+  int count = (int )(a->stk - a->stk_base);
+
+  if (IS_NOT_NULL(used_num)) {
+    *used_num = count;
+  }
+  if (IS_NOT_NULL(used_bytes)) {
+    *used_bytes = count * sizeof(StackType);
+  }
+
+  return ONIG_NORMAL;
+}
+
+
+/* builtin callout functions */
+
+extern int
+onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
+{ /* Builtin callout that ignores both parameters and always returns ONIG_CALLOUT_FAIL. */
+ return ONIG_CALLOUT_FAIL;
+}
+
+extern int
+onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
+{ /* Builtin callout that ignores both parameters and always returns ONIG_MISMATCH. */
+ return ONIG_MISMATCH;
+}
+
+#if 0 /* compiled out: this builtin is currently not built */
+extern int
+onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
+{ /* Would always return ONIG_CALLOUT_SUCCESS. */
+ return ONIG_CALLOUT_SUCCESS;
+}
+#endif
+
+/*
+ * Builtin callout that returns the error code given as its first argument.
+ *
+ * The long value of argument 0 is narrowed to int and returned as the
+ * callout result.  A value that is zero or positive is replaced by
+ * ONIGERR_INVALID_CALLOUT_BODY.  If the argument cannot be fetched, the
+ * fetch error is returned instead.
+ */
+extern int
+onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{
+  int status;
+  int code;
+  OnigValue arg;
+
+  status = onig_get_arg_by_callout_args(args, 0, 0, &arg);
+  if (status != ONIG_NORMAL)
+    return status;
+
+  code = (int )arg.l;
+
+  return (code >= 0) ? ONIGERR_INVALID_CALLOUT_BODY : code;
+}
+
+extern int
+onig_builtin_count(OnigCalloutArgs* args, void* user_data)
+{ /* Calls onig_check_callout_data_and_clear_old_values() (result ignored),
+   * then delegates to onig_builtin_total_count() and returns its result. */
+ (void )onig_check_callout_data_and_clear_old_values(args);
+
+ return onig_builtin_total_count(args, user_data);
+}
+
+extern int
+onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{ /* Builtin counter callout that keeps three long values in the callout's
+   * own data slots.
+   *
+   * Argument 0 is a character selecting the count type; it must be '>',
+   * 'X' or '<', otherwise ONIGERR_INVALID_CALLOUT_ARG is returned.
+   *
+   * Slot 0 (main counter):
+   *   in retraction: incremented for '<', decremented for 'X', unchanged for '>'
+   *   otherwise:     incremented unless the type is '<'
+   * Slot 1 is incremented on every non-retraction call and slot 2 on every
+   * retraction call.
+   *
+   * A slot whose read returns a value greater than ONIG_NORMAL is treated
+   * as starting from 0.  Any negative result from the data accessors is
+   * returned as is; otherwise the result is ONIG_CALLOUT_SUCCESS.
+   */
+ int r;
+ int slot;
+ OnigType type;
+ OnigValue val;
+ OnigValue aval;
+ OnigCodePoint count_type;
+
+ r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
+ if (r != ONIG_NORMAL) return r;
+
+ count_type = aval.c;
+ if (count_type != '>' && count_type != 'X' && count_type != '<')
+ return ONIGERR_INVALID_CALLOUT_ARG;
+
+ r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
+ &type, &val);
+ if (r < ONIG_NORMAL)
+ return r;
+ else if (r > ONIG_NORMAL) {
+ /* type == void: initial state */
+ val.l = 0;
+ }
+
+ if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
+ slot = 2;
+ if (count_type == '<')
+ val.l++;
+ else if (count_type == 'X')
+ val.l--;
+ }
+ else {
+ slot = 1;
+ if (count_type != '<')
+ val.l++;
+ }
+
+ r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ /* slot 1: in progress counter, slot 2: in retraction counter */
+ r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
+ &type, &val);
+ if (r < ONIG_NORMAL)
+ return r;
+ else if (r > ONIG_NORMAL) {
+ val.l = 0; /* slot not written yet: start from zero */
+ }
+
+ val.l++;
+ r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
+ if (r != ONIG_NORMAL) return r;
+
+ return ONIG_CALLOUT_SUCCESS;
+}
+
+/*
+ * Builtin callout that limits how many times it may succeed.
+ *
+ * A counter is kept in data slot 0 of the callout itself and argument 0
+ * supplies the limit.  On a retraction call the counter is decremented.
+ * On any other call the callout fails (ONIG_CALLOUT_FAIL) once the counter
+ * has reached the limit, and otherwise increments the counter.
+ *
+ * Returns ONIG_CALLOUT_SUCCESS after the counter has been stored, or the
+ * error code of whichever accessor failed.
+ */
+extern int
+onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{
+  int status;
+  int slot;
+  OnigType  type;
+  OnigValue counter;
+  OnigValue limit;
+
+  (void )onig_check_callout_data_and_clear_old_values(args);
+
+  slot = 0;
+  status = onig_get_callout_data_by_callout_args_self(args, slot, &type, &counter);
+  if (status < ONIG_NORMAL) return status;
+  if (status > ONIG_NORMAL) {
+    /* nothing stored yet: start counting from zero */
+    type = ONIG_TYPE_LONG;
+    counter.l = 0;
+  }
+
+  status = onig_get_arg_by_callout_args(args, 0, &type, &limit);
+  if (status != ONIG_NORMAL) return status;
+
+  if (args->in != ONIG_CALLOUT_IN_RETRACTION) {
+    if (counter.l >= limit.l) return ONIG_CALLOUT_FAIL;
+    counter.l++;
+  }
+  else {
+    counter.l--;
+  }
+
+  status = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG,
+                                                      &counter);
+
+  return (status != ONIG_NORMAL) ? status : ONIG_CALLOUT_SUCCESS;
+}
+
+enum OP_CMP { /* comparison operators decoded by onig_builtin_cmp() */
+ OP_EQ, /* "==" */
+ OP_NE, /* "!=" */
+ OP_LT, /* "<"  */
+ OP_GT, /* ">"  */
+ OP_LE, /* "<=" */
+ OP_GE /* ">=" */
+};
+
+/*
+ * Builtin callout that compares two long operands.
+ *
+ *   argument 0  left operand:  a long, or a tag whose data slot 0 is read
+ *   argument 1  operator string: "==", "!=", "<", ">", "<=" or ">="
+ *   argument 2  right operand: a long, or a tag whose data slot 0 is read
+ *
+ * A tag operand whose data read returns a value greater than ONIG_NORMAL
+ * counts as 0.  The operator is decoded on the first call only and cached
+ * as an enum OP_CMP value in data slot 0 of this callout.
+ *
+ * Returns ONIG_CALLOUT_SUCCESS when the comparison holds,
+ * ONIG_CALLOUT_FAIL when it does not, ONIGERR_INVALID_CALLOUT_ARG for a
+ * malformed (including empty) operator string, or the error code of a
+ * failing accessor.
+ */
+extern int
+onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{
+  int r;
+  int slot;
+  long lv;
+  long rv;
+  OnigType  type;
+  OnigValue val;
+  regex_t* reg;
+  enum OP_CMP op;
+
+  reg = args->regex;
+
+  /* left operand */
+  r = onig_get_arg_by_callout_args(args, 0, &type, &val);
+  if (r != ONIG_NORMAL) return r;
+
+  if (type == ONIG_TYPE_TAG) {
+    r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
+    if (r < ONIG_NORMAL) return r;
+    else if (r > ONIG_NORMAL)
+      lv = 0L;
+    else
+      lv = val.l;
+  }
+  else { /* ONIG_TYPE_LONG */
+    lv = val.l;
+  }
+
+  /* right operand */
+  r = onig_get_arg_by_callout_args(args, 2, &type, &val);
+  if (r != ONIG_NORMAL) return r;
+
+  if (type == ONIG_TYPE_TAG) {
+    r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
+    if (r < ONIG_NORMAL) return r;
+    else if (r > ONIG_NORMAL)
+      rv = 0L;
+    else
+      rv = val.l;
+  }
+  else { /* ONIG_TYPE_LONG */
+    rv = val.l;
+  }
+
+  slot = 0;
+  r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
+  if (r < ONIG_NORMAL)
+    return r;
+  else if (r > ONIG_NORMAL) {
+    /* type == void: initial state -- decode the operator and cache it */
+    OnigCodePoint c1, c2;
+    UChar* p;
+
+    r = onig_get_arg_by_callout_args(args, 1, &type, &val);
+    if (r != ONIG_NORMAL) return r;
+
+    p = val.s.start;
+    /* an empty operator string must not be decoded: there is no
+       character to read before val.s.end */
+    if (p >= val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
+
+    c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
+    p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
+    if (p < val.s.end) {
+      c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
+      p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
+      /* at most two characters are allowed */
+      if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
+    }
+    else
+      c2 = 0;
+
+    switch (c1) {
+    case '=':
+      if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
+      op = OP_EQ;
+      break;
+    case '!':
+      if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
+      op = OP_NE;
+      break;
+    case '<':
+      if (c2 == '=') op = OP_LE;
+      else if (c2 == 0) op = OP_LT;
+      else return ONIGERR_INVALID_CALLOUT_ARG;
+      break;
+    case '>':
+      if (c2 == '=') op = OP_GE;
+      else if (c2 == 0) op = OP_GT;
+      else return ONIGERR_INVALID_CALLOUT_ARG;
+      break;
+    default:
+      return ONIGERR_INVALID_CALLOUT_ARG;
+      break;
+    }
+    val.l = (long )op;
+    r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
+    if (r != ONIG_NORMAL) return r;
+  }
+  else {
+    /* operator already decoded by an earlier call */
+    op = (enum OP_CMP )val.l;
+  }
+
+  switch (op) {
+  case OP_EQ: r = (lv == rv); break;
+  case OP_NE: r = (lv != rv); break;
+  case OP_LT: r = (lv <  rv); break;
+  case OP_GT: r = (lv >  rv); break;
+  case OP_LE: r = (lv <= rv); break;
+  case OP_GE: r = (lv >= rv); break;
+  }
+
+  return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
+}
+
+
+#include <stdio.h>
+
+static FILE* OutFp;
+
+/*
+ * Builtin monitor callout: writes one trace line per reported call to the
+ * stream stored in OutFp.
+ *
+ * The function name must start with "onig_" for the registration macros.
+ *
+ * Argument 0 is a character selecting which calls are reported:
+ *   in progress:  reported unless the character is '<'
+ *   otherwise:    reported only when the character is 'X' or '<'
+ * Calls that are not reported return ONIG_CALLOUT_SUCCESS immediately.
+ *
+ * The line is labelled with the callout's tag, truncated to fit the local
+ * buffer, or with "#<num>" when the callout has no tag.
+ *
+ * Returns ONIG_CALLOUT_SUCCESS, or the error from fetching argument 0.
+ */
+static int
+onig_builtin_monitor(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
+{
+  int r;
+  int num;
+  size_t tag_len;
+  const UChar* start;
+  const UChar* right;
+  const UChar* current;
+  const UChar* string;
+  const UChar* strend;
+  const UChar* tag_start;
+  const UChar* tag_end;
+  regex_t* reg;
+  OnigCalloutIn in;
+  OnigType type;
+  OnigValue val;
+  char buf[20];
+  FILE* fp;
+
+  fp = OutFp;
+
+  r = onig_get_arg_by_callout_args(args, 0, &type, &val);
+  if (r != ONIG_NORMAL) return r;
+
+  in = onig_get_callout_in_by_callout_args(args);
+  if (in == ONIG_CALLOUT_IN_PROGRESS) {
+    if (val.c == '<')
+      return ONIG_CALLOUT_SUCCESS;
+  }
+  else {
+    if (val.c != 'X' && val.c != '<')
+      return ONIG_CALLOUT_SUCCESS;
+  }
+
+  num       = onig_get_callout_num_by_callout_args(args);
+  start     = onig_get_start_by_callout_args(args);
+  right     = onig_get_right_range_by_callout_args(args);
+  current   = onig_get_current_by_callout_args(args);
+  string    = onig_get_string_by_callout_args(args);
+  strend    = onig_get_string_end_by_callout_args(args);
+  reg       = onig_get_regex_by_callout_args(args);
+  tag_start = onig_get_callout_tag_start(reg, num);
+  tag_end   = onig_get_callout_tag_end(reg, num);
+
+  if (tag_start == 0)
+    xsnprintf(buf, sizeof(buf), "#%d", num);
+  else {
+    /* CAUTION: tag string is not terminated with NULL. */
+    size_t i;  /* same type as tag_len: no signed/unsigned comparison */
+
+    tag_len = tag_end - tag_start;
+    /* truncate so that the terminator always fits in buf */
+    if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
+    for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
+    buf[tag_len] = '\0';
+  }
+
+  fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
+          buf,
+          in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
+          (int )(current - string),
+          (int )(start   - string),
+          (int )(right   - string),
+          (int )(strend  - string));
+  fflush(fp);
+
+  return ONIG_CALLOUT_SUCCESS;
+}
+
+extern int
+onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
+{ /* Selects the output stream for the builtin monitor and sets up the
+   * "MON" callout.
+   *
+   * fp is stored in OutFp as a FILE*; when fp is null, stdout is used.
+   * The callout is described with one ONIG_TYPE_CHAR argument whose
+   * default option value is '>'.  Always returns ONIG_NORMAL.
+   *
+   * NOTE(review): BC_B_O is defined outside this view; it presumably
+   * registers onig_builtin_monitor and reads the locals id, enc and name
+   * by name -- confirm before renaming or removing any of them.
+   */
+ int id;
+ char* name;
+ OnigEncoding enc;
+ unsigned int ts[4];
+ OnigValue opts[4];
+
+ if (IS_NOT_NULL(fp))
+ OutFp = (FILE* )fp;
+ else
+ OutFp = stdout;
+
+ enc = ONIG_ENCODING_ASCII;
+
+ name = "MON";
+ ts[0] = ONIG_TYPE_CHAR;
+ opts[0].c = '>';
+ BC_B_O(name, monitor, 1, ts, 1, opts);
+
+ return ONIG_NORMAL;
+}
+
+#endif /* USE_CALLOUT */
diff --git a/src/reggnu.c b/src/reggnu.c
index 50eb9b4..37c7519 100644
--- a/src/reggnu.c
+++ b/src/reggnu.c
@@ -2,7 +2,7 @@
reggnu.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,10 +28,7 @@
*/
#include "regint.h"
-
-#ifndef ONIGGNU_H
#include "oniggnu.h"
-#endif
extern void
re_free_registers(OnigRegion* r)
@@ -140,8 +137,7 @@ re_mbcinit(int mb_code)
break;
}
- onig_initialize(0, 0);
- onig_initialize_encoding(enc);
+ onig_initialize(&enc, 1);
onigenc_set_default_encoding(enc);
}
diff --git a/src/regint.h b/src/regint.h
index 256b045..ba8407a 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,29 +57,48 @@
/* config */
/* spec. config */
#define USE_CALL
+#define USE_CALLOUT
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+#define USE_RETRY_LIMIT_IN_MATCH
+
/* internal config */
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUANT_PEEK_NEXT
#define USE_ST_LIBRARY
+#include "regenc.h"
+
+#ifdef __cplusplus
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
+#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+#ifdef HAVE_STDARG_H
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
+#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000
#define DEFAULT_PARSE_DEPTH_LIMIT 4096
-#if defined(__GNUC__)
-# define ARG_UNUSED __attribute__ ((unused))
-#else
-# define ARG_UNUSED
-#endif
-
/* */
/* escape other system UChar definition */
-#include "config.h"
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
@@ -89,15 +108,12 @@
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
#define xfree free
-#define CHECK_INTERRUPT_IN_MATCH_AT
-
#define st_init_table onig_st_init_table
#define st_init_table_with_size onig_st_init_table_with_size
#define st_init_numtable onig_st_init_numtable
@@ -118,9 +134,6 @@
/* */
#define onig_st_is_member st_is_member
-#define STATE_CHECK_STRING_THRESHOLD_LEN 7
-#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
-
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
@@ -140,6 +153,10 @@
#include <stddef.h>
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif
+
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
@@ -184,8 +201,6 @@ typedef unsigned int uintptr_t;
#endif
#endif
-#include "regenc.h"
-
#ifdef MIN
#undef MIN
#endif
@@ -237,14 +252,93 @@ typedef unsigned int uintptr_t;
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+
+#ifdef USE_CALLOUT
+
+/*
+ * One element of RegexExt.callout_list.
+ * NOTE(review): presumably one entry per callout occurrence in a compiled
+ * pattern -- confirm against the code that fills the list.
+ */
+typedef struct {
+ int flag; /* bit flags (regparse.c sets a "tag exists" bit here) */
+ OnigCalloutOf of; /* regparse.c tests this against ONIG_CALLOUT_OF_NAME */
+ int in;
+ int name_id;
+ const UChar* tag_start;
+ const UChar* tag_end;
+ OnigCalloutType type;
+ OnigCalloutFunc start_func;
+ OnigCalloutFunc end_func;
+ union {
+ /* NOTE(review): presumably used for callouts of contents -- confirm */
+ struct {
+ const UChar* start;
+ const UChar* end;
+ } content;
+ /* read by regparse.c when of == ONIG_CALLOUT_OF_NAME */
+ struct {
+ int num;
+ int passed_num;
+ OnigType types[ONIG_CALLOUT_MAX_ARGS_NUM];
+ OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];
+ } arg;
+ } u;
+} CalloutListEntry;
+
+#endif
+
typedef struct {
- int num_keeper;
- int* keepers;
-} RegExt;
+ const UChar* pattern;
+ const UChar* pattern_end;
+#ifdef USE_CALLOUT
+ void* tag_table;
+ int callout_num;
+ int callout_list_alloc;
+ CalloutListEntry* callout_list; /* index: callout num */
+#endif
+} RegexExt;
-#define REG_EXTP(reg) (RegExt* )((reg)->chain)
+#define REG_EXTP(reg) ((RegexExt* )((reg)->chain))
#define REG_EXTPL(reg) ((reg)->chain)
+struct re_pattern_buffer {
+ /* common members of BBuf(bytes-buffer) */
+ unsigned char* p; /* compiled pattern */
+ unsigned int used; /* used space for p */
+ unsigned int alloc; /* allocated space for p */
+
+ int num_mem; /* used memory(...) num counted from 1 */
+ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
+ int num_comb_exp_check; /* no longer used (combination explosion check) */
+ int num_call; /* number of subexp call */
+ unsigned int capture_history; /* (?@...) flag (1-31) */
+ unsigned int bt_mem_start; /* need backtrack flag */
+ unsigned int bt_mem_end; /* need backtrack flag */
+ int stack_pop_level;
+ int repeat_range_alloc;
+ OnigRepeatRange* repeat_range;
+
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigSyntaxType* syntax;
+ OnigCaseFoldType case_fold_flag;
+ void* name_table;
+
+ /* optimization info (string search, char-map and anchors) */
+ int optimize; /* optimize flag */
+ int threshold_len; /* search str-length for apply optimize */
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ int sub_anchor; /* start-anchor for exact or map */
+ unsigned char *exact;
+ unsigned char *exact_end;
+ unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
+ int *int_map; /* BM skip for exact_len > 255 */
+ int *int_map_backward; /* BM skip for backward search */
+ OnigLen dmin; /* min-distance of exact or map */
+ OnigLen dmax; /* max-distance of exact or map */
+
+ /* regex_t link chain */
+ struct re_pattern_buffer* chain; /* escape compile-conflict */
+};
+
+
/* stack pop level */
enum StackPopLevel {
STACK_POP_LEVEL_FREE = 0,
@@ -253,12 +347,14 @@ enum StackPopLevel {
};
/* optimize flags */
-#define ONIG_OPTIMIZE_NONE 0
-#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
-#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
-#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
-#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
-#define ONIG_OPTIMIZE_MAP 5 /* char map */
+enum OptimizeType {
+ OPTIMIZE_NONE = 0,
+ OPTIMIZE_EXACT = 1, /* Slow Search */
+ OPTIMIZE_EXACT_BM = 2, /* Boyer Moore Search */
+ OPTIMIZE_EXACT_BM_NO_REV = 3, /* BM (but not simple match) */
+ OPTIMIZE_EXACT_IC = 4, /* Slow Search (ignore case) */
+ OPTIMIZE_MAP = 5 /* char map */
+};
/* bit status */
typedef unsigned int MemStatusType;
@@ -467,8 +563,8 @@ typedef struct _BBuf {
#define ANCHOR_NO_WORD_BOUNDARY (1<<11)
#define ANCHOR_WORD_BEGIN (1<<12)
#define ANCHOR_WORD_END (1<<13)
-#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
-#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
+#define ANCHOR_ANYCHAR_INF (1<<14)
+#define ANCHOR_ANYCHAR_INF_ML (1<<15)
#define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16)
#define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17)
@@ -557,7 +653,7 @@ enum OpCode {
OP_JUMP,
OP_PUSH,
OP_PUSH_SUPER,
- OP_POP,
+ OP_POP_OUT,
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
OP_REPEAT, /* {n,m} */
@@ -581,16 +677,14 @@ enum OpCode {
OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */
OP_LOOK_BEHIND_NOT_END, /* (?<!...) end */
- OP_CALL, /* \g<name> */
+ OP_CALL, /* \g<name> */
OP_RETURN,
OP_PUSH_SAVE_VAL,
OP_UPDATE_VAR,
-
- OP_STATE_CHECK_PUSH, /* combination explosion check and push */
- OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
- OP_STATE_CHECK, /* check only */
- OP_STATE_CHECK_ANYCHAR_STAR,
- OP_STATE_CHECK_ANYCHAR_ML_STAR,
+#ifdef USE_CALLOUT
+ OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */
+ OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */
+#endif
/* no need: IS_DYNAMIC_OPTION() == 0 */
OP_SET_OPTION_PUSH, /* set option and push recover option */
@@ -627,7 +721,6 @@ typedef int ModeType;
#define SIZE_ABSADDR sizeof(AbsAddrType)
#define SIZE_LENGTH sizeof(LengthType)
#define SIZE_MEMNUM sizeof(MemNumType)
-#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType)
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
@@ -643,7 +736,6 @@ typedef int ModeType;
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
-#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType)
#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)
#define GET_MODE_INC(mode,p) PLATFORM_GET_INC(mode, p, ModeType)
@@ -662,7 +754,7 @@ typedef int ModeType;
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_POP SIZE_OPCODE
+#define SIZE_OP_POP_OUT SIZE_OPCODE
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
@@ -693,11 +785,9 @@ typedef int ModeType;
#define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM)
#define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM)
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
-#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
-#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
-#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#ifdef USE_CALLOUT
+#define SIZE_OP_CALLOUT_CONTENTS (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_CALLOUT_NAME (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM)
#endif
#define MC_ESC(syn) (syn)->meta_char_table.esc
@@ -751,44 +841,14 @@ typedef int ModeType;
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
-typedef struct {
- void* stack_p;
- int stack_n;
- OnigOptionType options;
- OnigRegion* region;
- int ptr_num;
- const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- int best_len; /* for ONIG_OPTION_FIND_LONGEST */
- UChar* best_s;
-#endif
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- void* state_check_buff;
- int state_check_buff_size;
-#endif
-} OnigMatchArg;
-
-
-typedef struct OnigEndCallListItem {
- struct OnigEndCallListItem* next;
- void (*func)(void);
-} OnigEndCallListItemType;
-
extern void onig_add_end_call(void (*func)(void));
#ifdef ONIG_DEBUG
-typedef struct {
- short int opcode;
- char* name;
- short int arg_type;
-} OnigOpInfoType;
-
-extern OnigOpInfoType OnigOpInfo[];
-
-
-extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc));
+#ifdef ONIG_DEBUG_COMPILE
+extern void onig_print_compiled_byte_code_list(FILE* f, regex_t* reg);
+#endif
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
@@ -803,6 +863,85 @@ extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));
+extern RegexExt* onig_get_regex_ext(regex_t* reg);
+extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end);
+
+#ifdef USE_CALLOUT
+
+extern OnigCalloutType onig_get_callout_type_by_name_id(int name_id);
+extern OnigCalloutFunc onig_get_callout_start_func_by_name_id(int id);
+extern OnigCalloutFunc onig_get_callout_end_func_by_name_id(int id);
+extern int onig_callout_tag_table_free(void* table);
+extern void onig_free_reg_callout_list(int n, CalloutListEntry* list);
+extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num);
+extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num);
+
+/*
+ * for definition of builtin callout
+ *
+ * Every BC* macro registers the function onig_builtin_<func> under `name`
+ * through onig_set_callout_of_name() with ONIG_CALLOUT_TYPE_SINGLE.
+ * They expect two locals in the calling function:
+ *   enc : OnigEncoding used for the name
+ *   id  : int that receives the returned id
+ * and they `return id;` from the calling function when id is negative.
+ *
+ * Suffixes:  _P = ONIG_CALLOUT_IN_PROGRESS, _R = ONIG_CALLOUT_IN_RETRACTION,
+ *            _B = ONIG_CALLOUT_IN_BOTH, _O = with optional-argument defaults.
+ * BC0_* take no callout arguments.
+ *
+ * `name` is parenthesized at every use so that an expression argument is
+ * cast and measured as a whole.
+ */
+#define BC0_P(name, func)  do {\
+  int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+                                (UChar* )(name), (UChar* )((name) + len),\
+                                ONIG_CALLOUT_IN_PROGRESS,\
+                                onig_builtin_ ## func, 0, 0, 0, 0, 0);\
+  if (id < 0) return id;\
+} while(0)
+
+#define BC0_R(name, func)  do {\
+  int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+                                (UChar* )(name), (UChar* )((name) + len),\
+                                ONIG_CALLOUT_IN_RETRACTION,\
+                                onig_builtin_ ## func, 0, 0, 0, 0, 0);\
+  if (id < 0) return id;\
+} while(0)
+
+#define BC0_B(name, func)  do {\
+  int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+                                (UChar* )(name), (UChar* )((name) + len),\
+                                ONIG_CALLOUT_IN_BOTH,\
+                                onig_builtin_ ## func, 0, 0, 0, 0, 0);\
+  if (id < 0) return id;\
+} while(0)
+
+#define BC_P(name, func, na, ts)  do {\
+  int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+                                (UChar* )(name), (UChar* )((name) + len),\
+                                ONIG_CALLOUT_IN_PROGRESS,\
+                                onig_builtin_ ## func, 0, (na), (ts), 0, 0);\
+  if (id < 0) return id;\
+} while(0)
+
+#define BC_P_O(name, func, nts, ts, nopts, opts)  do {\
+  int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+                                (UChar* )(name), (UChar* )((name) + len),\
+                                ONIG_CALLOUT_IN_PROGRESS,\
+                                onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\
+  if (id < 0) return id;\
+} while(0)
+
+#define BC_B(name, func, na, ts)  do {\
+  int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+                                (UChar* )(name), (UChar* )((name) + len),\
+                                ONIG_CALLOUT_IN_BOTH,\
+                                onig_builtin_ ## func, 0, (na), (ts), 0, 0);\
+  if (id < 0) return id;\
+} while(0)
+
+#define BC_B_O(name, func, nts, ts, nopts, opts)  do {\
+  int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+  id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+                                (UChar* )(name), (UChar* )((name) + len),\
+                                ONIG_CALLOUT_IN_BOTH,\
+                                onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\
+  if (id < 0) return id;\
+} while(0)
+
+#endif /* USE_CALLOUT */
+
/* strend hash */
typedef void hash_table_type;
diff --git a/src/regparse.c b/src/regparse.c
index 1e4dc30..6e95a14 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,6 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+
#include "regparse.h"
#include "st.h"
@@ -33,10 +34,17 @@
#include <stdio.h>
#endif
+#define INIT_TAG_NAMES_ALLOC_NUM 5
+
#define WARN_BUFSIZE 256
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+/*
+ * Character classes accepted in callout names and callout tag names:
+ * ASCII letters, ASCII digits and '_'.
+ * The argument is parenthesized at every use so that an expression
+ * argument (e.g. a conditional expression) is evaluated as a unit.
+ * Note that `c` is evaluated more than once.
+ */
+#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
+  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
+#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
+  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_')
+
OnigSyntaxType OnigSyntaxOniguruma = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
@@ -50,6 +58,8 @@ OnigSyntaxType OnigSyntaxOniguruma = {
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
+ ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
+ ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
@@ -188,6 +198,16 @@ onig_set_parse_depth_limit(unsigned int depth)
return 0;
}
+/*
+ * Multiply two ints with an overflow guard.
+ *
+ * Returns 0 when either operand is 0, x * y when x < INT_MAX / y,
+ * and -1 otherwise.
+ *
+ * NOTE(review): the guard is only meaningful for positive operands (as the
+ * name says). The strict '<' also rejects the boundary case
+ * x == INT_MAX / y, whose product would still fit in an int.
+ */
+static int
+positive_int_multiply(int x, int y)
+{
+ if (x == 0 || y == 0) return 0;
+
+ if (x < INT_MAX / y)
+ return x * y;
+ else
+ return -1;
+}
static void
bbuf_free(BBuf* bbuf)
@@ -331,25 +351,6 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
}
}
-static UChar*
-strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
-{
- int slen, term_len, i;
- UChar *r;
-
- slen = (int )(end - s);
- term_len = ONIGENC_MBC_MINLEN(enc);
-
- r = (UChar* )xmalloc(slen + term_len);
- CHECK_NULL_RETURN(r);
- xmemcpy(r, s, slen);
-
- for (i = 0; i < term_len; i++)
- r[slen + i] = (UChar )0;
-
- return r;
-}
-
static int
save_entry(ScanEnv* env, enum SaveType type, int* id)
{
@@ -521,6 +522,106 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
return result;
}
+
+/*
+ * Hash key of the callout name table: a byte range [s, end) qualified
+ * by encoding and callout type.
+ */
+typedef struct {
+ OnigEncoding enc;
+ int type; /* callout type: single or not */
+ UChar* s; /* first byte of the name */
+ UChar* end; /* one past the last byte of the name */
+} st_callout_name_key;
+
+/*
+ * Key comparison callback for the callout name table.
+ * Returns 0 when both keys have the same encoding, the same type and the
+ * same bytes; otherwise a non-zero value (1 for a differing encoding,
+ * type or length, else the difference of the first mismatching bytes).
+ */
+static int
+callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
+{
+  UChar* xs;
+  UChar* ys;
+
+  if (x->enc != y->enc || x->type != y->type) return 1;
+  if ((x->end - x->s) != (y->end - y->s)) return 1;
+
+  for (xs = x->s, ys = y->s; xs < x->end; xs++, ys++) {
+    int diff = (int )*xs - (int )*ys;
+    if (diff != 0) return diff;
+  }
+
+  return 0;
+}
+
+/*
+ * Hash callback for the callout name table.
+ * Mixes the name bytes (multiplier 997), the low 16 bits of the encoding
+ * pointer and the callout type.
+ *
+ * The accumulation is done in unsigned arithmetic: the wrap-around of a
+ * long name is then well defined, whereas overflowing a signed int (and
+ * right-shifting a negative value) is not portable.
+ */
+static int
+callout_name_table_hash(st_callout_name_key* x)
+{
+  UChar *p;
+  unsigned int val = 0;
+
+  p = x->s;
+  while (p < x->end) {
+    val = val * 997u + (unsigned int )*p++;
+  }
+
+  /* use intptr_t for escape warning in Windows */
+  val = val + (val >> 5)
+      + (unsigned int )((intptr_t )x->enc & 0xffff)
+      + (unsigned int )x->type;
+
+  return (int )val;
+}
+
+/*
+ * Create a hash table keyed by st_callout_name_key, using
+ * callout_name_table_cmp / callout_name_table_hash as its callbacks.
+ * `size` is passed through to onig_st_init_table_with_size().
+ */
+extern hash_table_type*
+onig_st_init_callout_name_table_with_size(int size)
+{
+ static struct st_hash_type hashType = {
+ callout_name_table_cmp,
+ callout_name_table_hash,
+ };
+
+ return (hash_table_type* )
+ onig_st_init_table_with_size(&hashType, size);
+}
+
+/*
+ * Look up (enc, type, [str_key, end_key)) in a callout name table.
+ * A temporary key is built on the stack; the name bytes are not copied.
+ * Returns the result of onig_st_lookup(); callers in this file treat 0 as
+ * "not found". On a hit the stored value is written to *value.
+ */
+extern int
+onig_st_lookup_callout_name_table(hash_table_type* table,
+ OnigEncoding enc,
+ int type,
+ const UChar* str_key,
+ const UChar* end_key,
+ hash_data_type *value)
+{
+ st_callout_name_key key;
+
+ key.enc = enc;
+ key.type = type;
+ key.s = (UChar* )str_key;
+ key.end = (UChar* )end_key;
+
+ return onig_st_lookup(table, (st_data_t )(&key), value);
+}
+
+/*
+ * Insert `value` under (enc, type, [str_key, end_key)) into a callout
+ * name table.
+ * A key structure is heap-allocated for the table; the name bytes
+ * themselves are referenced, not copied.
+ * Returns ONIGERR_MEMORY (via CHECK_NULL_RETURN_MEMERR) if the key cannot
+ * be allocated, otherwise the result of onig_st_insert(). When that result
+ * is non-zero the freshly allocated key is released again.
+ */
+static int
+st_insert_callout_name_table(hash_table_type* table,
+ OnigEncoding enc, int type,
+ UChar* str_key, UChar* end_key,
+ hash_data_type value)
+{
+ st_callout_name_key* key;
+ int result;
+
+ key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
+ CHECK_NULL_RETURN_MEMERR(key);
+
+ /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
+ key->enc = enc;
+ key->type = type;
+ key->s = str_key;
+ key->end = end_key;
+ result = onig_st_insert(table, (st_data_t )key, value);
+ if (result) {
+ xfree(key);
+ }
+ return result;
+}
+
#endif /* USE_ST_LIBRARY */
@@ -537,6 +638,8 @@ typedef struct {
#ifdef USE_ST_LIBRARY
+#define INIT_NAMES_ALLOC_NUM 5
+
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
@@ -862,13 +965,13 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
if (IS_NULL(e)) {
#ifdef USE_ST_LIBRARY
if (IS_NULL(t)) {
- t = onig_st_init_strend_table_with_size(5);
+ t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
reg->name_table = (void* )t;
}
e = (NameEntry* )xmalloc(sizeof(NameEntry));
CHECK_NULL_RETURN_MEMERR(e);
- e->name = strdup_with_null(reg->enc, name, name_end);
+ e->name = onigenc_strdup(reg->enc, name, name_end);
if (IS_NULL(e->name)) {
xfree(e); return ONIGERR_MEMORY;
}
@@ -919,7 +1022,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
}
e = &(t->e[t->num]);
t->num++;
- e->name = strdup_with_null(reg->enc, name, name_end);
+ e->name = onigenc_strdup(reg->enc, name, name_end);
if (IS_NULL(e->name)) return ONIGERR_MEMORY;
e->name_len = name_end - name;
#endif
@@ -1019,6 +1122,781 @@ onig_noname_group_capture_is_active(regex_t* reg)
return 1;
}
+#ifdef USE_CALLOUT
+
+/*
+ * Registration record of one named callout, stored in
+ * GlobalCalloutNameList and indexed by the callout name id.
+ */
+typedef struct {
+ OnigCalloutType type;
+ int in; /* ONIG_CALLOUT_IN_PROGRESS / ONIG_CALLOUT_IN_RETRACTION bits */
+ OnigCalloutFunc start_func;
+ OnigCalloutFunc end_func;
+ int arg_num; /* total number of arguments */
+ int opt_arg_num; /* how many of the trailing arguments are optional */
+ unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
+ /* defaults of optional args, stored at the argument's own index;
+    ONIG_TYPE_STRING defaults are heap copies owned by this entry */
+ OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
+ UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */
+} CalloutNameListEntry;
+
+/* Growable array of CalloutNameListEntry. */
+typedef struct {
+ int n; /* number of entries in use */
+ int alloc; /* number of entries allocated in v */
+ CalloutNameListEntry* v;
+} CalloutNameListType;
+
+/* Process-wide list of registered named callouts; created on first use. */
+static CalloutNameListType* GlobalCalloutNameList;
+
+/*
+ * Allocate an empty callout name list with room for init_size entries.
+ * On success stores the list in *rs and returns ONIG_NORMAL; on
+ * allocation failure *rs is 0 and ONIGERR_MEMORY is returned.
+ */
+static int
+make_callout_func_list(CalloutNameListType** rs, int init_size)
+{
+  CalloutNameListType* list;
+  CalloutNameListEntry* entries;
+
+  *rs = 0;
+
+  list = xmalloc(sizeof(*list));
+  if (IS_NULL(list)) return ONIGERR_MEMORY;
+
+  entries = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
+  if (IS_NULL(entries)) {
+    xfree(list);
+    return ONIGERR_MEMORY;
+  }
+
+  list->v     = entries;
+  list->alloc = init_size;
+  list->n     = 0;
+
+  *rs = list;
+  return ONIG_NORMAL;
+}
+
+/*
+ * Release a callout name list: the string defaults of optional
+ * arguments, the entry array and the list itself. NULL is accepted.
+ */
+static void
+free_callout_func_list(CalloutNameListType* s)
+{
+  int idx;
+
+  if (IS_NULL(s)) return;
+
+  if (IS_NOT_NULL(s->v)) {
+    for (idx = 0; idx < s->n; idx++) {
+      CalloutNameListEntry* ent = &(s->v[idx]);
+      int k;
+
+      /* only the trailing opt_arg_num arguments carry defaults */
+      for (k = ent->arg_num - ent->opt_arg_num; k < ent->arg_num; k++) {
+        UChar* str;
+
+        if (ent->arg_types[k] != ONIG_TYPE_STRING) continue;
+
+        str = ent->opt_defaults[k].s.start;
+        if (IS_NOT_NULL(str)) xfree(str);
+      }
+    }
+    xfree(s->v);
+  }
+
+  xfree(s);
+}
+
+/*
+ * Append one zero-filled entry to the list, doubling the array when full.
+ * The index of the new entry is stored in *rid.
+ * Returns ONIG_NORMAL, or ONIGERR_MEMORY if the array cannot grow (the
+ * list is left unchanged in that case).
+ */
+static int
+callout_func_list_add(CalloutNameListType* s, int* rid)
+{
+ if (s->n >= s->alloc) {
+ int new_size = s->alloc * 2;
+ CalloutNameListEntry* nv = (CalloutNameListEntry* )
+ xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size);
+ if (IS_NULL(nv)) return ONIGERR_MEMORY;
+
+ s->alloc = new_size;
+ s->v = nv;
+ }
+
+ *rid = s->n;
+
+ /* clear the new slot so that every field starts out as 0 / NULL */
+ xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
+ s->n++;
+ return ONIG_NORMAL;
+}
+
+
+/* Value stored in the callout name table: an owned name and its id. */
+typedef struct {
+ UChar* name; /* heap copy of the name (made with onigenc_strdup) */
+ int name_len; /* byte length */
+ int id; /* taken from CalloutNameIDCounter when the entry is created */
+} CalloutNameEntry;
+
+#ifdef USE_ST_LIBRARY
+typedef st_table CalloutNameTable;
+#else
+typedef struct {
+ CalloutNameEntry* e;
+ int num;
+ int alloc;
+} CalloutNameTable;
+#endif
+
+static CalloutNameTable* GlobalCalloutNameTable;
+static int CalloutNameIDCounter;
+
+#ifdef USE_ST_LIBRARY
+
+/*
+ * onig_st_foreach() callback: release one table entry (name, key and
+ * entry structure) and tell the table to delete it.
+ */
+static int
+i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
+ void* arg ARG_UNUSED)
+{
+ xfree(e->name);
+ /*xfree(key->s); */ /* is same as e->name */
+ xfree(key);
+ xfree(e);
+ return ST_DELETE;
+}
+
+/*
+ * Free every entry of a callout name table (the table itself is kept).
+ * NULL is accepted. Always returns 0.
+ */
+static int
+callout_name_table_clear(CalloutNameTable* t)
+{
+ if (IS_NOT_NULL(t)) {
+ onig_st_foreach(t, i_free_callout_name_entry, 0);
+ }
+ return 0;
+}
+
+/*
+ * Destroy GlobalCalloutNameTable, if any, and reset CalloutNameIDCounter.
+ * Returns 0, or the non-zero result of callout_name_table_clear().
+ */
+static int
+global_callout_name_table_free(void)
+{
+ if (IS_NOT_NULL(GlobalCalloutNameTable)) {
+ int r = callout_name_table_clear(GlobalCalloutNameTable);
+ if (r != 0) return r;
+
+ onig_st_free_table(GlobalCalloutNameTable);
+ GlobalCalloutNameTable = 0;
+ CalloutNameIDCounter = 0;
+ }
+
+ return 0;
+}
+
+/*
+ * Find a registered callout name in GlobalCalloutNameTable.
+ * The name is first looked up under `enc`. If that misses and `enc` is an
+ * ASCII-compatible encoding other than ASCII itself, the lookup is
+ * repeated under ONIG_ENCODING_ASCII.
+ * Returns the entry, or NULL when the table does not exist or the name is
+ * not registered.
+ */
+static CalloutNameEntry*
+callout_name_find(OnigEncoding enc, int is_not_single,
+ const UChar* name, const UChar* name_end)
+{
+ int r;
+ CalloutNameEntry* e;
+ CalloutNameTable* t = GlobalCalloutNameTable;
+
+ e = (CalloutNameEntry* )NULL;
+ if (IS_NOT_NULL(t)) {
+ r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
+ (HashDataType* )((void* )(&e)));
+ if (r == 0) { /* not found */
+ if (enc != ONIG_ENCODING_ASCII &&
+ ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
+ enc = ONIG_ENCODING_ASCII;
+ /* result ignored: e stays NULL when this lookup misses too */
+ onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
+ (HashDataType* )((void* )(&e)));
+ }
+ }
+ }
+ return e;
+}
+
+#else
+
+/*
+ * (array-based table, used when USE_ST_LIBRARY is not defined)
+ * Free every name held by the table and the entry array itself; the
+ * table structure is kept. NULL is accepted. Always returns 0.
+ */
+static int
+callout_name_table_clear(CalloutNameTable* t)
+{
+  int i;
+  CalloutNameEntry* e;
+
+  if (IS_NOT_NULL(t)) {
+    for (i = 0; i < t->num; i++) {
+      e = &(t->e[i]);
+      if (IS_NOT_NULL(e->name)) {
+        xfree(e->name);
+        e->name     = NULL;
+        e->name_len = 0;
+        e->id       = 0;
+        /* CalloutNameEntry has no `func` member; nothing else to reset */
+      }
+    }
+    if (IS_NOT_NULL(t->e)) {
+      xfree(t->e);
+      t->e = NULL;
+    }
+    t->num = 0;
+  }
+  return 0;
+}
+
+/*
+ * (array-based table, used when USE_ST_LIBRARY is not defined)
+ * Destroy GlobalCalloutNameTable, if any, and reset CalloutNameIDCounter.
+ * Returns 0, or the non-zero result of callout_name_table_clear().
+ */
+static int
+global_callout_name_table_free(void)
+{
+ if (IS_NOT_NULL(GlobalCalloutNameTable)) {
+ int r = callout_name_table_clear(GlobalCalloutNameTable);
+ if (r != 0) return r;
+
+ xfree(GlobalCalloutNameTable);
+ GlobalCalloutNameTable = 0;
+ CalloutNameIDCounter = 0;
+ }
+ return 0;
+}
+
+/*
+ * (array-based table, used when USE_ST_LIBRARY is not defined)
+ * Find a registered callout name in GlobalCalloutNameTable by a linear
+ * scan over the stored names.
+ *
+ * The signature matches the hash-table variant so that both can be called
+ * the same way; the array entries record neither encoding nor type, so
+ * `enc` and `is_not_single` do not take part in the comparison.
+ * Returns the entry, or NULL if the name is not registered.
+ */
+static CalloutNameEntry*
+callout_name_find(OnigEncoding enc ARG_UNUSED, int is_not_single ARG_UNUSED,
+                  const UChar* name, const UChar* name_end)
+{
+  int i, len;
+  CalloutNameEntry* e;
+  CalloutNameTable* t = GlobalCalloutNameTable;
+
+  if (IS_NOT_NULL(t)) {
+    len = (int )(name_end - name);
+    for (i = 0; i < t->num; i++) {
+      e = &(t->e[i]);
+      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
+        return e;
+    }
+  }
+  return (CalloutNameEntry* )NULL;
+}
+
+#endif
+
+/*
+ * Return the table entry for a callout name, creating it on first use.
+ * name string must be single byte char string.
+ *
+ * rentry        : receives the entry (set to 0 on every error path)
+ * enc           : encoding the name is registered under
+ * is_not_single : callout type part of the key
+ * name, name_end: byte range of the name; must not be empty
+ *
+ * Returns the (positive) name id of the entry, ONIGERR_INVALID_CALLOUT_NAME
+ * for an empty name, ONIGERR_MEMORY on allocation failure, or the negative
+ * result of the table insert.
+ * A new entry gets the next value of CalloutNameIDCounter as its id.
+ * Nothing allocated for a new entry is leaked when creating it fails.
+ */
+static int
+callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
+                   int is_not_single, UChar* name, UChar* name_end)
+{
+  int r;
+  CalloutNameEntry* e;
+  CalloutNameTable* t = GlobalCalloutNameTable;
+
+  *rentry = 0;
+  if (name_end - name <= 0)
+    return ONIGERR_INVALID_CALLOUT_NAME;
+
+  e = callout_name_find(enc, is_not_single, name, name_end);
+  if (IS_NULL(e)) {
+#ifdef USE_ST_LIBRARY
+    if (IS_NULL(t)) {
+      t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
+      CHECK_NULL_RETURN_MEMERR(t);
+      GlobalCalloutNameTable = t;
+    }
+    e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
+    CHECK_NULL_RETURN_MEMERR(e);
+
+    e->name = onigenc_strdup(enc, name, name_end);
+    if (IS_NULL(e->name)) {
+      xfree(e);  return ONIGERR_MEMORY;
+    }
+
+    r = st_insert_callout_name_table(t, enc, is_not_single,
+                                     e->name, (e->name + (name_end - name)),
+                                     (HashDataType )e);
+    if (r < 0) {
+      /* the table did not take the entry: release it here */
+      xfree(e->name);
+      xfree(e);
+      return r;
+    }
+
+#else
+
+    int alloc;
+    int i;
+    UChar* name_copy;
+
+    if (IS_NULL(t)) {
+      alloc = INIT_NAMES_ALLOC_NUM;
+      t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
+      CHECK_NULL_RETURN_MEMERR(t);
+
+      t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
+      if (IS_NULL(t->e)) {
+        xfree(t);
+        return ONIGERR_MEMORY;
+      }
+      t->alloc = alloc;
+      t->num   = 0;
+      GlobalCalloutNameTable = t;
+      i = 0;               /* clear every slot of the new array */
+    }
+    else if (t->num == t->alloc) {
+      CalloutNameEntry* grown;
+
+      alloc = t->alloc * 2;
+      /* keep t->e valid if the reallocation fails */
+      grown = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc);
+      CHECK_NULL_RETURN_MEMERR(grown);
+      t->e     = grown;
+      t->alloc = alloc;
+      i = t->num;          /* clear only the slots that were added */
+    }
+    else {
+      i = t->alloc;        /* nothing to clear */
+    }
+
+    for ( ; i < t->alloc; i++) {
+      t->e[i].name     = NULL;
+      t->e[i].name_len = 0;
+      t->e[i].id       = 0;
+    }
+
+    /* copy the name before claiming a slot, so that a failed copy does
+       not leave a counted entry without a name */
+    name_copy = onigenc_strdup(enc, name, name_end);
+    if (IS_NULL(name_copy)) return ONIGERR_MEMORY;
+
+    e = &(t->e[t->num]);
+    t->num++;
+    e->name = name_copy;
+#endif
+
+    CalloutNameIDCounter++;
+    e->id = CalloutNameIDCounter;
+    e->name_len = (int )(name_end - name);
+  }
+
+  *rentry = e;
+  return e->id;
+}
+
+/*
+ * Check that [name, name_end) is an acceptable callout name: non-empty,
+ * every code point accepted by IS_ALLOWED_CODE_IN_CALLOUT_NAME, and the
+ * first one not a digit.
+ * Returns 1 if acceptable, 0 otherwise.
+ */
+static int
+is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
+{
+ UChar* p;
+ OnigCodePoint c;
+
+ if (name >= name_end) return 0;
+
+ p = name;
+ while (p < name_end) {
+ c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
+ if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
+ return 0;
+
+ /* a name must not start with a digit */
+ if (p == name) {
+ if (c >= '0' && c <= '9') return 0;
+ }
+
+ p += ONIGENC_MBC_ENC_LEN(enc, p);
+ }
+
+ return 1;
+}
+
+/*
+ * Check that [name, name_end) is an acceptable callout tag name:
+ * non-empty, every code point accepted by
+ * IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME, and the first one not a digit.
+ * Returns 1 if acceptable, 0 otherwise.
+ */
+static int
+is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
+{
+  UChar* cur;
+
+  if (name >= name_end) return 0;
+
+  for (cur = name; cur < name_end; cur += ONIGENC_MBC_ENC_LEN(enc, cur)) {
+    OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, cur, name_end);
+
+    if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(code))
+      return 0;
+
+    /* a tag name must not start with a digit */
+    if (cur == name && code >= '0' && code <= '9')
+      return 0;
+  }
+
+  return 1;
+}
+
+/*
+ * Register (or re-register) a named callout.
+ *
+ * enc          : encoding the name is registered under
+ * callout_type : must be ONIG_CALLOUT_TYPE_SINGLE
+ * name,name_end: byte range of the callout name
+ * in           : ONIG_CALLOUT_IN_PROGRESS and/or ONIG_CALLOUT_IN_RETRACTION
+ * start_func,
+ * end_func     : callbacks; at least one must be non-null
+ * arg_num      : number of arguments (0 .. ONIG_CALLOUT_MAX_ARGS_NUM)
+ * arg_types    : arg_num type codes; required when arg_num > 0
+ * opt_arg_num  : how many of the trailing arguments are optional
+ * opt_defaults : opt_arg_num default values; required when opt_arg_num > 0.
+ *                ONIG_TYPE_STRING defaults are copied.
+ *
+ * Returns the name id (>= 0) on success, or a negative ONIGERR_* code.
+ *
+ * The new default values are built completely before the stored entry is
+ * touched: a failed string copy leaves the previous registration intact
+ * and leaks nothing, and string defaults of a previous registration under
+ * the same name are released when they are replaced.
+ */
+extern int
+onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
+                         UChar* name, UChar* name_end, int in,
+                         OnigCalloutFunc start_func,
+                         OnigCalloutFunc end_func,
+                         int arg_num, unsigned int arg_types[],
+                         int opt_arg_num, OnigValue opt_defaults[])
+{
+  int r;
+  int i;
+  int j;
+  int id;
+  int is_not_single;
+  CalloutNameEntry* e;
+  CalloutNameListEntry* fe;
+  OnigValue new_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
+
+  if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
+    return ONIGERR_INVALID_ARGUMENT;
+
+  if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
+    return ONIGERR_INVALID_CALLOUT_ARG;
+
+  if (opt_arg_num < 0 || opt_arg_num > arg_num)
+    return ONIGERR_INVALID_CALLOUT_ARG;
+
+  /* the arrays are read below; reject missing ones instead of crashing */
+  if (arg_num > 0 && IS_NULL(arg_types))
+    return ONIGERR_INVALID_CALLOUT_ARG;
+
+  if (opt_arg_num > 0 && IS_NULL(opt_defaults))
+    return ONIGERR_INVALID_CALLOUT_ARG;
+
+  if (start_func == 0 && end_func == 0)
+    return ONIGERR_INVALID_CALLOUT_ARG;
+
+  if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
+    return ONIGERR_INVALID_CALLOUT_ARG;
+
+  for (i = 0; i < arg_num; i++) {
+    unsigned int t = arg_types[i];
+    if (t == ONIG_TYPE_VOID)
+      return ONIGERR_INVALID_CALLOUT_ARG;
+    else {
+      if (i >= arg_num - opt_arg_num) {
+        /* optional argument: exactly one type */
+        if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
+            t != ONIG_TYPE_TAG)
+          return ONIGERR_INVALID_CALLOUT_ARG;
+      }
+      else {
+        /* required argument: one type, optionally combined with LONG */
+        if (t != ONIG_TYPE_LONG) {
+          t = t & ~ONIG_TYPE_LONG;
+          if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
+            return ONIGERR_INVALID_CALLOUT_ARG;
+        }
+      }
+    }
+  }
+
+  if (! is_allowed_callout_name(enc, name, name_end)) {
+    return ONIGERR_INVALID_CALLOUT_NAME;
+  }
+
+  is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
+  id = callout_name_entry(&e, enc, is_not_single, name, name_end);
+  if (id < 0) return id;
+
+  r = ONIG_NORMAL;
+  if (IS_NULL(GlobalCalloutNameList)) {
+    r = make_callout_func_list(&GlobalCalloutNameList, 10);
+    if (r != ONIG_NORMAL) return r;
+  }
+
+  /* grow the list until index `id` exists (new slots are zero-filled) */
+  while (id >= GlobalCalloutNameList->n) {
+    int rid;
+    r = callout_func_list_add(GlobalCalloutNameList, &rid);
+    if (r != ONIG_NORMAL) return r;
+  }
+
+  /* build the new defaults in a scratch array first */
+  for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
+    if (arg_types[i] == ONIG_TYPE_STRING) {
+      OnigValue* val = opt_defaults + j;
+      UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);
+      if (IS_NULL(ds)) {
+        int k;
+        /* undo the copies made so far */
+        for (k = arg_num - opt_arg_num; k < i; k++) {
+          if (arg_types[k] == ONIG_TYPE_STRING)
+            xfree(new_defaults[k].s.start);
+        }
+        return ONIGERR_MEMORY;
+      }
+
+      new_defaults[i].s.start = ds;
+      new_defaults[i].s.end   = ds + (val->s.end - val->s.start);
+    }
+    else {
+      new_defaults[i] = opt_defaults[j];
+    }
+  }
+
+  fe = GlobalCalloutNameList->v + id;
+
+  /* release string defaults of a previous registration of this name
+     (a fresh slot is zero-filled, so this loop does nothing for it) */
+  for (i = fe->arg_num - fe->opt_arg_num; i < fe->arg_num; i++) {
+    if (fe->arg_types[i] == ONIG_TYPE_STRING) {
+      UChar* old = fe->opt_defaults[i].s.start;
+      if (IS_NOT_NULL(old)) xfree(old);
+    }
+  }
+
+  fe->type        = callout_type;
+  fe->in          = in;
+  fe->start_func  = start_func;
+  fe->end_func    = end_func;
+  fe->arg_num     = arg_num;
+  fe->opt_arg_num = opt_arg_num;
+  fe->name        = e->name;
+
+  for (i = 0; i < arg_num; i++) {
+    fe->arg_types[i] = arg_types[i];
+  }
+  for (i = arg_num - opt_arg_num; i < arg_num; i++) {
+    fe->opt_defaults[i] = new_defaults[i];
+  }
+
+  r = id; /* return id */
+  return r;
+}
+
+/*
+ * Resolve a callout name to its registered name id.
+ * On success stores the id in *rid and returns ONIG_NORMAL.
+ * Returns ONIGERR_INVALID_CALLOUT_NAME if the name is not well-formed and
+ * ONIGERR_UNDEFINED_CALLOUT_NAME if it is not registered; *rid is left
+ * untouched in both cases.
+ */
+static int
+get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
+ UChar* name, UChar* name_end, int* rid)
+{
+ int r;
+ CalloutNameEntry* e;
+
+ if (! is_allowed_callout_name(enc, name, name_end)) {
+ return ONIGERR_INVALID_CALLOUT_NAME;
+ }
+
+ e = callout_name_find(enc, is_not_single, name, name_end);
+ if (IS_NULL(e)) {
+ return ONIGERR_UNDEFINED_CALLOUT_NAME;
+ }
+
+ r = ONIG_NORMAL;
+ *rid = e->id;
+
+ return r;
+}
+
+/*
+ * Return the start function recorded for callout number `callout_num`
+ * of `reg`.
+ * If used for callouts of contents, return 0.
+ * Also returns 0 when onig_reg_callout_list_at() finds no entry for
+ * that number.
+ */
+extern OnigCalloutFunc
+onig_get_callout_start_func(regex_t* reg, int callout_num)
+{
+  CalloutListEntry* e;
+
+  e = onig_reg_callout_list_at(reg, callout_num);
+  if (IS_NULL(e)) return 0;
+
+  return e->start_func;
+}
+
+/*
+ * Return the start of the tag recorded for callout number `callout_num`
+ * of `reg`, or 0 when onig_reg_callout_list_at() finds no entry for
+ * that number.
+ */
+extern const UChar*
+onig_get_callout_tag_start(regex_t* reg, int callout_num)
+{
+  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
+
+  if (IS_NULL(e)) return 0;
+  return e->tag_start;
+}
+
+/*
+ * Return the end of the tag recorded for callout number `callout_num`
+ * of `reg`, or 0 when onig_reg_callout_list_at() finds no entry for
+ * that number.
+ */
+extern const UChar*
+onig_get_callout_tag_end(regex_t* reg, int callout_num)
+{
+  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
+
+  if (IS_NULL(e)) return 0;
+  return e->tag_end;
+}
+
+
+/*
+ * Accessors for the registration record of a callout name id.
+ * Each one returns 0 when no callout has been registered yet or when
+ * name_id is outside the range of GlobalCalloutNameList, instead of
+ * reading through a null pointer or past the end of the list.
+ */
+
+/* Callout type registered for name_id. */
+extern OnigCalloutType
+onig_get_callout_type_by_name_id(int name_id)
+{
+  if (IS_NULL(GlobalCalloutNameList) ||
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)
+    return (OnigCalloutType )0;
+
+  return GlobalCalloutNameList->v[name_id].type;
+}
+
+/* Start function registered for name_id. */
+extern OnigCalloutFunc
+onig_get_callout_start_func_by_name_id(int name_id)
+{
+  if (IS_NULL(GlobalCalloutNameList) ||
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)
+    return 0;
+
+  return GlobalCalloutNameList->v[name_id].start_func;
+}
+
+/* End function registered for name_id. */
+extern OnigCalloutFunc
+onig_get_callout_end_func_by_name_id(int name_id)
+{
+  if (IS_NULL(GlobalCalloutNameList) ||
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)
+    return 0;
+
+  return GlobalCalloutNameList->v[name_id].end_func;
+}
+
+/* ONIG_CALLOUT_IN_* bits registered for name_id. */
+extern int
+onig_get_callout_in_by_name_id(int name_id)
+{
+  if (IS_NULL(GlobalCalloutNameList) ||
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)
+    return 0;
+
+  return GlobalCalloutNameList->v[name_id].in;
+}
+
+/*
+ * File-local accessors for the registration record of a callout name id.
+ * None of them validates name_id or index, and all of them dereference
+ * GlobalCalloutNameList unconditionally.
+ */
+
+/* Total number of arguments registered for name_id. */
+static int
+get_callout_arg_num_by_name_id(int name_id)
+{
+ return GlobalCalloutNameList->v[name_id].arg_num;
+}
+
+/* Number of optional (trailing) arguments registered for name_id. */
+static int
+get_callout_opt_arg_num_by_name_id(int name_id)
+{
+ return GlobalCalloutNameList->v[name_id].opt_arg_num;
+}
+
+/* Type code of argument `index` registered for name_id. */
+static unsigned int
+get_callout_arg_type_by_name_id(int name_id, int index)
+{
+ return GlobalCalloutNameList->v[name_id].arg_types[index];
+}
+
+/* Default value stored at argument position `index` for name_id. */
+static OnigValue
+get_callout_opt_default_by_name_id(int name_id, int index)
+{
+ return GlobalCalloutNameList->v[name_id].opt_defaults[index];
+}
+
+/*
+ * Return the name registered for name_id.
+ * Returns 0 when no callout has been registered yet or when name_id is
+ * outside the range of GlobalCalloutNameList.
+ */
+extern UChar*
+onig_get_callout_name_by_name_id(int name_id)
+{
+  if (IS_NULL(GlobalCalloutNameList) ||
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)
+    return 0;
+
+  return GlobalCalloutNameList->v[name_id].name;
+}
+
+/*
+ * Release the global callout name data: pass GlobalCalloutNameList to
+ * free_callout_func_list(), reset the pointer to 0 so it is not left
+ * dangling, then call global_callout_name_table_free().
+ * Always returns ONIG_NORMAL.
+ */
+extern int
+onig_global_callout_names_free(void)
+{
+ free_callout_func_list(GlobalCalloutNameList);
+ GlobalCalloutNameList = 0;
+
+ global_callout_name_table_free();
+ return ONIG_NORMAL;
+}
+
+
+/* Hash table that maps a callout tag name to a CalloutTagVal. */
+typedef st_table CalloutTagTable;
+/* Value stored per tag: the 1-based callout number passed to
+   callout_tag_entry(), kept in a pointer-sized integer. */
+typedef intptr_t CalloutTagVal;
+
+/* Flag bit set on a callout list entry by i_callout_callout_list_set()
+   for every callout number found in the tag table. */
+#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)
+
+/*
+ * onig_st_foreach() callback used on the tag table.
+ * `e` is the value stored for a tag (a 1-based callout number) and `arg`
+ * is the RegexExt that owns the callout list. Marks the matching list
+ * entry with CALLOUT_TAG_LIST_FLAG_TAG_EXIST and keeps iterating.
+ * `key` is not used.
+ */
+static int
+i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
+{
+  RegexExt* owner = (RegexExt* )arg;
+  int index = (int )e - 1;  /* callout numbers start at 1, the array at 0 */
+
+  owner->callout_list[index].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
+  return ST_CONTINUE;
+}
+
+/*
+ * Finish the callout list of `reg` once parsing is complete.
+ *
+ * 1. If a tag table exists, flag every callout list entry that a tag
+ *    refers to (see i_callout_callout_list_set()).
+ * 2. For every callout of name, replace each ONIG_TYPE_TAG argument, which
+ *    so far holds the tag's start/end pointers, by the callout number that
+ *    onig_get_callout_num_by_tag() resolves it to.
+ *
+ * Returns ONIG_NORMAL, or the negative value reported by
+ * onig_get_callout_num_by_tag() for a tag that cannot be resolved.
+ */
+static int
+setup_ext_callout_list_values(regex_t* reg)
+{
+  int entry_idx;
+  RegexExt* ext = REG_EXTP(reg);
+
+  if (IS_NOT_NULL(ext->tag_table)) {
+    onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
+                    (st_data_t )ext);
+  }
+
+  for (entry_idx = 0; entry_idx < ext->callout_num; entry_idx++) {
+    int arg_idx;
+    CalloutListEntry* entry = &(ext->callout_list[entry_idx]);
+
+    /* only callouts of name carry typed arguments */
+    if (entry->of != ONIG_CALLOUT_OF_NAME) continue;
+
+    for (arg_idx = 0; arg_idx < entry->u.arg.num; arg_idx++) {
+      UChar* tag_from;
+      UChar* tag_to;
+      int tag_num;
+
+      if (entry->u.arg.types[arg_idx] != ONIG_TYPE_TAG) continue;
+
+      /* read both pointers before the value is overwritten with the number */
+      tag_from = entry->u.arg.vals[arg_idx].s.start;
+      tag_to   = entry->u.arg.vals[arg_idx].s.end;
+      tag_num  = onig_get_callout_num_by_tag(reg, tag_from, tag_to);
+      if (tag_num < 0) return tag_num;
+
+      entry->u.arg.vals[arg_idx].tag = tag_num;
+    }
+  }
+
+  return ONIG_NORMAL;
+}
+
+/*
+ * Tell whether the callout identified by `callout_num` has a tag.
+ *
+ * Callout numbers are 1-based: i_callout_callout_list_set() stores the
+ * flag at callout_list[num - 1] and onig_reg_callout_list_at() uses the
+ * same mapping, so the flag is read from callout_list[callout_num - 1].
+ *
+ * Returns 1 when the flag CALLOUT_TAG_LIST_FLAG_TAG_EXIST is set, and 0
+ * otherwise -- including when `reg` has no callout data or `callout_num`
+ * is outside 1..callout_num.
+ */
+extern int
+onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
+{
+  RegexExt* ext = REG_EXTP(reg);
+
+  if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
+  /* reject 0 and negative numbers as well as numbers past the last entry */
+  if (callout_num <= 0 || callout_num > ext->callout_num) return 0;
+
+  return (ext->callout_list[callout_num - 1].flag &
+          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;
+}
+
+/*
+ * onig_st_foreach() callback used by callout_tag_table_clear():
+ * frees the key of the visited entry and returns ST_DELETE.
+ * The stored value `e` and `arg` are not used.
+ */
+static int
+i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
+{
+ xfree(key);
+ return ST_DELETE;
+}
+
+/*
+ * Visit every entry of tag table `t` with i_free_callout_tag_entry().
+ * A null table is accepted and ignored. Always returns 0.
+ */
+static int
+callout_tag_table_clear(CalloutTagTable* t)
+{
+  if (IS_NULL(t)) return 0;
+
+  onig_st_foreach(t, i_free_callout_tag_entry, 0);
+  return 0;
+}
+
+/*
+ * Destroy a callout tag table passed as an opaque pointer: clear its
+ * entries with callout_tag_table_clear(), then free the table itself.
+ * A null pointer is accepted. Returns 0, or the non-zero result of the
+ * clear step (in which case the table is not freed).
+ */
+extern int
+onig_callout_tag_table_free(void* table)
+{
+  int status;
+  CalloutTagTable* tags = (CalloutTagTable* )table;
+
+  if (IS_NULL(tags)) return 0;
+
+  status = callout_tag_table_clear(tags);
+  if (status != 0) return status;
+
+  onig_st_free_table(tags);
+  return 0;
+}
+
+/*
+ * Look up the tag [tag, tag_end) in the tag table of `reg` and return the
+ * value stored for it (the callout number registered with the tag).
+ *
+ * Returns ONIGERR_INVALID_CALLOUT_TAG_NAME when `reg` has no extension
+ * data, no tag table, or when the lookup reports 0 (not found).
+ */
+extern int
+onig_get_callout_num_by_tag(regex_t* reg,
+                            const UChar* tag, const UChar* tag_end)
+{
+  int found;
+  CalloutTagVal stored;
+  RegexExt* ext = REG_EXTP(reg);
+
+  if (IS_NULL(ext))
+    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+  if (IS_NULL(ext->tag_table))
+    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+
+  found = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
+                                (HashDataType* )((void* )(&stored)));
+
+  /* `stored` is only meaningful when the lookup succeeded */
+  return (found == 0) ? ONIGERR_INVALID_CALLOUT_TAG_NAME : (int )stored;
+}
+
+/*
+ * Look up [name, name_end) in tag table `t`.
+ * Returns the stored value, or -1 when `t` is null or the lookup leaves
+ * the result untouched.
+ */
+static CalloutTagVal
+callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
+{
+  CalloutTagVal result = -1;  /* sentinel kept when nothing is found */
+
+  if (IS_NULL(t)) return result;
+
+  onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&result)));
+  return result;
+}
+
+/*
+ * Create an empty tag table sized INIT_TAG_NAMES_ALLOC_NUM and store it in
+ * *rt. On allocation failure *rt is left as 0 and the error produced by
+ * CHECK_NULL_RETURN_MEMERR is returned; otherwise returns ONIG_NORMAL.
+ */
+static int
+callout_tag_table_new(CalloutTagTable** rt)
+{
+  CalloutTagTable* fresh;
+
+  *rt = 0;
+
+  fresh = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
+  CHECK_NULL_RETURN_MEMERR(fresh);
+
+  *rt = fresh;
+  return ONIG_NORMAL;
+}
+
+/*
+ * Insert the tag [name, name_end) with value `entry_val` into table `t`.
+ *
+ * Returns ONIGERR_INVALID_CALLOUT_TAG_NAME for an empty name,
+ * ONIGERR_MULTIPLEX_DEFINED_NAME when the tag is already present (a
+ * non-negative value is found for it), the negative result of the insert
+ * when that fails, and ONIG_NORMAL otherwise.
+ */
+static int
+callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
+                      CalloutTagVal entry_val)
+{
+  int status;
+
+  if (name_end - name <= 0)
+    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+
+  /* callout_tag_find() yields -1 for an unknown tag */
+  if (callout_tag_find(t, name, name_end) >= 0)
+    return ONIGERR_MULTIPLEX_DEFINED_NAME;
+
+  status = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
+  return (status < 0) ? status : ONIG_NORMAL;
+}
+
+/*
+ * Make sure the extension data of `reg` owns a tag table, creating one
+ * with callout_tag_table_new() when it is missing.
+ * Returns ONIG_NORMAL on success, or the error produced when the
+ * extension data or the table cannot be obtained.
+ */
+static int
+ext_ensure_tag_table(regex_t* reg)
+{
+  int status;
+  CalloutTagTable* created;
+  RegexExt* ext = onig_get_regex_ext(reg);
+
+  CHECK_NULL_RETURN_MEMERR(ext);
+
+  /* nothing to do when the table already exists */
+  if (IS_NOT_NULL(ext->tag_table)) return ONIG_NORMAL;
+
+  status = callout_tag_table_new(&created);
+  if (status != ONIG_NORMAL) return status;
+
+  ext->tag_table = created;
+  return ONIG_NORMAL;
+}
+
+/*
+ * Register the tag [name, name_end) for the callout whose number is
+ * `entry_val`: add it to the tag table of `reg` (creating the table if
+ * needed) and record the tag's start/end pointers on the callout list
+ * entry.
+ *
+ * Returns ONIG_NORMAL on success. On failure the error of the failing
+ * step is returned and the callout list entry is left untouched;
+ * ONIGERR_PARSER_BUG is returned if `entry_val` does not identify an
+ * existing callout list entry.
+ */
+static int
+callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
+                  CalloutTagVal entry_val)
+{
+  int r;
+  RegexExt* ext;
+  CalloutListEntry* e;
+
+  r = ext_ensure_tag_table(reg);
+  if (r != ONIG_NORMAL) return r;
+
+  ext = onig_get_regex_ext(reg);
+  r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);
+  /* do not attach a tag that was rejected (empty, duplicate, insert error) */
+  if (r != ONIG_NORMAL) return r;
+
+  e = onig_reg_callout_list_at(reg, (int )entry_val);
+  /* the lookup returns 0 for an unknown number; never dereference that */
+  if (IS_NULL(e)) return ONIGERR_PARSER_BUG;
+
+  e->tag_start = name;
+  e->tag_end   = name_end;
+
+  return ONIG_NORMAL;
+}
+
+#endif /* USE_CALLOUT */
+
#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16
@@ -1045,12 +1923,6 @@ scan_env_clear(ScanEnv* env)
xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- env->num_comb_exp_check = 0;
- env->comb_exp_max_regnum = 0;
- env->curr_max_regnum = 0;
- env->has_recursion = 0;
-#endif
env->parse_depth = 0;
env->keep_num = 0;
env->save_num = 0;
@@ -1504,10 +2376,6 @@ node_new_quantifier(int lower, int upper, int by_number)
if (by_number != 0)
NODE_STATUS_ADD(node, NST_BY_NUMBER);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- QUANT_(node)->comb_exp_check_num = 0;
-#endif
-
return node;
}
@@ -1642,6 +2510,116 @@ node_new_keep(Node** node, ScanEnv* env)
return ONIG_NORMAL;
}
+#ifdef USE_CALLOUT
+
+/*
+ * Free a callout list of `n` entries together with the memory its entries
+ * own:
+ *  - callout of name: every ONIG_TYPE_STRING value among the arguments
+ *    that were actually passed (index < passed_num);
+ *  - otherwise (callout of contents): the duplicated contents string.
+ * A null `list` is ignored.
+ */
+extern void
+onig_free_reg_callout_list(int n, CalloutListEntry* list)
+{
+  int entry_idx;
+
+  if (IS_NULL(list)) return ;
+
+  for (entry_idx = 0; entry_idx < n; entry_idx++) {
+    int arg_idx;
+    CalloutListEntry* entry = list + entry_idx;
+
+    if (entry->of != ONIG_CALLOUT_OF_NAME) { /* ONIG_CALLOUT_OF_CONTENTS */
+      if (IS_NOT_NULL(entry->u.content.start)) {
+        xfree((void* )entry->u.content.start);
+      }
+      continue;
+    }
+
+    for (arg_idx = 0; arg_idx < entry->u.arg.passed_num; arg_idx++) {
+      if (entry->u.arg.types[arg_idx] != ONIG_TYPE_STRING) continue;
+
+      if (IS_NOT_NULL(entry->u.arg.vals[arg_idx].s.start))
+        xfree(entry->u.arg.vals[arg_idx].s.start);
+    }
+  }
+
+  xfree(list);
+}
+
+/*
+ * Return the callout list entry of `reg` for the 1-based callout number
+ * `num`, or 0 when `reg` has no extension data or `num` is outside
+ * 1..callout_num.
+ */
+extern CalloutListEntry*
+onig_reg_callout_list_at(regex_t* reg, int num)
+{
+  RegexExt* ext = REG_EXTP(reg);
+  CHECK_NULL_RETURN(ext);
+
+  if (num >= 1 && num <= ext->callout_num) {
+    /* number 1 lives in slot 0 */
+    return &(ext->callout_list[num - 1]);
+  }
+
+  return 0;
+}
+
+/*
+ * Append one zero-initialised entry to the callout list of env->reg and
+ * store its 1-based number in *rnum.
+ *
+ * The list is created with INIT_CALLOUT_LIST_NUM slots on first use and
+ * doubled whenever it is full. Returns ONIG_NORMAL, or the error produced
+ * by CHECK_NULL_RETURN_MEMERR when the extension data or the list cannot
+ * be allocated (the existing list is kept in that case).
+ */
+static int
+reg_callout_list_entry(ScanEnv* env, int* rnum)
+{
+#define INIT_CALLOUT_LIST_NUM  3
+
+  int new_num;
+  CalloutListEntry* slot;
+  RegexExt* ext = onig_get_regex_ext(env->reg);
+
+  CHECK_NULL_RETURN_MEMERR(ext);
+
+  if (IS_NULL(ext->callout_list)) {
+    CalloutListEntry* fresh = (CalloutListEntry* )
+      xmalloc(sizeof(CalloutListEntry) * INIT_CALLOUT_LIST_NUM);
+    CHECK_NULL_RETURN_MEMERR(fresh);
+
+    ext->callout_list       = fresh;
+    ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
+    ext->callout_num        = 0;
+  }
+
+  new_num = ext->callout_num + 1;
+  if (new_num > ext->callout_list_alloc) {
+    int doubled = ext->callout_list_alloc * 2;
+    CalloutListEntry* grown = (CalloutListEntry* )
+      xrealloc(ext->callout_list, sizeof(CalloutListEntry) * doubled);
+    CHECK_NULL_RETURN_MEMERR(grown);
+
+    ext->callout_list       = grown;
+    ext->callout_list_alloc = doubled;
+  }
+
+  slot = &(ext->callout_list[new_num - 1]);
+
+  /* NOTE(review): `in` receives an ONIG_CALLOUT_OF_* constant while `of`
+     receives a literal 0 -- confirm these two defaults are intended. */
+  slot->flag             = 0;
+  slot->of               = 0;
+  slot->in               = ONIG_CALLOUT_OF_CONTENTS;
+  slot->type             = 0;
+  slot->tag_start        = 0;
+  slot->tag_end          = 0;
+  slot->start_func       = 0;
+  slot->end_func         = 0;
+  slot->u.arg.num        = 0;
+  slot->u.arg.passed_num = 0;
+
+  ext->callout_num = new_num;
+  *rnum = new_num;
+  return ONIG_NORMAL;
+}
+
+/*
+ * Allocate a GIMMICK node of type GIMMICK_CALLOUT and store it in *node.
+ * `callout_of` becomes the node's detail_type, `num` its callout number
+ * and `id` its id. `env` is not used.
+ * On allocation failure *node is null and the error produced by
+ * CHECK_NULL_RETURN_MEMERR is returned; otherwise returns ONIG_NORMAL.
+ */
+static int
+node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
+                 ScanEnv* env)
+{
+  Node* created = node_new();
+
+  *node = created;
+  CHECK_NULL_RETURN_MEMERR(created);
+
+  NODE_SET_TYPE(created, NODE_GIMMICK);
+  GIMMICK_(created)->type        = GIMMICK_CALLOUT;
+  GIMMICK_(created)->detail_type = (int )callout_of;
+  GIMMICK_(created)->num         = num;
+  GIMMICK_(created)->id          = id;
+
+  return ONIG_NORMAL;
+}
+#endif
+
static int
make_extended_grapheme_cluster(Node** node, ScanEnv* env)
{
@@ -2838,7 +3816,7 @@ is_invalid_quantifier_target(Node* node)
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
static int
-popular_quantifier_num(QuantNode* q)
+quantifier_type_num(QuantNode* q)
{
if (q->greedy) {
if (q->lower == 0) {
@@ -2889,9 +3867,22 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
p = QUANT_(pnode);
c = QUANT_(cnode);
- pnum = popular_quantifier_num(p);
- cnum = popular_quantifier_num(c);
- if (pnum < 0 || cnum < 0) return ;
+ pnum = quantifier_type_num(p);
+ cnum = quantifier_type_num(c);
+ if (pnum < 0 || cnum < 0) {
+ if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {
+ if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {
+ int n = positive_int_multiply(p->lower, c->lower);
+ if (n >= 0) {
+ p->lower = p->upper = n;
+ NODE_BODY(pnode) = NODE_BODY(cnode);
+ goto remove_cnode;
+ }
+ }
+ }
+
+ return ;
+ }
switch(ReduceTypeTable[cnum][pnum]) {
case RQ_DEL:
@@ -2927,6 +3918,7 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
break;
}
+ remove_cnode:
NODE_BODY(cnode) = NULL_NODE;
onig_node_free(cnode);
}
@@ -5508,6 +6500,452 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en
static int parse_subexp(Node** top, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env);
+#ifdef USE_CALLOUT
+
+/* (?{...}[tag][X<>]) (?{{...}}[tag][X<>]) */
+/*
+ * Parse a callout of contents. *src must point just after the first '{'.
+ *
+ * Grammar handled here: any number of additional '{', the contents up to
+ * the matching run of '}', an optional "[tag]", an optional direction
+ * character ('X', '<' or '>'), and finally the terminator `cterm`.
+ *
+ * On success a new callout list entry and a GIMMICK callout node (*np)
+ * are created, the contents are duplicated into the entry, *src is
+ * advanced past `cterm`, and 0 is returned. On failure a negative
+ * ONIGERR_* value is returned and *src is not updated.
+ */
+static int
+parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
+{
+ int r;
+ int i;
+ int in;
+ int num;
+ OnigCodePoint c;
+ UChar* code_start;
+ UChar* code_end;
+ UChar* contents;
+ UChar* tag_start;
+ UChar* tag_end;
+ int brace_nest;
+ CalloutListEntry* e;
+ RegexExt* ext;
+ OnigEncoding enc = env->enc;
+ UChar* p = *src;
+
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+
+ /* count the extra opening braces: (?{{...}}) needs as many closing ones */
+ brace_nest = 0;
+ while (PPEEK_IS('{')) {
+ brace_nest++;
+ PINC_S;
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+ }
+
+ in = ONIG_CALLOUT_IN_PROGRESS;
+ code_start = p;
+ /* scan for a '}' followed by brace_nest further '}' characters;
+ code_end is left at the first '}' of that closing run */
+ while (1) {
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+
+ code_end = p;
+ PFETCH_S(c);
+ if (c == '}') {
+ i = brace_nest;
+ while (i > 0) {
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+ PFETCH_S(c);
+ if (c == '}') i--;
+ else break;
+ }
+ if (i == 0) break;
+ }
+ }
+
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+ PFETCH_S(c);
+ /* optional [tag]; tag_start/tag_end point into the pattern itself */
+ if (c == '[') {
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ tag_start = p;
+ while (! PEND) {
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ tag_end = p;
+ PFETCH_S(c);
+ if (c == ']') break;
+ }
+ if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
+ return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH_S(c);
+ }
+ else {
+ tag_start = tag_end = 0;
+ }
+
+ /* optional direction: 'X' adds retraction to progress, '<' selects
+ retraction only, '>' keeps the default (progress) */
+ if (c == 'X') {
+ in |= ONIG_CALLOUT_IN_RETRACTION;
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH_S(c);
+ }
+ else if (c == '<') {
+ in = ONIG_CALLOUT_IN_RETRACTION;
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH_S(c);
+ }
+ else if (c == '>') { /* no needs (default) */
+ //in = ONIG_CALLOUT_IN_PROGRESS;
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH_S(c);
+ }
+
+ if (c != cterm)
+ return ONIGERR_INVALID_CALLOUT_PATTERN;
+
+ /* syntax is valid: allocate the list entry that receives the data */
+ r = reg_callout_list_entry(env, &num);
+ if (r != 0) return r;
+
+ /* store the pattern in the extension data if it is not there yet */
+ ext = onig_get_regex_ext(env->reg);
+ if (IS_NULL(ext->pattern)) {
+ r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
+ if (r != ONIG_NORMAL) return r;
+ }
+
+ /* register the tag only when one was given and it is not empty */
+ if (tag_start != tag_end) {
+ r = callout_tag_entry(env->reg, tag_start, tag_end, num);
+ if (r != ONIG_NORMAL) return r;
+ }
+
+ contents = onigenc_strdup(enc, code_start, code_end);
+ CHECK_NULL_RETURN_MEMERR(contents);
+
+ r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
+ if (r != 0) {
+ /* the entry does not own the copy yet, so release it here */
+ xfree(contents);
+ return r;
+ }
+
+ e = onig_reg_callout_list_at(env->reg, num);
+ e->of = ONIG_CALLOUT_OF_CONTENTS;
+ e->in = in;
+ e->name_id = ONIG_NON_NAME_ID;
+ e->u.content.start = contents;
+ e->u.content.end = contents + (code_end - code_start);
+
+ *src = p;
+ return 0;
+}
+
+/*
+ * Convert the text [s, end) in encoding `enc` to a long stored in *rl.
+ *
+ * Only decimal digits are accepted, plus a single leading '+' or '-' when
+ * `sign_on` is non-zero. The magnitude may not exceed `max`.
+ * Returns ONIG_NORMAL on success and ONIGERR_INVALID_CALLOUT_ARG for an
+ * empty input, an unexpected character or a value that would pass `max`;
+ * *rl is written only on success.
+ */
+static long
+parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
+{
+  long value = 0;
+  int sign = 1;
+  UChar* cur = s;
+
+  if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;
+
+  while (cur < end) {
+    OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, cur, end);
+    cur += ONIGENC_MBC_ENC_LEN(enc, cur);
+
+    if (code >= '0' && code <= '9') {
+      long digit = (long )(code - '0');
+
+      /* refuse before value * 10 + digit could exceed max */
+      if (value > (max - digit) / 10)
+        return ONIGERR_INVALID_CALLOUT_ARG;
+
+      value = value * 10 + digit;
+    }
+    else if (sign_on != 0 && (code == '-' || code == '+')) {
+      if (code == '-') sign = -1;
+    }
+    else {
+      return ONIGERR_INVALID_CALLOUT_ARG;
+    }
+
+    /* a sign is only legal as the very first character */
+    sign_on = 0;
+  }
+
+  *rl = sign * value;
+  return ONIG_NORMAL;
+}
+
+/*
+ * Parse the comma separated argument list of a callout of name, starting
+ * at *src and ending at the terminator `cterm`.
+ *
+ * skip_mode != 0: only count the arguments; `types` and `vals` are not
+ *                 touched and may be null.
+ * skip_mode == 0: additionally convert argument n according to types[n]
+ *                 and store the result in vals[n]. types[n] may be changed
+ *                 to the type that was finally chosen (a LONG alternative
+ *                 is tried first and dropped if the text is not a number).
+ *
+ * Inside an argument a backslash escapes '\\', `cterm` and ','; any other
+ * escaped character is kept together with its backslash. Empty arguments
+ * are not counted. A single argument is limited to
+ * MAX_CALLOUT_ARG_BYTE_LENGTH bytes after unescaping.
+ *
+ * Returns the number of arguments (>= 0) and advances *src past `cterm`,
+ * or returns a negative ONIGERR_* value (then *src is unchanged).
+ * ONIG_TYPE_STRING values are heap copies owned by the caller.
+ */
+static int
+parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
+                   unsigned int types[], OnigValue vals[], ScanEnv* env)
+{
+#define MAX_CALLOUT_ARG_BYTE_LENGTH   128
+
+  int r;
+  int n;
+  int esc;
+  int cn;
+  UChar* s;
+  UChar* e;
+  UChar* eesc;
+  OnigCodePoint c;
+  UChar* bufend;
+  UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];
+  OnigEncoding enc = env->enc;
+  UChar* p = *src;
+
+  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+
+  n = 0;
+  while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
+    c   = 0;
+    cn  = 0;    /* number of characters collected for this argument */
+    esc = 0;    /* non-zero right after a backslash */
+    eesc = 0;   /* position of the last backslash seen, 0 if none */
+    bufend = buf;
+    s = e = p;  /* [s, e) is the argument as written in the pattern */
+    while (1) {
+      if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+
+      e = p;
+      PFETCH_S(c);
+      if (esc != 0) {
+        esc = 0;
+        if (c == '\\' || c == cterm || c == ',') {
+          /* escaped special character: copy it without the backslash */
+        }
+        else {
+          /* unknown escape: keep the backslash as a character of its own */
+          e = eesc;
+          cn++;
+        }
+        goto add_char;
+      }
+      else {
+        if (c == '\\') {
+          esc = 1;
+          eesc = e;
+        }
+        else if (c == cterm || c == ',')
+          break;
+        else {
+          size_t clen;
+
+        add_char:
+          if (skip_mode == 0) {
+            clen = p - e;
+            if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)
+              return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
+
+            xmemcpy(bufend, e, clen);
+            bufend += clen;
+          }
+          cn++;
+        }
+      }
+    }
+
+    if (cn != 0) {
+      if (skip_mode == 0) {
+        if ((types[n] & ONIG_TYPE_LONG) != 0) {
+          int fixed = 0;
+          if (cn > 0) {
+            long rl;
+            r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);
+            if (r == ONIG_NORMAL) {
+              vals[n].l = rl;
+              fixed = 1;
+              types[n] = ONIG_TYPE_LONG;
+            }
+          }
+
+          if (fixed == 0) {
+            /* not a number: fall back to the remaining alternative types */
+            types[n] = (types[n] & ~ONIG_TYPE_LONG);
+            if (types[n] == ONIG_TYPE_VOID)
+              return ONIGERR_INVALID_CALLOUT_ARG;
+          }
+        }
+
+        switch (types[n]) {
+        case ONIG_TYPE_LONG:
+          break;
+
+        case ONIG_TYPE_CHAR:
+          if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;
+          vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
+          break;
+
+        case ONIG_TYPE_STRING:
+          {
+            UChar* rs = onigenc_strdup(enc, buf, bufend);
+            CHECK_NULL_RETURN_MEMERR(rs);
+            vals[n].s.start = rs;
+            /* The copy holds the unescaped bytes of buf, so its length is
+               (bufend - buf). The source span (e - s) is longer whenever
+               an escape was removed and would point past the copy. */
+            vals[n].s.end   = rs + (bufend - buf);
+          }
+          break;
+
+        case ONIG_TYPE_TAG:
+          /* tags refer to the pattern text itself and may not use escapes */
+          if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))
+            return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+
+          vals[n].s.start = s;
+          vals[n].s.end   = e;
+          break;
+
+        case ONIG_TYPE_VOID:
+        case ONIG_TYPE_POINTER:
+          return ONIGERR_PARSER_BUG;
+          break;
+        }
+      }
+
+      n++;
+    }
+
+    if (c == cterm) break;
+  }
+
+  /* the argument limit was reached before the terminator was seen */
+  if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;
+
+  *src = p;
+  return n;
+}
+
+/* (*name[TAG]) (*name[TAG]{a,b,..}) */
+/*
+ * Parse a callout of name. *src must point at the first character of the
+ * name; `cterm` is the character that closes the construct.
+ *
+ * Steps: read the name, an optional "[tag]" and an optional "{args}"
+ * list; resolve the name to a name id; convert the arguments according to
+ * the types registered for that name; create a callout list entry and a
+ * GIMMICK callout node, and fill the entry (missing optional arguments
+ * receive their registered defaults).
+ *
+ * On success *np receives the node, *src is advanced past `cterm`, and 0
+ * is returned. On failure a negative ONIGERR_* value is returned, *np and
+ * *src are left unchanged, and argument strings duplicated by a completed
+ * argument pass are released.
+ */
+static int
+parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  int i;
+  int in;
+  int num;
+  int name_id;
+  int arg_num;
+  int max_arg_num;
+  int opt_arg_num;
+  int is_not_single;
+  OnigCodePoint c;
+  UChar* name_start;
+  UChar* name_end;
+  UChar* tag_start;
+  UChar* tag_end;
+  Node*  node;
+  CalloutListEntry* e;
+  RegexExt* ext;
+  unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];
+  OnigValue    vals[ONIG_CALLOUT_MAX_ARGS_NUM];
+  OnigEncoding enc = env->enc;
+  UChar* p = *src;
+
+  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+
+  node = 0;
+  name_start = p;
+  /* the name ends at the terminator, a tag '[' or an argument list '{' */
+  while (1) {
+    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+    name_end = p;
+    PFETCH_S(c);
+    if (c == cterm || c == '[' || c == '{') break;
+  }
+
+  if (! is_allowed_callout_name(enc, name_start, name_end))
+    return ONIGERR_INVALID_CALLOUT_NAME;
+
+  /* optional [tag]; tag_start/tag_end point into the pattern itself */
+  if (c == '[') {
+    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+    tag_start = p;
+    while (! PEND) {
+      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+      tag_end = p;
+      PFETCH_S(c);
+      if (c == ']') break;
+    }
+    if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
+      return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+
+    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+    PFETCH_S(c);
+  }
+  else {
+    tag_start = tag_end = 0;
+  }
+
+  if (c == '{') {
+    UChar* save;
+
+    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+    /* First pass (skip mode): count the arguments and find out what
+       follows the list, which is needed to look the name up. */
+    save = p;
+    arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);
+    if (arg_num < 0) return arg_num;
+
+    is_not_single = PPEEK_IS(cterm) ?  0 : 1;
+    p = save;
+    r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
+                                    &name_id);
+    if (r != ONIG_NORMAL) return r;
+
+    max_arg_num = get_callout_arg_num_by_name_id(name_id);
+    for (i = 0; i < max_arg_num; i++) {
+      types[i] = get_callout_arg_type_by_name_id(name_id, i);
+    }
+
+    /* Only types[0 .. max_arg_num-1] are initialised. Reject surplus
+       arguments now so that the converting pass never reads an
+       uninitialised type. */
+    if (arg_num > max_arg_num)
+      return ONIGERR_INVALID_CALLOUT_ARG;
+
+    /* Second pass: convert the arguments. */
+    arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);
+    if (arg_num < 0) return arg_num;
+
+    if (PEND) {
+      r = ONIGERR_END_PATTERN_IN_GROUP;
+      goto err_free_args;
+    }
+    PFETCH_S(c);
+  }
+  else {
+    arg_num = 0;
+
+    is_not_single = 0;
+    r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
+                                    &name_id);
+    if (r != ONIG_NORMAL) return r;
+
+    max_arg_num = get_callout_arg_num_by_name_id(name_id);
+    for (i = 0; i < max_arg_num; i++) {
+      types[i] = get_callout_arg_type_by_name_id(name_id, i);
+    }
+  }
+
+  in = onig_get_callout_in_by_name_id(name_id);
+  opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
+  /* only the trailing opt_arg_num arguments may be left out */
+  if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) {
+    r = ONIGERR_INVALID_CALLOUT_ARG;
+    goto err_free_args;
+  }
+
+  if (c != cterm) {
+    r = ONIGERR_INVALID_CALLOUT_PATTERN;
+    goto err_free_args;
+  }
+
+  r = reg_callout_list_entry(env, &num);
+  if (r != 0) goto err_free_args;
+
+  /* store the pattern in the extension data if it is not there yet */
+  ext = onig_get_regex_ext(env->reg);
+  if (IS_NULL(ext->pattern)) {
+    r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
+    if (r != ONIG_NORMAL) goto err_free_args;
+  }
+
+  /* register the tag only when one was given and it is not empty */
+  if (tag_start != tag_end) {
+    r = callout_tag_entry(env->reg, tag_start, tag_end, num);
+    if (r != ONIG_NORMAL) goto err_free_args;
+  }
+
+  r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
+  if (r != ONIG_NORMAL) goto err_free_args;
+
+  /* From here on the list entry owns the argument values. */
+  e = onig_reg_callout_list_at(env->reg, num);
+  e->of         = ONIG_CALLOUT_OF_NAME;
+  e->in         = in;
+  e->name_id    = name_id;
+  e->type       = onig_get_callout_type_by_name_id(name_id);
+  e->start_func = onig_get_callout_start_func_by_name_id(name_id);
+  e->end_func   = onig_get_callout_end_func_by_name_id(name_id);
+  e->u.arg.num        = max_arg_num;
+  e->u.arg.passed_num = arg_num;
+  for (i = 0; i < max_arg_num; i++) {
+    e->u.arg.types[i] = types[i];
+    if (i < arg_num)
+      e->u.arg.vals[i] = vals[i];
+    else
+      e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);
+  }
+
+  *np = node;
+  *src = p;
+  return 0;
+
+ err_free_args:
+  /* The strings duplicated for ONIG_TYPE_STRING arguments have not been
+     handed to a list entry yet; release them so they do not leak. */
+  for (i = 0; i < arg_num; i++) {
+    if (types[i] == ONIG_TYPE_STRING && IS_NOT_NULL(vals[i].s.start))
+      xfree(vals[i].s.start);
+  }
+  return r;
+}
+#endif
+
static int
parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env)
@@ -5526,8 +6964,8 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
option = env->options;
- if (PPEEK_IS('?') &&
- IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+ c = PPEEK;
+ if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
PINC;
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
@@ -5673,6 +7111,18 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
break;
+#ifdef USE_CALLOUT
+ case '{':
+ if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+
+ r = parse_callout_of_contents(np, ')', &p, end, env);
+ if (r != 0) return r;
+
+ goto end;
+ break;
+#endif
+
case '(':
/* (?()...) */
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
@@ -5769,6 +7219,29 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (c != ')') goto err_if_else;
}
}
+#ifdef USE_CALLOUT
+ else if (c == '?') {
+ if (IS_SYNTAX_OP2(env->syntax,
+ ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {
+ if (! PEND && PPEEK_IS('{')) {
+ /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
+ condition_is_checker = 0;
+ PFETCH(c);
+ r = parse_callout_of_contents(&condition, ')', &p, end, env);
+ if (r != 0) return r;
+ goto end_condition;
+ }
+ }
+ goto any_condition;
+ }
+ else if (c == '*' &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
+ condition_is_checker = 0;
+ r = parse_callout_of_name(&condition, ')', &p, end, env);
+ if (r != 0) return r;
+ goto end_condition;
+ }
+#endif
else {
any_condition:
PUNFETCH;
@@ -5782,6 +7255,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
}
+ end_condition:
CHECK_NULL_RETURN_MEMERR(condition);
if (PEND) {
@@ -5970,6 +7444,16 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_UNDEFINED_GROUP_OPTION;
}
}
+#ifdef USE_CALLOUT
+ else if (c == '*' &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
+ PINC;
+ r = parse_callout_of_name(np, ')', &p, end, env);
+ if (r != 0) return r;
+
+ goto end;
+ }
+#endif
else {
if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
goto group;
@@ -6040,11 +7524,11 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
{ /* check redundant double repeat. */
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
QuantNode* qnt = QUANT_(target);
- int nestq_num = popular_quantifier_num(qn);
- int targetq_num = popular_quantifier_num(qnt);
+ int nestq_num = quantifier_type_num(qn);
+ int targetq_num = quantifier_type_num(qnt);
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (! NODE_IS_BY_NUMBER(qnode) && ! NODE_IS_BY_NUMBER(target) &&
+ if (targetq_num >= 0 && nestq_num >= 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
UChar buf[WARN_BUFSIZE];
@@ -6078,18 +7562,19 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
warn_exit:
#endif
- if (targetq_num >= 0) {
- if (nestq_num >= 0) {
- onig_reduce_nested_quantifier(qnode, target);
- goto q_exit;
- }
- else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
+ if (targetq_num >= 0 && nestq_num < 0) {
+ if (targetq_num == 1 || targetq_num == 2) { /* * or + */
/* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
}
}
}
+ else {
+ NODE_BODY(qnode) = target;
+ onig_reduce_nested_quantifier(qnode, target);
+ goto q_exit;
+ }
}
break;
@@ -6717,6 +8202,9 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
{
int r;
UChar* p;
+#ifdef USE_CALLOUT
+ RegexExt* ext;
+#endif
names_clear(reg);
@@ -6750,6 +8238,14 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
#endif
reg->num_mem = env->num_mem;
+
+#ifdef USE_CALLOUT
+ ext = REG_EXTP(reg);
+ if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
+ r = setup_ext_callout_list_values(reg);
+ }
+#endif
+
return r;
}
diff --git a/src/regparse.h b/src/regparse.h
index 99fe7c9..3ffbea4 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -51,8 +51,12 @@ enum GimmickType {
GIMMICK_KEEP = 1,
GIMMICK_SAVE = 2,
GIMMICK_UPDATE_VAR = 3,
+#ifdef USE_CALLOUT
+ GIMMICK_CALLOUT = 4,
+#endif
};
+
/* node type bit */
#define NODE_TYPE2BIT(type) (1<<(type))
@@ -97,7 +101,7 @@ enum GimmickType {
(NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
-#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
+#define ANCHOR_ANYCHAR_INF_MASK (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
enum EnclosureType {
@@ -129,10 +133,12 @@ enum EnclosureType {
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
-#define QUANT_BODY_IS_NOT_EMPTY 0
-#define QUANT_BODY_IS_EMPTY 1
-#define QUANT_BODY_IS_EMPTY_MEM 2
-#define QUANT_BODY_IS_EMPTY_REC 3
+/* Classification kept in QuantNode.body_empty_info. It replaces the
+ former QUANT_BODY_IS_* preprocessor constants with the same values.
+ NOTE(review): presumably the _MEM and _REC variants mark bodies that
+ involve capture groups and recursion respectively -- confirm in
+ regcomp.c. */
+enum QuantBodyEmpty {
+ QUANT_BODY_IS_NOT_EMPTY = 0,
+ QUANT_BODY_IS_EMPTY = 1,
+ QUANT_BODY_IS_EMPTY_MEM = 2,
+ QUANT_BODY_IS_EMPTY_REC = 3
+};
/* node status bits */
#define NST_MIN_FIXED (1<<0)
@@ -221,13 +227,10 @@ typedef struct {
int lower;
int upper;
int greedy;
- int body_empty_info;
+ enum QuantBodyEmpty body_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
-#endif
} QuantNode;
typedef struct {
@@ -330,6 +333,7 @@ typedef struct {
enum GimmickType type;
int detail_type;
+ int num;
int id;
} GimmickNode;
@@ -398,15 +402,9 @@ typedef struct {
int num_mem;
int num_named;
int mem_alloc;
- MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
- MemEnv* mem_env_dynamic;
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- int num_comb_exp_check;
- int comb_exp_max_regnum;
- int curr_max_regnum;
- int has_recursion;
-#endif
- unsigned int parse_depth;
+ MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
+ MemEnv* mem_env_dynamic;
+ unsigned int parse_depth;
int keep_num;
int save_num;
@@ -447,6 +445,10 @@ extern int onig_free_shared_cclass_table P_((void));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
+#ifdef USE_CALLOUT
+extern int onig_global_callout_names_free(void);
+#endif
+
#ifdef ONIG_DEBUG
extern int onig_print_names(FILE*, regex_t*);
#endif
diff --git a/src/regposerr.c b/src/regposerr.c
index fc71eee..2e2a8e2 100644
--- a/src/regposerr.c
+++ b/src/regposerr.c
@@ -2,7 +2,7 @@
regposerr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,13 @@
* SUCH DAMAGE.
*/
+/* regint.h and related headers cannot be included here because of a
+ conflict over regex_t.
+ Define ONIGURUMA_EXPORT here for onigposix.h instead.
+ */
+#ifndef ONIGURUMA_EXPORT
+#define ONIGURUMA_EXPORT
+#endif
+
#include "config.h"
#include "onigposix.h"
diff --git a/src/regposix.c b/src/regposix.c
index 0fdbcbb..895cf29 100644
--- a/src/regposix.c
+++ b/src/regposix.c
@@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,8 +58,10 @@ onig2posix_error_code(int code)
static const O2PERR o2p[] = {
{ ONIG_MISMATCH, REG_NOMATCH },
{ ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
+ { ONIG_ABORT, REG_EONIG_INTERNAL },
{ ONIGERR_MEMORY, REG_ESPACE },
{ ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
+ { ONIGERR_RETRY_LIMIT_IN_MATCH_OVER, REG_EONIG_INTERNAL },
{ ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
@@ -117,6 +119,12 @@ onig2posix_error_code(int code)
{ ONIGERR_INVALID_IF_ELSE_SYNTAX, REG_BADPAT },
{ ONIGERR_INVALID_ABSENT_GROUP_PATTERN, REG_BADPAT },
{ ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN, REG_BADPAT },
+ { ONIGERR_INVALID_CALLOUT_PATTERN, REG_BADPAT },
+ { ONIGERR_INVALID_CALLOUT_NAME, REG_BADPAT },
+ { ONIGERR_UNDEFINED_CALLOUT_NAME, REG_BADPAT },
+ { ONIGERR_INVALID_CALLOUT_BODY, REG_BADPAT },
+ { ONIGERR_INVALID_CALLOUT_TAG_NAME, REG_BADPAT },
+ { ONIGERR_INVALID_CALLOUT_ARG, REG_BADPAT },
{ ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
{ ONIGERR_LIBRARY_IS_NOT_INITIALIZED, REG_EONIG_INTERNAL }
};
@@ -260,8 +268,7 @@ reg_set_encoding(int mb_code)
break;
}
- onig_initialize(0, 0);
- onig_initialize_encoding(enc);
+ onig_initialize(&enc, 1);
onigenc_set_default_encoding(enc);
}
diff --git a/src/regsyntax.c b/src/regsyntax.c
index 3817d38..aa95479 100644
--- a/src/regsyntax.c
+++ b/src/regsyntax.c
@@ -2,7 +2,7 @@
regsyntax.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -176,6 +176,8 @@ OnigSyntaxType OnigSyntaxPerl = {
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
+ ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
+ ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
@@ -207,6 +209,8 @@ OnigSyntaxType OnigSyntaxPerl_NG = {
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
+ ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
+ ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
diff --git a/src/regversion.c b/src/regversion.c
index 245a001..594a52c 100644
--- a/src/regversion.c
+++ b/src/regversion.c
@@ -2,7 +2,7 @@
regversion.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,6 @@
* SUCH DAMAGE.
*/
-#include "config.h"
#include "regint.h"
#include <stdio.h>
@@ -49,7 +48,7 @@ onig_copyright(void)
static char s[58];
xsnprintf(s, sizeof(s),
- "Oniguruma %d.%d.%d : Copyright (C) 2002-2016 K.Kosako",
+ "Oniguruma %d.%d.%d : Copyright (C) 2002-2018 K.Kosako",
ONIGURUMA_VERSION_MAJOR,
ONIGURUMA_VERSION_MINOR,
ONIGURUMA_VERSION_TEENY);
diff --git a/src/sjis.c b/src/sjis.c
index 88b8d02..e1bf3e1 100644
--- a/src/sjis.c
+++ b/src/sjis.c
@@ -2,7 +2,7 @@
sjis.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -321,8 +321,8 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
OnigEncodingType OnigEncodingSJIS = {
mbc_enc_len,
"Shift_JIS", /* name */
- 2, /* max byte length */
- 1, /* min byte length */
+ 2, /* max enc length */
+ 1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
mbc_to_code,
code_to_mbclen,
diff --git a/src/utf16_be.c b/src/utf16_be.c
index f220cca..098ab54 100644
--- a/src/utf16_be.c
+++ b/src/utf16_be.c
@@ -2,7 +2,7 @@
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,50 @@
* SUCH DAMAGE.
*/
-#include "regenc.h"
+#include "regint.h" /* for USE_CALLOUT */
+
+
+static int
+init(void)
+{
+#ifdef USE_CALLOUT
+
+ int id;
+ OnigEncoding enc;
+ char* name;
+ unsigned int t_long;
+ unsigned int args[4];
+ OnigValue opts[4];
+
+ enc = ONIG_ENCODING_UTF16_BE;
+ t_long = ONIG_TYPE_LONG;
+
+ name = "\000F\000A\000I\000L\000\000"; BC0_P(name, fail);
+ name = "\000M\000I\000S\000M\000A\000T\000C\000H\000\000"; BC0_P(name, mismatch);
+ name = "\000M\000A\000X\000\000"; BC_B(name, max, 1, &t_long);
+
+ name = "\000E\000R\000R\000O\000R\000\000";
+ args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
+ BC_P_O(name, error, 1, args, 1, opts);
+
+ name = "\000C\000O\000U\000N\000T\000\000";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, count, 1, args, 1, opts);
+
+ name = "\000T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, total_count, 1, args, 1, opts);
+
+ name = "\000C\000M\000P\000\000";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_STRING;
+ args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ BC_P(name, cmp, 3, args);
+
+#endif /* USE_CALLOUT */
+
+ return ONIG_NORMAL;
+}
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -215,8 +258,8 @@ utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncodingType OnigEncodingUTF16_BE = {
utf16be_mbc_enc_len,
"UTF-16BE", /* name */
- 4, /* max byte length */
- 2, /* min byte length */
+ 4, /* max enc length */
+ 2, /* min enc length */
utf16be_is_mbc_newline,
utf16be_mbc_to_code,
utf16be_code_to_mbclen,
@@ -229,7 +272,7 @@ OnigEncodingType OnigEncodingUTF16_BE = {
onigenc_utf16_32_get_ctype_code_range,
utf16be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
- NULL, /* init */
- NULL, /* is_initialized */
+ init,
+ 0, /* is_initialized */
is_valid_mbc_string
};
diff --git a/src/utf16_le.c b/src/utf16_le.c
index 89bc72f..dc0d3f1 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,8 +26,49 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+#include "regint.h" /* for USE_CALLOUT */
-#include "regenc.h"
+static int
+init(void)
+{
+#ifdef USE_CALLOUT
+
+ int id;
+ OnigEncoding enc;
+ char* name;
+ unsigned int t_long;
+ unsigned int args[4];
+ OnigValue opts[4];
+
+ enc = ONIG_ENCODING_UTF16_LE;
+ t_long = ONIG_TYPE_LONG;
+
+ name = "F\000A\000I\000L\000\000\000"; BC0_P(name, fail);
+ name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch);
+ name = "M\000A\000X\000\000\000"; BC_B(name, max, 1, &t_long);
+
+ name = "E\000R\000R\000O\000R\000\000\000";
+ args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
+ BC_P_O(name, error, 1, args, 1, opts);
+
+ name = "C\000O\000U\000N\000T\000\000\000";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, count, 1, args, 1, opts);
+
+ name = "T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000\000";
+ args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
+ BC_B_O(name, total_count, 1, args, 1, opts);
+
+ name = "C\000M\000P\000\000\000";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_STRING;
+ args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ BC_P(name, cmp, 3, args);
+
+#endif /* USE_CALLOUT */
+
+ return ONIG_NORMAL;
+}
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -225,8 +266,8 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncodingType OnigEncodingUTF16_LE = {
utf16le_mbc_enc_len,
"UTF-16LE", /* name */
- 4, /* max byte length */
- 2, /* min byte length */
+ 4, /* max enc length */
+ 2, /* min enc length */
utf16le_is_mbc_newline,
utf16le_mbc_to_code,
utf16le_code_to_mbclen,
@@ -239,7 +280,7 @@ OnigEncodingType OnigEncodingUTF16_LE = {
onigenc_utf16_32_get_ctype_code_range,
utf16le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
- NULL, /* init */
- NULL, /* is_initialized */
+ init,
+ 0, /* is_initialized */
is_valid_mbc_string
};
diff --git a/src/utf32_be.c b/src/utf32_be.c
index d0c7f39..68760bb 100644
--- a/src/utf32_be.c
+++ b/src/utf32_be.c
@@ -2,7 +2,7 @@
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -174,8 +174,8 @@ utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncodingType OnigEncodingUTF32_BE = {
utf32be_mbc_enc_len,
"UTF-32BE", /* name */
- 4, /* max byte length */
- 4, /* min byte length */
+ 4, /* max enc length */
+ 4, /* min enc length */
utf32be_is_mbc_newline,
utf32be_mbc_to_code,
utf32be_code_to_mbclen,
diff --git a/src/utf32_le.c b/src/utf32_le.c
index 33200d1..8208cd0 100644
--- a/src/utf32_le.c
+++ b/src/utf32_le.c
@@ -2,7 +2,7 @@
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -174,8 +174,8 @@ utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncodingType OnigEncodingUTF32_LE = {
utf32le_mbc_enc_len,
"UTF-32LE", /* name */
- 4, /* max byte length */
- 4, /* min byte length */
+ 4, /* max enc length */
+ 4, /* min enc length */
utf32le_is_mbc_newline,
utf32le_mbc_to_code,
utf32le_code_to_mbclen,
diff --git a/src/utf8.c b/src/utf8.c
index e5e59b2..a5c4dbe 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -2,7 +2,7 @@
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -280,8 +280,8 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncodingType OnigEncodingUTF8 = {
mbc_enc_len,
"UTF-8", /* name */
- 6, /* max byte length */
- 1, /* min byte length */
+ 6, /* max enc length */
+ 1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
mbc_to_code,
code_to_mbclen,