diff options
| author | Jörg Frings-Fürst <debian@jff.email> | 2018-12-21 13:48:35 +0100 | 
|---|---|---|
| committer | Jörg Frings-Fürst <debian@jff.email> | 2018-12-21 13:48:35 +0100 | 
| commit | 70de057dbb5ea79536834e156f534279347f96f3 (patch) | |
| tree | e7fc439a3c893ffe8358d3d0c26a112d4b4cf89d /src | |
| parent | c9b3c26db59d89e320a6eb86d9558051ecba0900 (diff) | |
New upstream version 6.9.1 [upstream/6.9.1]
Diffstat (limited to 'src')
57 files changed, 2030 insertions, 1972 deletions
| diff --git a/src/.gitignore b/src/.gitignore deleted file mode 100644 index 50ae793..0000000 --- a/src/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -Makefile -config.h -CaseFolding.txt -unicode_fold?_key.gperf -unicode_unfold_key.gperf -UNICODE_PROPERTIES -*.o -*.so -*.lo -*.la -*~ -*.txt -.libs/ -.deps/ -/mktable diff --git a/src/Makefile.windows b/src/Makefile.windows index e98dc2e..762cf07 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -1,183 +1,183 @@ -# Oniguruma Makefile for Windows
 -
 -product_name = oniguruma
 -
 -CPPFLAGS = 
 -CFLAGS = -O2 -nologo /W3
 -LDFLAGS =
 -LOADLIBES =
 -ARLIB = lib
 -ARLIB_FLAGS = -nologo
 -ARDLL = cl
 -ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
 -LINKFLAGS = -link -incremental:no -pdb:none
 -
 -INSTALL = install -c
 -CP      = copy
 -CC = cl
 -DEFS = -DHAVE_CONFIG_H
 -
 -subdirs = 
 -
 -libbase   = onig
 -libname   = $(libbase)_s.lib
 -dllname   = $(libbase).dll
 -dlllib    = $(libbase).lib
 -
 -!IF defined(ENABLE_POSIX_API) && "$(ENABLE_POSIX_API)" == "NO"
 -posixobjs = 
 -!ELSE
 -posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj
 -!ENDIF
 -
 -onigheaders  = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h
 -posixheaders = $(ONIG_DIR)/onigposix.h
 -headers      = $(posixheaders) $(onigheaders)
 -
 -onigobjs     = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \
 -	       $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \
 -	       $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj
 -libobjs      = $(onigobjs) $(posixobjs)
 -
 -jp_objs      =  $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj
 -iso8859_objs =  $(BUILD_DIR)/iso8859_1.obj  $(BUILD_DIR)/iso8859_2.obj \
 -		$(BUILD_DIR)/iso8859_3.obj  $(BUILD_DIR)/iso8859_4.obj \
 -		$(BUILD_DIR)/iso8859_5.obj  $(BUILD_DIR)/iso8859_6.obj \
 -		$(BUILD_DIR)/iso8859_7.obj  $(BUILD_DIR)/iso8859_8.obj \
 -		$(BUILD_DIR)/iso8859_9.obj  $(BUILD_DIR)/iso8859_10.obj \
 -		$(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \
 -		$(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \
 -		$(BUILD_DIR)/iso8859_16.obj
 -
 -encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \
 -		$(BUILD_DIR)/unicode.obj \
 -		$(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \
 -		$(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \
 -		$(jp_objs) $(iso8859_objs) \
 -		$(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \
 -		$(BUILD_DIR)/gb18030.obj \
 -		$(BUILD_DIR)/koi8_r.obj  \
 -		$(BUILD_DIR)/cp1251.obj \
 -		$(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \
 -		$(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \
 -		$(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj	# $(BUILD_DIR)/koi8.obj
 -
 -onigsources  = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
 -	       $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c
 -posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c
 -libsources   = $(posixsources) $(onigsources)
 -
 -patchfiles   = re.c.168.patch re.c.181.patch
 -distfiles    = README COPYING HISTORY \
 -		Makefile.in configure.in config.h.in configure \
 -		$(headers) $(libsources) $(patchfiles) \
 -		test.rb testconv.rb
 -testc        = testc
 -testp        = testp
 -
 -makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
 -
 -.SUFFIXES:
 -.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
 -
 -{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj:
 -	$(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
 -
 -# targets
 -default: all
 -
 -setup:
 -	$(CP) ..\win32\config.h config.h
 -	$(CP) ..\win32\testc.c  testc.c
 -
 -
 -all: $(libname) $(dllname) 
 -
 -$(libname): $(libobjs) $(encobjs)
 -	$(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs)
 -
 -$(dllname): $(libobjs) $(encobjs)
 -	$(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
 -
 -$(BUILD_DIR)/regparse.obj:  $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regext.obj:    $(ONIG_DIR)/regext.c   $(onigheaders) $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regtrav.obj:   $(ONIG_DIR)/regtrav.c  $(onigheaders) $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regcomp.obj:   $(ONIG_DIR)/regcomp.c  $(onigheaders) $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regexec.obj:   $(ONIG_DIR)/regexec.c  $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/reggnu.obj:    $(ONIG_DIR)/reggnu.c   $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h
 -$(BUILD_DIR)/regerror.obj:  $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regenc.obj:    $(ONIG_DIR)/regenc.c   $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regposix.obj:  $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/st.obj:        $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h
 -$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h
 -
 -$(BUILD_DIR)/ascii.obj:      $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/unicode.obj:    $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/utf8.obj:       $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/utf16_be.obj:   $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/utf16_le.obj:   $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/utf32_be.obj:   $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/utf32_le.obj:   $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/euc_jp.obj:     $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/euc_tw.obj:     $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/euc_kr.obj:     $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/sjis.obj:       $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_1.obj:  $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_2.obj:  $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_3.obj:  $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_4.obj:  $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_5.obj:  $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_6.obj:  $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_7.obj:  $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_8.obj:  $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_9.obj:  $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/koi8.obj:       $(ONIG_DIR)/koi8.c   $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/koi8_r.obj:     $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/cp1251.obj:     $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/big5.obj:       $(ONIG_DIR)/big5.c   $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/gb18030.obj:    $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/euc_jp_prop.obj:  $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h
 -$(BUILD_DIR)/sjis_prop.obj:    $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h
 -$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
 -
 -# C library test
 -ctest: $(testc)
 -	.\$(testc)
 -
 -# POSIX C library test
 -ptest: $(testp)
 -	.\$(testp)
 -
 -$(testc): $(testc).c $(libname)
 -	$(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
 -
 -$(testp): $(testc).c $(dlllib)
 -	$(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib)
 -
 -$(testc)u: $(testc)u.c $(libname)
 -	$(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
 -
 -clean:
 -	del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj
 -
 -
 -samples: all
 -	$(CC) $(CFLAGS) -I. /Fe:simple  $(ONIG_DIR)\sample\simple.c  $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:posix   $(ONIG_DIR)\sample\posix.c   $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:names   $(ONIG_DIR)\sample\names.c   $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:listcap $(ONIG_DIR)\sample\listcap.c $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:sql     $(ONIG_DIR)\sample\sql.c     $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:encode  $(ONIG_DIR)\sample\encode.c  $(dlllib)
 -	$(CC) $(CFLAGS) -I. /Fe:syntax  $(ONIG_DIR)\sample\syntax.c  $(dlllib)
\ No newline at end of file +# Oniguruma Makefile for Windows + +product_name = oniguruma + +CPPFLAGS = +CFLAGS = -O2 -nologo /W3 +LDFLAGS = +LOADLIBES = +ARLIB = lib +ARLIB_FLAGS = -nologo +ARDLL = cl +ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll +LINKFLAGS = -link -incremental:no -pdb:none + +INSTALL = install -c +CP      = copy +CC = cl +DEFS = -DHAVE_CONFIG_H + +subdirs = + +libbase   = onig +libname   = $(libbase)_s.lib +dllname   = $(libbase).dll +dlllib    = $(libbase).lib + +!IF defined(ENABLE_POSIX_API) && "$(ENABLE_POSIX_API)" == "NO" +posixobjs = +!ELSE +posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj +!ENDIF + +onigheaders  = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h +posixheaders = $(ONIG_DIR)/onigposix.h +headers      = $(posixheaders) $(onigheaders) + +onigobjs     = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \ +	       $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \ +	       $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj +libobjs      = $(onigobjs) $(posixobjs) + +jp_objs      =  $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj +iso8859_objs =  $(BUILD_DIR)/iso8859_1.obj  $(BUILD_DIR)/iso8859_2.obj \ +		$(BUILD_DIR)/iso8859_3.obj  $(BUILD_DIR)/iso8859_4.obj \ +		$(BUILD_DIR)/iso8859_5.obj  $(BUILD_DIR)/iso8859_6.obj \ +		$(BUILD_DIR)/iso8859_7.obj  $(BUILD_DIR)/iso8859_8.obj \ +		$(BUILD_DIR)/iso8859_9.obj  $(BUILD_DIR)/iso8859_10.obj \ +		$(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \ +		$(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \ +		$(BUILD_DIR)/iso8859_16.obj + +encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \ +		$(BUILD_DIR)/unicode.obj \ +		$(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \ +		$(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \ +		$(jp_objs) 
$(iso8859_objs) \ +		$(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \ +		$(BUILD_DIR)/gb18030.obj \ +		$(BUILD_DIR)/koi8_r.obj  \ +		$(BUILD_DIR)/cp1251.obj \ +		$(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \ +		$(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \ +		$(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj	# $(BUILD_DIR)/koi8.obj + +onigsources  = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \ +	       $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c +posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c +libsources   = $(posixsources) $(onigsources) + +patchfiles   = re.c.168.patch re.c.181.patch +distfiles    = README COPYING HISTORY \ +		Makefile.in configure.in config.h.in configure \ +		$(headers) $(libsources) $(patchfiles) \ +		test.rb testconv.rb +testc        = testc +testp        = testp + +makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' + +.SUFFIXES: +.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo + +{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj: +	$(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. 
/Fo$@ /c $< + +# targets +default: all + +setup: +	$(CP) ..\win32\config.h config.h +	$(CP) ..\win32\testc.c  testc.c + + +all: $(libname) $(dllname) + +$(libname): $(libobjs) $(encobjs) +	$(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs) + +$(dllname): $(libobjs) $(encobjs) +	$(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS) + +$(BUILD_DIR)/regparse.obj:  $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regext.obj:    $(ONIG_DIR)/regext.c   $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regtrav.obj:   $(ONIG_DIR)/regtrav.c  $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regcomp.obj:   $(ONIG_DIR)/regcomp.c  $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regexec.obj:   $(ONIG_DIR)/regexec.c  $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/reggnu.obj:    $(ONIG_DIR)/reggnu.c   $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h +$(BUILD_DIR)/regerror.obj:  $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regenc.obj:    $(ONIG_DIR)/regenc.c   $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regposix.obj:  $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/st.obj:        $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h +$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h + +$(BUILD_DIR)/ascii.obj:      $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h 
$(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode.obj:    $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf8.obj:       $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf16_be.obj:   $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf16_le.obj:   $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf32_be.obj:   $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf32_le.obj:   $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_jp.obj:     $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_tw.obj:     $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_kr.obj:     $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/sjis.obj:       $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_1.obj:  $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_2.obj:  $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_3.obj:  $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_4.obj:  $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_5.obj:  $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_6.obj:  $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_7.obj:  $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_8.obj:  $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_9.obj:  $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h 
$(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/koi8.obj:       $(ONIG_DIR)/koi8.c   $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/koi8_r.obj:     $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/cp1251.obj:     $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/big5.obj:       $(ONIG_DIR)/big5.c   $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/gb18030.obj:    $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_jp_prop.obj:  $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h +$(BUILD_DIR)/sjis_prop.obj:    $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h +$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h + +# C library test +ctest: $(testc) +	.\$(testc) + +# POSIX C library test +ptest: $(testp) +	.\$(testp) + +$(testc): $(testc).c $(libname) +	$(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname) + +$(testp): $(testc).c $(dlllib) +	$(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib) + +$(testc)u: $(testc)u.c $(libname) +	$(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c 
$(libname) + +clean: +	del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj + + +samples: all +	$(CC) $(CFLAGS) -I. /Fe:simple  $(ONIG_DIR)\sample\simple.c  $(dlllib) +	$(CC) $(CFLAGS) -I. /Fe:posix   $(ONIG_DIR)\sample\posix.c   $(dlllib) +	$(CC) $(CFLAGS) -I. /Fe:names   $(ONIG_DIR)\sample\names.c   $(dlllib) +	$(CC) $(CFLAGS) -I. /Fe:listcap $(ONIG_DIR)\sample\listcap.c $(dlllib) +	$(CC) $(CFLAGS) -I. /Fe:sql     $(ONIG_DIR)\sample\sql.c     $(dlllib) +	$(CC) $(CFLAGS) -I. /Fe:encode  $(ONIG_DIR)\sample\encode.c  $(dlllib) +	$(CC) $(CFLAGS) -I. /Fe:syntax  $(ONIG_DIR)\sample\syntax.c  $(dlllib) diff --git a/src/ascii.c b/src/ascii.c index eb38944..e83e4d6 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -113,6 +113,6 @@ OnigEncodingType OnigEncodingASCII = {    init,    0, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; @@ -151,7 +151,7 @@ big5_left_adjust_char_head(const UChar* start, const UChar* s)  	p++;  	break;        } -    }  +    }    }    len = enclen(ONIG_ENCODING_BIG5, p);    if (p + len > s) return (UChar* )p; @@ -187,6 +187,6 @@ OnigEncodingType OnigEncodingBIG5 = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index b59cc8d..f49177f 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -13,27 +13,9 @@     */  #cmakedefine HAVE_ALLOCA_H  ${HAVE_ALLOCA_H} -/* Define if compilerr supports prototypes */ -#cmakedefine HAVE_PROTOTYPES  ${HAVE_PROTOTYPES} - -/* Define if compiler supports stdarg prototypes */ -#cmakedefine HAVE_STDARG_PROTOTYPES  ${HAVE_STDARG_PROTOTYPES} -  /* Define to 1 if you have the <stdint.h> header file. 
*/  #cmakedefine HAVE_STDINT_H  ${HAVE_STDINT_H} -/* Define to 1 if you have the <stdlib.h> header file. */ -#cmakedefine HAVE_STDLIB_H  ${HAVE_STDLIB_H} - -/* Define to 1 if you have the <strings.h> header file. */ -#cmakedefine HAVE_STRINGS_H  ${HAVE_STRINGS_H} - -/* Define to 1 if you have the <string.h> header file. */ -#cmakedefine HAVE_STRING_H  ${HAVE_STRING_H} - -/* Define to 1 if you have the <limits.h> header file. */ -#cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H} -  /* Define to 1 if you have the <sys/times.h> header file. */  #cmakedefine HAVE_SYS_TIMES_H  ${HAVE_SYS_TIMES_H} @@ -64,9 +46,6 @@  /* The size of `short', as computed by sizeof. */  #cmakedefine SIZEOF_SHORT  ${SIZEOF_SHORT} -/* Define to 1 if you have the ANSI C header files. */ -#cmakedefine STDC_HEADERS  ${STDC_HEADERS} -  /* Define if enable CR+NL as line terminator */  #cmakedefine USE_CRNL_AS_LINE_TERMINATOR  ${USE_CRNL_AS_LINE_TERMINATOR} diff --git a/src/config.h.win32 b/src/config.h.win32 index 12609df..a8a8426 100644 --- a/src/config.h.win32 +++ b/src/config.h.win32 @@ -1,81 +1,52 @@ -#define STDC_HEADERS 1
 -#define HAVE_SYS_TYPES_H 1
 -#define HAVE_SYS_STAT_H 1
 -#define HAVE_STDLIB_H 1
 -#define HAVE_STRING_H 1
 -#define HAVE_MEMORY_H 1
 -#define HAVE_FLOAT_H 1
 -#define HAVE_OFF_T 1
 -#define SIZEOF_INT 4
 -#define SIZEOF_SHORT 2
 -#define SIZEOF_LONG 4
 -#define SIZEOF_LONG_LONG 8
 -#define SIZEOF___INT64 8
 -#define SIZEOF_OFF_T 4
 -#define SIZEOF_VOIDP 4
 -#define SIZEOF_FLOAT 4
 -#define SIZEOF_DOUBLE 8
 -#define HAVE_PROTOTYPES 1
 -#define TOKEN_PASTE(x,y) x##y
 -#define HAVE_STDARG_PROTOTYPES 1
 -#ifndef NORETURN
 -#if _MSC_VER > 1100
 -#define NORETURN(x) __declspec(noreturn) x
 -#else
 -#define NORETURN(x) x
 -#endif
 -#endif
 -#define HAVE_DECL_SYS_NERR 1
 -#define STDC_HEADERS 1
 -#define HAVE_STDLIB_H 1
 -#define HAVE_STRING_H 1
 -#define HAVE_LIMITS_H 1
 -#define HAVE_FCNTL_H 1
 -#define HAVE_SYS_UTIME_H 1
 -#define HAVE_MEMORY_H 1
 -#define uid_t int
 -#define gid_t int
 -#define GETGROUPS_T int
 -#define HAVE_ALLOCA 1
 -#define HAVE_DUP2 1
 -#define HAVE_MEMCMP 1
 -#define HAVE_MEMMOVE 1
 -#define HAVE_MKDIR 1
 -#define HAVE_STRCASECMP 1
 -#define HAVE_STRNCASECMP 1
 -#define HAVE_STRERROR 1
 -#define HAVE_STRFTIME 1
 -#define HAVE_STRCHR 1
 -#define HAVE_STRSTR 1
 -#define HAVE_STRTOD 1
 -#define HAVE_STRTOL 1
 -#define HAVE_STRTOUL 1
 -#define HAVE_FLOCK 1
 -#define HAVE_VSNPRINTF 1
 -#define HAVE_FINITE 1
 -#define HAVE_FMOD 1
 -#define HAVE_FREXP 1
 -#define HAVE_HYPOT 1
 -#define HAVE_MODF 1
 -#define HAVE_WAITPID 1
 -#define HAVE_CHSIZE 1
 -#define HAVE_TIMES 1
 -#define HAVE__SETJMP 1
 -#define HAVE_TELLDIR 1
 -#define HAVE_SEEKDIR 1
 -#define HAVE_MKTIME 1
 -#define HAVE_COSH 1
 -#define HAVE_SINH 1
 -#define HAVE_TANH 1
 -#define HAVE_EXECVE 1
 -#define HAVE_TZNAME 1
 -#define HAVE_DAYLIGHT 1
 -#define SETPGRP_VOID 1
 -#define inline __inline
 -#define NEED_IO_SEEK_BETWEEN_RW 1
 -#define RSHIFT(x,y) ((x)>>(int)y)
 -#define FILE_COUNT _cnt
 -#define FILE_READPTR _ptr
 -#define DEFAULT_KCODE KCODE_NONE
 -#define DLEXT ".so"
 -#define DLEXT2 ".dll"
 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_OFF_T 1 +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF___INT64 8 +#define SIZEOF_OFF_T 4 +#define SIZEOF_VOIDP 4 +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 +#define SIZEOF_SIZE_T 4 +#define TOKEN_PASTE(x,y) x##y +#ifndef NORETURN +#if _MSC_VER > 1100 +#define NORETURN(x) __declspec(noreturn) x +#else +#define NORETURN(x) x +#endif +#endif +#define HAVE_DECL_SYS_NERR 1 +#define HAVE_FCNTL_H 1 +#define HAVE_SYS_UTIME_H 1 +#define HAVE_MEMORY_H 1 +#define uid_t int +#define gid_t int +#define GETGROUPS_T int +#define HAVE_ALLOCA 1 +#define HAVE_DUP2 1 +#define HAVE_MKDIR 1 +#define HAVE_FLOCK 1 +#define HAVE_VSNPRINTF 1 +#define HAVE_FINITE 1 +#define HAVE_HYPOT 1 +#define HAVE_WAITPID 1 +#define HAVE_CHSIZE 1 +#define HAVE_TIMES 1 +#define HAVE_TELLDIR 1 +#define HAVE_SEEKDIR 1 +#define HAVE_EXECVE 1 +#define HAVE_DAYLIGHT 1 +#define SETPGRP_VOID 1 +#define inline __inline +#define NEED_IO_SEEK_BETWEEN_RW 1 +#define RSHIFT(x,y) ((x)>>(int)y) +#define FILE_COUNT _cnt +#define FILE_READPTR _ptr +#define DEFAULT_KCODE KCODE_NONE +#define DLEXT ".so" +#define DLEXT2 ".dll" diff --git a/src/config.h.win64 b/src/config.h.win64 index e892086..59485fa 100644 --- a/src/config.h.win64 +++ b/src/config.h.win64 @@ -1,81 +1,52 @@ -#define STDC_HEADERS 1
 -#define HAVE_SYS_TYPES_H 1
 -#define HAVE_SYS_STAT_H 1
 -#define HAVE_STDLIB_H 1
 -#define HAVE_STRING_H 1
 -#define HAVE_MEMORY_H 1
 -#define HAVE_FLOAT_H 1
 -#define HAVE_OFF_T 1
 -#define SIZEOF_INT 4
 -#define SIZEOF_SHORT 2
 -#define SIZEOF_LONG 4
 -#define SIZEOF_LONG_LONG 8
 -#define SIZEOF___INT64 8
 -#define SIZEOF_OFF_T 4
 -#define SIZEOF_VOIDP 8
 -#define SIZEOF_FLOAT 4
 -#define SIZEOF_DOUBLE 8
 -#define HAVE_PROTOTYPES 1
 -#define TOKEN_PASTE(x,y) x##y
 -#define HAVE_STDARG_PROTOTYPES 1
 -#ifndef NORETURN
 -#if _MSC_VER > 1100
 -#define NORETURN(x) __declspec(noreturn) x
 -#else
 -#define NORETURN(x) x
 -#endif
 -#endif
 -#define HAVE_DECL_SYS_NERR 1
 -#define STDC_HEADERS 1
 -#define HAVE_STDLIB_H 1
 -#define HAVE_STRING_H 1
 -#define HAVE_LIMITS_H 1
 -#define HAVE_FCNTL_H 1
 -#define HAVE_SYS_UTIME_H 1
 -#define HAVE_MEMORY_H 1
 -#define uid_t int
 -#define gid_t int
 -#define GETGROUPS_T int
 -#define HAVE_ALLOCA 1
 -#define HAVE_DUP2 1
 -#define HAVE_MEMCMP 1
 -#define HAVE_MEMMOVE 1
 -#define HAVE_MKDIR 1
 -#define HAVE_STRCASECMP 1
 -#define HAVE_STRNCASECMP 1
 -#define HAVE_STRERROR 1
 -#define HAVE_STRFTIME 1
 -#define HAVE_STRCHR 1
 -#define HAVE_STRSTR 1
 -#define HAVE_STRTOD 1
 -#define HAVE_STRTOL 1
 -#define HAVE_STRTOUL 1
 -#define HAVE_FLOCK 1
 -#define HAVE_VSNPRINTF 1
 -#define HAVE_FINITE 1
 -#define HAVE_FMOD 1
 -#define HAVE_FREXP 1
 -#define HAVE_HYPOT 1
 -#define HAVE_MODF 1
 -#define HAVE_WAITPID 1
 -#define HAVE_CHSIZE 1
 -#define HAVE_TIMES 1
 -#define HAVE__SETJMP 1
 -#define HAVE_TELLDIR 1
 -#define HAVE_SEEKDIR 1
 -#define HAVE_MKTIME 1
 -#define HAVE_COSH 1
 -#define HAVE_SINH 1
 -#define HAVE_TANH 1
 -#define HAVE_EXECVE 1
 -#define HAVE_TZNAME 1
 -#define HAVE_DAYLIGHT 1
 -#define SETPGRP_VOID 1
 -#define inline __inline
 -#define NEED_IO_SEEK_BETWEEN_RW 1
 -#define RSHIFT(x,y) ((x)>>(int)y)
 -#define FILE_COUNT _cnt
 -#define FILE_READPTR _ptr
 -#define DEFAULT_KCODE KCODE_NONE
 -#define DLEXT ".so"
 -#define DLEXT2 ".dll"
 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_OFF_T 1 +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF___INT64 8 +#define SIZEOF_OFF_T 4 +#define SIZEOF_VOIDP 8 +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 +#define SIZEOF_SIZE_T 8 +#define TOKEN_PASTE(x,y) x##y +#ifndef NORETURN +#if _MSC_VER > 1100 +#define NORETURN(x) __declspec(noreturn) x +#else +#define NORETURN(x) x +#endif +#endif +#define HAVE_DECL_SYS_NERR 1 +#define HAVE_FCNTL_H 1 +#define HAVE_SYS_UTIME_H 1 +#define HAVE_MEMORY_H 1 +#define uid_t int +#define gid_t int +#define GETGROUPS_T int +#define HAVE_ALLOCA 1 +#define HAVE_DUP2 1 +#define HAVE_MKDIR 1 +#define HAVE_FLOCK 1 +#define HAVE_VSNPRINTF 1 +#define HAVE_FINITE 1 +#define HAVE_HYPOT 1 +#define HAVE_WAITPID 1 +#define HAVE_CHSIZE 1 +#define HAVE_TIMES 1 +#define HAVE_TELLDIR 1 +#define HAVE_SEEKDIR 1 +#define HAVE_EXECVE 1 +#define HAVE_DAYLIGHT 1 +#define SETPGRP_VOID 1 +#define inline __inline +#define NEED_IO_SEEK_BETWEEN_RW 1 +#define RSHIFT(x,y) ((x)>>(int)y) +#define FILE_COUNT _cnt +#define FILE_READPTR _ptr +#define DEFAULT_KCODE KCODE_NONE +#define DLEXT ".so" +#define DLEXT2 ".dll" diff --git a/src/config.h.windows.in b/src/config.h.windows.in index 57fb426..0a18db8 100644 --- a/src/config.h.windows.in +++ b/src/config.h.windows.in @@ -1,85 +1,57 @@ -#define STDC_HEADERS 1
 -#define HAVE_SYS_TYPES_H 1
 -#define HAVE_SYS_STAT_H 1
 -#define HAVE_STDLIB_H 1
 -#define HAVE_STRING_H 1
 -#define HAVE_MEMORY_H 1
 -#define HAVE_FLOAT_H 1
 -#define HAVE_OFF_T 1
 -#define SIZEOF_INT 4
 -#define SIZEOF_SHORT 2
 -#define SIZEOF_LONG 4
 -#define SIZEOF_LONG_LONG 8
 -#define SIZEOF___INT64 8
 -#define SIZEOF_OFF_T 4
 -#ifdef _WIN64
 -#define SIZEOF_VOIDP 8
 -#else
 -#define SIZEOF_VOIDP 4
 -#endif
 -#define SIZEOF_FLOAT 4
 -#define SIZEOF_DOUBLE 8
 -#define HAVE_PROTOTYPES 1
 -#define TOKEN_PASTE(x,y) x##y
 -#define HAVE_STDARG_PROTOTYPES 1
 -#ifndef NORETURN
 -#if _MSC_VER > 1100
 -#define NORETURN(x) __declspec(noreturn) x
 -#else
 -#define NORETURN(x) x
 -#endif
 -#endif
 -#define HAVE_DECL_SYS_NERR 1
 -#define STDC_HEADERS 1
 -#define HAVE_STDLIB_H 1
 -#define HAVE_STRING_H 1
 -#define HAVE_LIMITS_H 1
 -#define HAVE_FCNTL_H 1
 -#define HAVE_SYS_UTIME_H 1
 -#define HAVE_MEMORY_H 1
 -#define uid_t int
 -#define gid_t int
 -#define GETGROUPS_T int
 -#define HAVE_ALLOCA 1
 -#define HAVE_DUP2 1
 -#define HAVE_MEMCMP 1
 -#define HAVE_MEMMOVE 1
 -#define HAVE_MKDIR 1
 -#define HAVE_STRCASECMP 1
 -#define HAVE_STRNCASECMP 1
 -#define HAVE_STRERROR 1
 -#define HAVE_STRFTIME 1
 -#define HAVE_STRCHR 1
 -#define HAVE_STRSTR 1
 -#define HAVE_STRTOD 1
 -#define HAVE_STRTOL 1
 -#define HAVE_STRTOUL 1
 -#define HAVE_FLOCK 1
 -#define HAVE_VSNPRINTF 1
 -#define HAVE_FINITE 1
 -#define HAVE_FMOD 1
 -#define HAVE_FREXP 1
 -#define HAVE_HYPOT 1
 -#define HAVE_MODF 1
 -#define HAVE_WAITPID 1
 -#define HAVE_CHSIZE 1
 -#define HAVE_TIMES 1
 -#define HAVE__SETJMP 1
 -#define HAVE_TELLDIR 1
 -#define HAVE_SEEKDIR 1
 -#define HAVE_MKTIME 1
 -#define HAVE_COSH 1
 -#define HAVE_SINH 1
 -#define HAVE_TANH 1
 -#define HAVE_EXECVE 1
 -#define HAVE_TZNAME 1
 -#define HAVE_DAYLIGHT 1
 -#define SETPGRP_VOID 1
 -#define inline __inline
 -#define NEED_IO_SEEK_BETWEEN_RW 1
 -#define RSHIFT(x,y) ((x)>>(int)y)
 -#define FILE_COUNT _cnt
 -#define FILE_READPTR _ptr
 -#define DEFAULT_KCODE KCODE_NONE
 -#define DLEXT ".so"
 -#define DLEXT2 ".dll"
 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_OFF_T 1 +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF___INT64 8 +#define SIZEOF_OFF_T 4 +#ifdef _WIN64 +#define SIZEOF_VOIDP  8 +#define SIZEOF_SIZE_T 8 +#else +#define SIZEOF_VOIDP  4 +#define SIZEOF_SIZE_T 4 +#endif +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 +#define TOKEN_PASTE(x,y) x##y +#ifndef NORETURN +#if _MSC_VER > 1100 +#define NORETURN(x) __declspec(noreturn) x +#else +#define NORETURN(x) x +#endif +#endif +#define HAVE_DECL_SYS_NERR 1 +#define HAVE_FCNTL_H 1 +#define HAVE_SYS_UTIME_H 1 +#define HAVE_MEMORY_H 1 +#define uid_t int +#define gid_t int +#define GETGROUPS_T int +#define HAVE_ALLOCA 1 +#define HAVE_DUP2 1 +#define HAVE_MKDIR 1 +#define HAVE_FLOCK 1 +#define HAVE_VSNPRINTF 1 +#define HAVE_FINITE 1 +#define HAVE_HYPOT 1 +#define HAVE_WAITPID 1 +#define HAVE_CHSIZE 1 +#define HAVE_TIMES 1 +#define HAVE_TELLDIR 1 +#define HAVE_SEEKDIR 1 +#define HAVE_EXECVE 1 +#define HAVE_DAYLIGHT 1 +#define SETPGRP_VOID 1 +#define inline __inline +#define NEED_IO_SEEK_BETWEEN_RW 1 +#define RSHIFT(x,y) ((x)>>(int)y) +#define FILE_COUNT _cnt +#define FILE_READPTR _ptr +#define DEFAULT_KCODE KCODE_NONE +#define DLEXT ".so" +#define DLEXT2 ".dll" diff --git a/src/cp1251.c b/src/cp1251.c index e217037..7b19855 100644 --- a/src/cp1251.c +++ b/src/cp1251.c @@ -200,6 +200,6 @@ OnigEncodingType OnigEncodingCP1251 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/euc_jp.c b/src/euc_jp.c index 5d3c1f9..c1ab89e 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -151,7 +151,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf)  #if 1    if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf))      return ONIGERR_INVALID_CODE_POINT_VALUE; -#endif   +#endif    return 
(int )(p - buf);  } @@ -307,6 +307,6 @@ OnigEncodingType OnigEncodingEUC_JP = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0,    0, 0  }; diff --git a/src/euc_kr.c b/src/euc_kr.c index def311b..9b62514 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -161,7 +161,9 @@ OnigEncodingType OnigEncodingEUC_KR = {    euckr_is_allowed_reverse_match,    NULL, /* init */    NULL, /* is_initialized */ -  is_valid_mbc_string +  is_valid_mbc_string, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0, +  0, 0    };  /* Same with OnigEncodingEUC_KR except the name */ @@ -185,6 +187,6 @@ OnigEncodingType OnigEncodingEUC_CN = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0,    0, 0  }; diff --git a/src/euc_tw.c b/src/euc_tw.c index 8738598..7683336 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -168,6 +168,6 @@ OnigEncodingType OnigEncodingEUC_TW = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/gb18030.c b/src/gb18030.c index d4a1108..7654432 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -535,6 +535,6 @@ OnigEncodingType OnigEncodingGB18030 = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/gperf_fold_key_conv.py b/src/gperf_fold_key_conv.py index 376d343..f453186 100755 --- a/src/gperf_fold_key_conv.py +++ b/src/gperf_fold_key_conv.py @@ -54,11 +54,13 @@ def parse_line(s, key_len):  def parse_file(f, key_len):      print "/* This file was converted by gperf_fold_key_conv.py\n      from gperf output file. 
*/" -    line = f.readline() -    while line: +    while True: +        line = f.readline() +        if not line: +            break +          s = parse_line(line, key_len)          print s -        line = f.readline()  # main diff --git a/src/iso8859_1.c b/src/iso8859_1.c index ff47b80..0ce70a6 100644 --- a/src/iso8859_1.c +++ b/src/iso8859_1.c @@ -272,6 +272,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_10.c b/src/iso8859_10.c index f9804e2..4a34b38 100644 --- a/src/iso8859_10.c +++ b/src/iso8859_10.c @@ -239,6 +239,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_11.c b/src/iso8859_11.c index 108ee8a..da8fda0 100644 --- a/src/iso8859_11.c +++ b/src/iso8859_11.c @@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_13.c b/src/iso8859_13.c index 9585355..23a0265 100644 --- a/src/iso8859_13.c +++ b/src/iso8859_13.c @@ -228,6 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_14.c b/src/iso8859_14.c index 83fc551..7281e93 100644 --- a/src/iso8859_14.c +++ b/src/iso8859_14.c @@ -241,6 +241,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = {    NULL, /* init */    NULL, /* is_initialized */    
onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_15.c b/src/iso8859_15.c index 3a7ad05..3d9f571 100644 --- a/src/iso8859_15.c +++ b/src/iso8859_15.c @@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_16.c b/src/iso8859_16.c index 02022d9..a6977dd 100644 --- a/src/iso8859_16.c +++ b/src/iso8859_16.c @@ -237,6 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_2.c b/src/iso8859_2.c index ecdbb99..4f994c4 100644 --- a/src/iso8859_2.c +++ b/src/iso8859_2.c @@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_3.c b/src/iso8859_3.c index 739f1c9..944a7ae 100644 --- a/src/iso8859_3.c +++ b/src/iso8859_3.c @@ -235,6 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_4.c b/src/iso8859_4.c index 4f2b6a0..3a7c210 100644 --- a/src/iso8859_4.c +++ b/src/iso8859_4.c @@ -237,6 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  
ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_5.c b/src/iso8859_5.c index cf41061..0a8b7ec 100644 --- a/src/iso8859_5.c +++ b/src/iso8859_5.c @@ -226,6 +226,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_6.c b/src/iso8859_6.c index 1ffe99f..1c16c79 100644 --- a/src/iso8859_6.c +++ b/src/iso8859_6.c @@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_7.c b/src/iso8859_7.c index 87288c2..0877b6f 100644 --- a/src/iso8859_7.c +++ b/src/iso8859_7.c @@ -222,6 +222,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_8.c b/src/iso8859_8.c index 8f162a4..bd3e94d 100644 --- a/src/iso8859_8.c +++ b/src/iso8859_8.c @@ -96,6 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/iso8859_9.c b/src/iso8859_9.c index 52589cf..8819f4a 100644 --- a/src/iso8859_9.c +++ b/src/iso8859_9.c @@ -228,6 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; @@ -250,6 +250,6 @@ OnigEncodingType OnigEncodingKOI8 = {    NULL, /* 
init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/koi8_r.c b/src/koi8_r.c index 8adc399..5994ebe 100644 --- a/src/koi8_r.c +++ b/src/koi8_r.c @@ -212,6 +212,6 @@ OnigEncodingType OnigEncodingKOI8_R = {    NULL, /* init */    NULL, /* is_initialized */    onigenc_always_true_is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/make_unicode_fold.sh b/src/make_unicode_fold.sh index 45e9566..d5828e1 100755 --- a/src/make_unicode_fold.sh +++ b/src/make_unicode_fold.sh @@ -23,6 +23,13 @@ ${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf  ${GPERF} ${GPERF_OPT} -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf > ${TMP3}  ./gperf_fold_key_conv.py 3 < ${TMP3} > unicode_fold3_key.c +# remove redundant EOLs before EOF +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold_data.c +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold1_key.c +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold2_key.c +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_fold3_key.c +perl -i -pe 'BEGIN{undef $/}s/\n\n*\z/\n/;' unicode_unfold_key.c +  rm -f ${TMP0} ${TMP1} ${TMP2} ${TMP3}  rm -f unicode_unfold_key.gperf unicode_fold1_key.gperf unicode_fold2_key.gperf unicode_fold3_key.gperf diff --git a/src/make_unicode_fold_data.py b/src/make_unicode_fold_data.py index 5c87d4c..783988c 100755 --- a/src/make_unicode_fold_data.py +++ b/src/make_unicode_fold_data.py @@ -5,7 +5,6 @@  import sys  import re -import codecs  SOURCE_FILE = 'CaseFolding.txt'  GPERF_UNFOLD_KEY_FILE = 'unicode_unfold_key.gperf' diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py index 9a48ced..dc3071a 100755 --- a/src/make_unicode_property_data.py +++ b/src/make_unicode_property_data.py @@ -6,8 +6,6 @@  import sys  import re 
-INCLUDE_GRAPHEME_CLUSTER_DATA = False -  POSIX_LIST = [      'NEWLINE', 'Alpha', 'Blank', 'Cntrl', 'Digit', 'Graph', 'Lower',      'Print', 'Punct', 'Space', 'Upper', 'XDigit', 'Word', 'Alnum', 'ASCII' @@ -427,9 +425,17 @@ argv = sys.argv  argc = len(argv)  POSIX_ONLY = False -if argc >= 2: -  if argv[1] == '-posix': +INCLUDE_GRAPHEME_CLUSTER_DATA = False + +for i in range(1, argc): +  arg = argv[i] +  if arg == '-posix':      POSIX_ONLY = True +  elif arg == '-gc': +    INCLUDE_GRAPHEME_CLUSTER_DATA = True +  else: +    print >> sys.stderr, "Invalid argument: %s" % arg +  OUTPUT_LIST_MODE = not(POSIX_ONLY) @@ -441,11 +447,11 @@ with open('UnicodeData.txt', 'r') as f:  PROPS = DIC.keys()  PROPS = list_sub(PROPS, POSIX_LIST) -dic, props = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property') +parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property')  dic, props = parse_and_merge_properties('Scripts.txt', 'Script')  DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic)) -dic, props = parse_and_merge_properties('PropList.txt',   'Binary Property') -dic, props = parse_and_merge_properties('emoji-data.txt', 'Emoji Property') +parse_and_merge_properties('PropList.txt',   'Binary Property') +parse_and_merge_properties('emoji-data.txt', 'Emoji Property')  PROPS.append('Unknown')  KDIC['Unknown'] = 'Script' diff --git a/src/oniguruma.h b/src/oniguruma.h index ab917c6..746445a 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -36,9 +36,9 @@ extern "C" {  #define ONIGURUMA  #define ONIGURUMA_VERSION_MAJOR   6  #define ONIGURUMA_VERSION_MINOR   9 -#define ONIGURUMA_VERSION_TEENY   0 +#define ONIGURUMA_VERSION_TEENY   1 -#define ONIGURUMA_VERSION_INT     60900 +#define ONIGURUMA_VERSION_INT     60901  #ifndef P_  #if defined(__STDC__) || defined(_WIN32) diff --git a/src/regcomp.c b/src/regcomp.c index 83b9252..400368d 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -138,6 +138,17 @@ int_multiply_cmp(int x, int y, int v)      
return 1;  } +extern int +onig_positive_int_multiply(int x, int y) +{ +  if (x == 0 || y == 0) return 0; + +  if (x < INT_MAX / y) +    return x * y; +  else +    return -1; +} +  #ifndef PLATFORM_UNALIGNED_WORD_ACCESS  static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; @@ -152,7 +163,7 @@ swap_node(Node* a, Node* b)    if (NODE_TYPE(a) == NODE_STRING) {      StrNode* sn = STR_(a); -    if (sn->capa == 0) { +    if (sn->capacity == 0) {        int len = (int )(sn->end - sn->s);        sn->s   = sn->buf;        sn->end = sn->s + len; @@ -161,7 +172,7 @@ swap_node(Node* a, Node* b)    if (NODE_TYPE(b) == NODE_STRING) {      StrNode* sn = STR_(b); -    if (sn->capa == 0) { +    if (sn->capacity == 0) {        int len = (int )(sn->end - sn->s);        sn->s   = sn->buf;        sn->end = sn->s + len; @@ -970,8 +981,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)      if (r != 0) return r;      for (i = 0; i < n; i++) { -      r = add_opcode_rel_addr(reg, OP_PUSH, -                              (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); +      int v = onig_positive_int_multiply(n - i, tlen); +      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; +      r = add_opcode_rel_addr(reg, OP_PUSH, v + (n - i - 1) * SIZE_OP_PUSH);        if (r != 0) return r;        r = compile_tree(NODE_QUANT_BODY(qn), reg, env);        if (r != 0) return r; @@ -991,49 +1003,49 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)  }  static int -compile_length_option_node(EnclosureNode* node, regex_t* reg) +compile_length_option_node(BagNode* node, regex_t* reg)  {    int tlen;    OnigOptionType prev = reg->options;    reg->options = node->o.options; -  tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); +  tlen = compile_length_tree(NODE_BAG_BODY(node), reg);    reg->options = prev;    return tlen;  }  static int -compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) +compile_option_node(BagNode* node, regex_t* reg, 
ScanEnv* env)  {    int r;    OnigOptionType prev = reg->options;    reg->options = node->o.options; -  r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); +  r = compile_tree(NODE_BAG_BODY(node), reg, env);    reg->options = prev;    return r;  }  static int -compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) +compile_length_bag_node(BagNode* node, regex_t* reg)  {    int len;    int tlen; -  if (node->type == ENCLOSURE_OPTION) +  if (node->type == BAG_OPTION)      return compile_length_option_node(node, reg); -  if (NODE_ENCLOSURE_BODY(node)) { -    tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); +  if (NODE_BAG_BODY(node)) { +    tlen = compile_length_tree(NODE_BAG_BODY(node), reg);      if (tlen < 0) return tlen;    }    else      tlen = 0;    switch (node->type) { -  case ENCLOSURE_MEMORY: +  case BAG_MEMORY:  #ifdef USE_CALL      if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { @@ -1069,23 +1081,27 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)      }      break; -  case ENCLOSURE_STOP_BACKTRACK: +  case BAG_STOP_BACKTRACK:      if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { -      QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); +      int v; +      QuantNode* qn; + +      qn = QUANT_(NODE_BAG_BODY(node));        tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);        if (tlen < 0) return tlen; -      len = tlen * qn->lower -        + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP; +      v = onig_positive_int_multiply(qn->lower, tlen); +      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; +      len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;      }      else {        len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;      }      break; -  case ENCLOSURE_IF_ELSE: +  case BAG_IF_ELSE:      { -      Node* cond = NODE_ENCLOSURE_BODY(node); +      Node* cond = NODE_BAG_BODY(node);        Node* Then = node->te.Then;        Node* Else = node->te.Else; @@ -1109,18 
+1125,18 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)      }      break; -  default: -    return ONIGERR_TYPE_BUG; +  case BAG_OPTION: +    len = tlen;      break;    }    return len;  } -static int get_char_length_tree(Node* node, regex_t* reg, int* len); +static int get_char_len_node(Node* node, regex_t* reg, int* len);  static int -compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) +compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)  {    int r;    int len; @@ -1133,12 +1149,12 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)      NODE_STATUS_ADD(node, ADDR_FIXED);      r = add_abs_addr(reg, (int )node->m.called_addr);      if (r != 0) return r; -    len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); +    len = compile_length_tree(NODE_BAG_BODY(node), reg);      len += SIZE_OP_RETURN;      r = add_opcode_rel_addr(reg, OP_JUMP, len);      if (r != 0) return r; -    r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); +    r = compile_tree(NODE_BAG_BODY(node), reg, env);      if (r != 0) return r;      r = add_opcode(reg, OP_RETURN);      return r; @@ -1151,7 +1167,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)      NODE_STATUS_ADD(node, ADDR_FIXED);      r = add_abs_addr(reg, (int )node->m.called_addr);      if (r != 0) return r; -    len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); +    len = compile_length_tree(NODE_BAG_BODY(node), reg);      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);      if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))        len += (NODE_IS_RECURSION(node) @@ -1172,7 +1188,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)    if (r != 0) return r;    r = add_mem_num(reg, node->m.regnum);    if (r != 0) return r; -  r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); +  r = compile_tree(NODE_BAG_BODY(node), reg, env);    if (r != 0) return 
r;  #ifdef USE_CALL @@ -1201,22 +1217,22 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)  }  static int -compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) +compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)  {    int r, len;    switch (node->type) { -  case ENCLOSURE_MEMORY: -    r = compile_enclosure_memory_node(node, reg, env); +  case BAG_MEMORY: +    r = compile_bag_memory_node(node, reg, env);      break; -  case ENCLOSURE_OPTION: +  case BAG_OPTION:      r = compile_option_node(node, reg, env);      break; -  case ENCLOSURE_STOP_BACKTRACK: +  case BAG_STOP_BACKTRACK:      if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { -      QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); +      QuantNode* qn = QUANT_(NODE_BAG_BODY(node));        r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);        if (r != 0) return r; @@ -1235,16 +1251,16 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)      else {        r = add_opcode(reg, OP_ATOMIC_START);        if (r != 0) return r; -      r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); +      r = compile_tree(NODE_BAG_BODY(node), reg, env);        if (r != 0) return r;        r = add_opcode(reg, OP_ATOMIC_END);      }      break; -  case ENCLOSURE_IF_ELSE: +  case BAG_IF_ELSE:      {        int cond_len, then_len, jump_len; -      Node* cond = NODE_ENCLOSURE_BODY(node); +      Node* cond = NODE_BAG_BODY(node);        Node* Then = node->te.Then;        Node* Else = node->te.Else; @@ -1283,10 +1299,6 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)        }      }      break; - -  default: -    return ONIGERR_TYPE_BUG; -    break;    }    return r; @@ -1304,30 +1316,30 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)    }    switch (node->type) { -  case ANCHOR_PREC_READ: +  case ANCR_PREC_READ:      len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;      break; -  case 
ANCHOR_PREC_READ_NOT: +  case ANCR_PREC_READ_NOT:      len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;      break; -  case ANCHOR_LOOK_BEHIND: +  case ANCR_LOOK_BEHIND:      len = SIZE_OP_LOOK_BEHIND + tlen;      break; -  case ANCHOR_LOOK_BEHIND_NOT: +  case ANCR_LOOK_BEHIND_NOT:      len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;      break; -  case ANCHOR_WORD_BOUNDARY: -  case ANCHOR_NO_WORD_BOUNDARY: +  case ANCR_WORD_BOUNDARY: +  case ANCR_NO_WORD_BOUNDARY:  #ifdef USE_WORD_BEGIN_END -  case ANCHOR_WORD_BEGIN: -  case ANCHOR_WORD_END: +  case ANCR_WORD_BEGIN: +  case ANCR_WORD_END:  #endif      len = SIZE_OP_WORD_BOUNDARY;      break; -  case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: -  case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +  case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +  case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:      len = SIZE_OPCODE;      break; @@ -1346,14 +1358,14 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)    enum OpCode op;    switch (node->type) { -  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break; -  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break; -  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break; -  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break; -  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break; -  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; - -  case ANCHOR_WORD_BOUNDARY: +  case ANCR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break; +  case ANCR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break; +  case ANCR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break; +  case ANCR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break; +  case ANCR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break; +  case ANCR_BEGIN_POSITION: r = 
add_opcode(reg, OP_BEGIN_POSITION); break; + +  case ANCR_WORD_BOUNDARY:      op = OP_WORD_BOUNDARY;    word:      r = add_opcode(reg, op); @@ -1361,27 +1373,27 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)      r = add_mode(reg, (ModeType )node->ascii_mode);      break; -  case ANCHOR_NO_WORD_BOUNDARY: +  case ANCR_NO_WORD_BOUNDARY:      op = OP_NO_WORD_BOUNDARY; goto word;      break;  #ifdef USE_WORD_BEGIN_END -  case ANCHOR_WORD_BEGIN: +  case ANCR_WORD_BEGIN:      op = OP_WORD_BEGIN; goto word;      break; -  case ANCHOR_WORD_END: +  case ANCR_WORD_END:      op = OP_WORD_END; goto word;      break;  #endif -  case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +  case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:      r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);      break; -  case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +  case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:      r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);      break; -  case ANCHOR_PREC_READ: +  case ANCR_PREC_READ:      r = add_opcode(reg, OP_PREC_READ_START);      if (r != 0) return r;      r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); @@ -1389,7 +1401,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)      r = add_opcode(reg, OP_PREC_READ_END);      break; -  case ANCHOR_PREC_READ_NOT: +  case ANCR_PREC_READ_NOT:      len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);      if (len < 0) return len;      r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END); @@ -1399,13 +1411,13 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)      r = add_opcode(reg, OP_PREC_READ_NOT_END);      break; -  case ANCHOR_LOOK_BEHIND: +  case ANCR_LOOK_BEHIND:      {        int n;        r = add_opcode(reg, OP_LOOK_BEHIND);        if (r != 0) return r;        if (node->char_len < 0) { -        r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n); +        r = 
get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);          if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;        }        else @@ -1417,7 +1429,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)      }      break; -  case ANCHOR_LOOK_BEHIND_NOT: +  case ANCR_LOOK_BEHIND_NOT:      {        int n; @@ -1426,7 +1438,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)                                len + SIZE_OP_LOOK_BEHIND_NOT_END);        if (r != 0) return r;        if (node->char_len < 0) { -        r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n); +        r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);          if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;        }        else @@ -1635,8 +1647,8 @@ compile_length_tree(Node* node, regex_t* reg)      r = compile_length_quantifier_node(QUANT_(node), reg);      break; -  case NODE_ENCLOSURE: -    r = compile_length_enclosure_node(ENCLOSURE_(node), reg); +  case NODE_BAG: +    r = compile_length_bag_node(BAG_(node), reg);      break;    case NODE_ANCHOR: @@ -1826,8 +1838,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)      r = compile_quantifier_node(QUANT_(node), reg, env);      break; -  case NODE_ENCLOSURE: -    r = compile_enclosure_node(ENCLOSURE_(node), reg, env); +  case NODE_BAG: +    r = compile_bag_node(BAG_(node), reg, env);      break;    case NODE_ANCHOR: @@ -1873,10 +1885,10 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); -      if (en->type == ENCLOSURE_MEMORY) { +      BagNode* en = BAG_(node); +      if (en->type == BAG_MEMORY) {          if (NODE_IS_NAMED_GROUP(node)) {            (*counter)++;            map[en->m.regnum].new_val = *counter; @@ -1890,8 +1902,8 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)            r = noname_disable_map(plink, map, 
counter);          }        } -      else if (en->type == ENCLOSURE_IF_ELSE) { -        r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter); +      else if (en->type == BAG_IF_ELSE) { +        r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter);          if (r != 0) return r;          if (IS_NOT_NULL(en->te.Then)) {            r = noname_disable_map(&(en->te.Then), map, counter); @@ -1964,14 +1976,14 @@ renumber_by_map(Node* node, GroupNumRemap* map)      r = renumber_by_map(NODE_BODY(node), map);      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        r = renumber_by_map(NODE_BODY(node), map);        if (r != 0) return r; -      if (en->type == ENCLOSURE_IF_ELSE) { +      if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) {            r = renumber_by_map(en->te.Then, map);            if (r != 0) return r; @@ -2021,14 +2033,14 @@ numbered_ref_check(Node* node)      r = numbered_ref_check(NODE_BODY(node));      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        r = numbered_ref_check(NODE_BODY(node));        if (r != 0) return r; -      if (en->type == ENCLOSURE_IF_ELSE) { +      if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) {            r = numbered_ref_check(en->te.Then);            if (r != 0) return r; @@ -2099,14 +2111,14 @@ static int  fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)  {    int i, offset; -  EnclosureNode* en; +  BagNode* en;    AbsAddrType addr;    for (i = 0; i < uslist->num; i++) {      if (! 
NODE_IS_ADDR_FIXED(uslist->us[i].target))        return ONIGERR_PARSER_BUG; -    en = ENCLOSURE_(uslist->us[i].target); +    en = BAG_(uslist->us[i].target);      addr   = en->m.called_addr;      offset = uslist->us[i].offset; @@ -2122,7 +2134,7 @@ fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)  /* fixed size pattern node only */  static int -get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) +get_char_len_node1(Node* node, regex_t* reg, int* len, int level)  {    int tlen;    int r = 0; @@ -2132,7 +2144,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)    switch (NODE_TYPE(node)) {    case NODE_LIST:      do { -      r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level); +      r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);        if (r == 0)          *len = distance_add(*len, tlen);      } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); @@ -2143,9 +2155,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)        int tlen2;        int varlen = 0; -      r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level); +      r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);        while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) { -        r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level); +        r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level);          if (r == 0) {            if (tlen != tlen2)              varlen = 1; @@ -2185,7 +2197,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)            *len = 0;          }          else { -          r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); +          r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level);            if (r == 0)              *len = distance_multiply(tlen, qn->lower);          } @@ -2198,7 +2210,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)  #ifdef USE_CALL    case NODE_CALL:      if (! 
NODE_IS_RECURSION(node)) -      r = get_char_length_tree1(NODE_BODY(node), reg, len, level); +      r = get_char_len_node1(NODE_BODY(node), reg, len, level);      else        r = GET_CHAR_LEN_VARLEN;      break; @@ -2209,17 +2221,17 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)      *len = 1;      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        switch (en->type) { -      case ENCLOSURE_MEMORY: +      case BAG_MEMORY:  #ifdef USE_CALL          if (NODE_IS_CLEN_FIXED(node))            *len = en->char_len;          else { -          r = get_char_length_tree1(NODE_BODY(node), reg, len, level); +          r = get_char_len_node1(NODE_BODY(node), reg, len, level);            if (r == 0) {              en->char_len = *len;              NODE_STATUS_ADD(node, CLEN_FIXED); @@ -2227,23 +2239,23 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)          }          break;  #endif -      case ENCLOSURE_OPTION: -      case ENCLOSURE_STOP_BACKTRACK: -        r = get_char_length_tree1(NODE_BODY(node), reg, len, level); +      case BAG_OPTION: +      case BAG_STOP_BACKTRACK: +        r = get_char_len_node1(NODE_BODY(node), reg, len, level);          break; -      case ENCLOSURE_IF_ELSE: +      case BAG_IF_ELSE:          {            int clen, elen; -          r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level); +          r = get_char_len_node1(NODE_BODY(node), reg, &clen, level);            if (r == 0) {              if (IS_NOT_NULL(en->te.Then)) { -              r = get_char_length_tree1(en->te.Then, reg, &tlen, level); +              r = get_char_len_node1(en->te.Then, reg, &tlen, level);                if (r != 0) break;              }              else tlen = 0;              if (IS_NOT_NULL(en->te.Else)) { -              r = get_char_length_tree1(en->te.Else, reg, &elen, level); +              r = get_char_len_node1(en->te.Else, 
reg, &elen, level);                if (r != 0) break;              }              else elen = 0; @@ -2257,9 +2269,6 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)            }          }          break; - -      default: -        break;        }      }      break; @@ -2281,9 +2290,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)  }  static int -get_char_length_tree(Node* node, regex_t* reg, int* len) +get_char_len_node(Node* node, regex_t* reg, int* len)  { -  return get_char_length_tree1(node, reg, len, 0); +  return get_char_len_node1(node, reg, len, 0);  }  /* x is not included y ==>  1 : 0 */ @@ -2450,7 +2459,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)            code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,                                       xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); -          return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); +          return onig_is_code_in_cc(reg->enc, code, cc) == 0;          }          break; @@ -2520,10 +2529,8 @@ get_head_value_node(Node* node, int exact, regex_t* reg)        if (sn->end <= sn->s)          break; -      if (exact != 0 && -          !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { -      } -      else { +      if (exact == 0 || +          ! 
IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) {          n = node;        }      } @@ -2541,23 +2548,23 @@ get_head_value_node(Node* node, int exact, regex_t* reg)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        switch (en->type) { -      case ENCLOSURE_OPTION: +      case BAG_OPTION:          {            OnigOptionType options = reg->options; -          reg->options = ENCLOSURE_(node)->o.options; +          reg->options = BAG_(node)->o.options;            n = get_head_value_node(NODE_BODY(node), exact, reg);            reg->options = options;          }          break; -      case ENCLOSURE_MEMORY: -      case ENCLOSURE_STOP_BACKTRACK: -      case ENCLOSURE_IF_ELSE: +      case BAG_MEMORY: +      case BAG_STOP_BACKTRACK: +      case BAG_IF_ELSE:          n = get_head_value_node(NODE_BODY(node), exact, reg);          break;        } @@ -2565,7 +2572,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)      break;    case NODE_ANCHOR: -    if (ANCHOR_(node)->type == ANCHOR_PREC_READ) +    if (ANCHOR_(node)->type == ANCR_PREC_READ)        n = get_head_value_node(NODE_BODY(node), exact, reg);      break; @@ -2578,7 +2585,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)  }  static int -check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) +check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask)  {    NodeType type;    int r = 0; @@ -2591,29 +2598,29 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)    case NODE_LIST:    case NODE_ALT:      do { -      r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask, +      r = check_type_tree(NODE_CAR(node), type_mask, bag_mask,                            anchor_mask);      } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));      break;    case NODE_QUANT: -    r = check_type_tree(NODE_BODY(node), 
type_mask, enclosure_mask, anchor_mask); +    r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); -      if (((1<<en->type) & enclosure_mask) == 0) +      BagNode* en = BAG_(node); +      if (((1<<en->type) & bag_mask) == 0)          return 1; -      r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); -      if (r == 0 && en->type == ENCLOSURE_IF_ELSE) { +      r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask); +      if (r == 0 && en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) { -          r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask); +          r = check_type_tree(en->te.Then, type_mask, bag_mask, anchor_mask);            if (r != 0) break;          }          if (IS_NOT_NULL(en->te.Else)) { -          r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask); +          r = check_type_tree(en->te.Else, type_mask, bag_mask, anchor_mask);          }        }      } @@ -2625,7 +2632,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)        return 1;      if (IS_NOT_NULL(NODE_BODY(node))) -      r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); +      r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);      break;    case NODE_GIMMICK: @@ -2666,7 +2673,7 @@ tree_min_len(Node* node, ScanEnv* env)        Node* t = NODE_BODY(node);        if (NODE_IS_RECURSION(node)) {          if (NODE_IS_MIN_FIXED(t)) -          len = ENCLOSURE_(t)->min_len; +          len = BAG_(t)->min_len;        }        else          len = tree_min_len(t, env); @@ -2717,11 +2724,11 @@ tree_min_len(Node* node, ScanEnv* env)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        switch 
(en->type) { -      case ENCLOSURE_MEMORY: +      case BAG_MEMORY:          if (NODE_IS_MIN_FIXED(node))            len = en->min_len;          else { @@ -2738,11 +2745,11 @@ tree_min_len(Node* node, ScanEnv* env)          }          break; -      case ENCLOSURE_OPTION: -      case ENCLOSURE_STOP_BACKTRACK: +      case BAG_OPTION: +      case BAG_STOP_BACKTRACK:          len = tree_min_len(NODE_BODY(node), env);          break; -      case ENCLOSURE_IF_ELSE: +      case BAG_IF_ELSE:          {            OnigLen elen; @@ -2854,11 +2861,11 @@ tree_max_len(Node* node, ScanEnv* env)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        switch (en->type) { -      case ENCLOSURE_MEMORY: +      case BAG_MEMORY:          if (NODE_IS_MAX_FIXED(node))            len = en->max_len;          else { @@ -2875,11 +2882,11 @@ tree_max_len(Node* node, ScanEnv* env)          }          break; -      case ENCLOSURE_OPTION: -      case ENCLOSURE_STOP_BACKTRACK: +      case BAG_OPTION: +      case BAG_STOP_BACKTRACK:          len = tree_max_len(NODE_BODY(node), env);          break; -      case ENCLOSURE_IF_ELSE: +      case BAG_IF_ELSE:          {            OnigLen tlen, elen; @@ -2931,12 +2938,12 @@ check_backrefs(Node* node, ScanEnv* env)      r = check_backrefs(NODE_BODY(node), env);      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      r = check_backrefs(NODE_BODY(node), env);      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node); -      if (en->type == ENCLOSURE_IF_ELSE) { +      if (en->type == BAG_IF_ELSE) {          if (r != 0) return r;          if (IS_NOT_NULL(en->te.Then)) {            r = check_backrefs(en->te.Then, env); @@ -3039,11 +3046,11 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)      r = infinite_recursive_call_check(NODE_BODY(node), env, head);      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -  
    EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node); -      if (en->type == ENCLOSURE_MEMORY) { +      if (en->type == BAG_MEMORY) {          if (NODE_IS_MARK2(node))            return 0;          else if (NODE_IS_MARK1(node)) @@ -3055,7 +3062,7 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)            NODE_STATUS_REMOVE(node, MARK2);          }        } -      else if (en->type == ENCLOSURE_IF_ELSE) { +      else if (en->type == BAG_IF_ELSE) {          int eret;          ret = infinite_recursive_call_check(NODE_BODY(node), env, head); @@ -3116,11 +3123,11 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env)      r = infinite_recursive_call_check_trav(NODE_BODY(node), env);      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node); -      if (en->type == ENCLOSURE_MEMORY) { +      if (en->type == BAG_MEMORY) {          if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {            int ret; @@ -3134,7 +3141,7 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env)            NODE_STATUS_REMOVE(node, MARK1);          }        } -      else if (en->type == ENCLOSURE_IF_ELSE) { +      else if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) {            r = infinite_recursive_call_check_trav(en->te.Then, env);            if (r != 0) return r; @@ -3189,11 +3196,11 @@ recursive_call_check(Node* node)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node); -      if (en->type == ENCLOSURE_MEMORY) { +      if (en->type == BAG_MEMORY) {          if (NODE_IS_MARK2(node))            return 0;          else if (NODE_IS_MARK1(node)) @@ -3204,7 +3211,7 @@ recursive_call_check(Node* node)            NODE_STATUS_REMOVE(node, MARK2);          }        } -      else if (en->type == ENCLOSURE_IF_ELSE) { +      else if (en->type == 
BAG_IF_ELSE) {          r = 0;          if (IS_NOT_NULL(en->te.Then)) {            r |= recursive_call_check(en->te.Then); @@ -3265,13 +3272,13 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      {        int ret;        int state1; -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node); -      if (en->type == ENCLOSURE_MEMORY) { +      if (en->type == BAG_MEMORY) {          if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {            if (! NODE_IS_RECURSION(node)) {              NODE_STATUS_ADD(node, MARK1); @@ -3294,7 +3301,7 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)        if (ret == FOUND_CALLED_NODE)          r = FOUND_CALLED_NODE; -      if (en->type == ENCLOSURE_IF_ELSE) { +      if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) {            ret = recursive_call_check_trav(en->te.Then, env, state1);            if (ret == FOUND_CALLED_NODE) @@ -3318,6 +3325,15 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)  #endif +#define IN_ALT          (1<<0) +#define IN_NOT          (1<<1) +#define IN_REAL_REPEAT  (1<<2) +#define IN_VAR_REPEAT   (1<<3) +#define IN_ZERO_REPEAT  (1<<4) +#define IN_MULTI_ENTRY  (1<<5) +#define IN_LOOK_BEHIND  (1<<6) + +  /* divide different length alternatives in look-behind.    
(?<=A|B) ==> (?<=A)|(?<=B)    (?<!A|B) ==> (?<!A)(?<!B) @@ -3343,7 +3359,7 @@ divide_look_behind_alternatives(Node* node)      NODE_CAR(np) = insert_node;    } -  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { +  if (anc_type == ANCR_LOOK_BEHIND_NOT) {      np = node;      do {        NODE_SET_TYPE(np, NODE_LIST);  /* alt -> list */ @@ -3358,7 +3374,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)    int r, len;    AnchorNode* an = ANCHOR_(node); -  r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len); +  r = get_char_len_node(NODE_ANCHOR_BODY(an), reg, &len);    if (r == 0)      an->char_len = len;    else if (r == GET_CHAR_LEN_VARLEN) @@ -3398,7 +3414,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)            if (IS_NOT_NULL(x)) {              y = get_head_value_node(next_node,  0, reg);              if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) { -              Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); +              Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);                CHECK_NULL_RETURN_MEMERR(en);                NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);                swap_node(node, en); @@ -3409,9 +3425,9 @@ next_setup(Node* node, Node* next_node, regex_t* reg)        }      }    } -  else if (type == NODE_ENCLOSURE) { -    EnclosureNode* en = ENCLOSURE_(node); -    if (en->type == ENCLOSURE_MEMORY) { +  else if (type == NODE_BAG) { +    BagNode* en = BAG_(node); +    if (en->type == BAG_MEMORY) {        node = NODE_BODY(node);        goto retry;      } @@ -3527,7 +3543,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p    for (i = 0; i < item_num; i++) {      snode = onig_node_new_str(NULL, NULL);      if (IS_NULL(snode)) goto mem_err; -     +      for (j = 0; j < items[i].code_len; j++) {        len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);        if (len < 0) { @@ -3544,7 +3560,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem 
items[], UChar *p        goto mem_err2;      } -    if (items[i].byte_len != slen) { +    if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {        Node *rem;        UChar *q = p + items[i].byte_len; @@ -3596,37 +3612,69 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p  }  static int -expand_case_fold_string(Node* node, regex_t* reg) +is_good_case_fold_items_for_search(OnigEncoding enc, int slen, +                                   int n, OnigCaseFoldCodeItem items[])  { +  int i, len; +  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + +  for (i = 0; i < n; i++) { +    OnigCaseFoldCodeItem* item = items + i; + +    if (item->code_len != 1)    return 0; +    if (item->byte_len != slen) return 0; +    len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf); +    if (len != slen) return 0; +  } + +  return 1; +} +  #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8 +static int +expand_case_fold_string(Node* node, regex_t* reg, int state) +{    int r, n, len, alt_num; +  int fold_len; +  int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;    UChar *start, *end, *p; +  UChar* foldp;    Node *top_root, *root, *snode, *prev_node;    OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; -  StrNode* sn = STR_(node); +  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; +  StrNode* sn;    if (NODE_STRING_IS_AMBIG(node)) return 0; +  sn = STR_(node); +    start = sn->s;    end   = sn->end;    if (start >= end) return 0; +  is_in_look_behind = (state & IN_LOOK_BEHIND) != 0; +    r = 0;    top_root = root = prev_node = snode = NULL_NODE;    alt_num = 1;    p = start;    while (p < end) { -    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end, -                                           items); +    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, +                                           p, end, items);      if (n < 0) {        r = n;        goto err;      }      len = enclen(reg->enc, 
p); +    is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items); -    if (n == 0) { +    if (is_in_look_behind || +        (IS_NOT_NULL(snode) || +         (is_good +          /* expand single char case: ex. /(?i:a)/ */ +          && !(p == start && p + len >= end)))) {        if (IS_NULL(snode)) {          if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {            top_root = root = onig_node_list_add(NULL_NODE, prev_node); @@ -3644,10 +3692,49 @@ expand_case_fold_string(Node* node, regex_t* reg)              goto mem_err;            }          } + +        prev_is_ambig = -1; /* -1: new */ +        prev_is_good  =  0; /* escape compiler warning */ +      } +      else { +        prev_is_ambig = NODE_STRING_IS_AMBIG(snode); +        prev_is_good  = NODE_STRING_IS_GOOD_AMBIG(snode);        } -      r = onig_node_str_cat(snode, p, p + len); -      if (r != 0) goto err; +      if (n != 0) { +        foldp = p; +        fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, +                                         &foldp, end, buf); +        foldp = buf; +      } +      else { +        foldp = p; fold_len = len; +      } + +      if ((prev_is_ambig == 0 && n != 0) || +          (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) { +        if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) { +          top_root = root = onig_node_list_add(NULL_NODE, prev_node); +          if (IS_NULL(root)) { +            onig_node_free(prev_node); +            goto mem_err; +          } +        } + +        prev_node = snode = onig_node_new_str(foldp, foldp + fold_len); +        if (IS_NULL(snode)) goto mem_err; +        if (IS_NULL(onig_node_list_add(root, snode))) { +          onig_node_free(snode); +          goto mem_err; +        } +      } +      else { +        r = onig_node_str_cat(snode, foldp, foldp + fold_len); +        if (r != 0) goto err; +      } + +      if (n != 0) NODE_STRING_SET_AMBIG(snode); +      if (is_good != 0) 
NODE_STRING_SET_GOOD_AMBIG(snode);      }      else {        alt_num *= (n + 1); @@ -3768,22 +3855,22 @@ quantifiers_memory_node_info(Node* node)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        switch (en->type) { -      case ENCLOSURE_MEMORY: +      case BAG_MEMORY:          if (NODE_IS_RECURSION(node)) {            return QUANT_BODY_IS_EMPTY_REC;          }          return QUANT_BODY_IS_EMPTY_MEM;          break; -      case ENCLOSURE_OPTION: -      case ENCLOSURE_STOP_BACKTRACK: +      case BAG_OPTION: +      case BAG_STOP_BACKTRACK:          r = quantifiers_memory_node_info(NODE_BODY(node));          break; -      case ENCLOSURE_IF_ELSE: +      case BAG_IF_ELSE:          {            int v;            r = quantifiers_memory_node_info(NODE_BODY(node)); @@ -3797,8 +3884,6 @@ quantifiers_memory_node_info(Node* node)            }          }          break; -      default: -        break;        }      }      break; @@ -3818,13 +3903,6 @@ quantifiers_memory_node_info(Node* node)  #endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */ -#define IN_ALT          (1<<0) -#define IN_NOT          (1<<1) -#define IN_REAL_REPEAT  (1<<2) -#define IN_VAR_REPEAT   (1<<3) -#define IN_ZERO_REPEAT  (1<<4) -#define IN_MULTI_ENTRY  (1<<5) -  #ifdef USE_CALL  #ifdef __GNUC__ @@ -3901,18 +3979,18 @@ setup_call2_call(Node* node)        setup_call2_call(NODE_BODY(node));      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node); -      if (en->type == ENCLOSURE_MEMORY) { +      if (en->type == BAG_MEMORY) {          if (! 
NODE_IS_MARK1(node)) {            NODE_STATUS_ADD(node, MARK1);            setup_call2_call(NODE_BODY(node));            NODE_STATUS_REMOVE(node, MARK1);          }        } -      else if (en->type == ENCLOSURE_IF_ELSE) { +      else if (en->type == BAG_IF_ELSE) {          setup_call2_call(NODE_BODY(node));          if (IS_NOT_NULL(en->te.Then))            setup_call2_call(en->te.Then); @@ -3935,7 +4013,7 @@ setup_call2_call(Node* node)          cn->entry_count++;          NODE_STATUS_ADD(called, CALLED); -        ENCLOSURE_(called)->m.entry_count++; +        BAG_(called)->m.entry_count++;          setup_call2_call(called);        }        NODE_STATUS_REMOVE(node, MARK1); @@ -3974,18 +4052,18 @@ setup_call(Node* node, ScanEnv* env, int state)        r = 0;      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node); -      if (en->type == ENCLOSURE_MEMORY) { +      if (en->type == BAG_MEMORY) {          if ((state & IN_ZERO_REPEAT) != 0) {            NODE_STATUS_ADD(node, IN_ZERO_REPEAT); -          ENCLOSURE_(node)->m.entry_count--; +          BAG_(node)->m.entry_count--;          }          r = setup_call(NODE_BODY(node), env, state);        } -      else if (en->type == ENCLOSURE_IF_ELSE) { +      else if (en->type == BAG_IF_ELSE) {          r = setup_call(NODE_BODY(node), env, state);          if (r != 0) return r;          if (IS_NOT_NULL(en->te.Then)) { @@ -4040,15 +4118,15 @@ setup_call2(Node* node)        r = setup_call2(NODE_BODY(node));      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      if (! 
NODE_IS_IN_ZERO_REPEAT(node))        r = setup_call2(NODE_BODY(node));      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        if (r != 0) return r; -      if (en->type == ENCLOSURE_IF_ELSE) { +      if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) {            r = setup_call2(en->te.Then);            if (r != 0) return r; @@ -4104,12 +4182,12 @@ setup_called_state_call(Node* node, int state)        AnchorNode* an = ANCHOR_(node);        switch (an->type) { -      case ANCHOR_PREC_READ_NOT: -      case ANCHOR_LOOK_BEHIND_NOT: +      case ANCR_PREC_READ_NOT: +      case ANCR_LOOK_BEHIND_NOT:          state |= IN_NOT;          /* fall */ -      case ANCHOR_PREC_READ: -      case ANCHOR_LOOK_BEHIND: +      case ANCR_PREC_READ: +      case ANCR_LOOK_BEHIND:          setup_called_state_call(NODE_ANCHOR_BODY(an), state);          break;        default: @@ -4118,11 +4196,11 @@ setup_called_state_call(Node* node, int state)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node); -      if (en->type == ENCLOSURE_MEMORY) { +      if (en->type == BAG_MEMORY) {          if (NODE_IS_MARK1(node)) {            if ((~en->m.called_state & state) != 0) {              en->m.called_state |= state; @@ -4136,7 +4214,7 @@ setup_called_state_call(Node* node, int state)            NODE_STATUS_REMOVE(node, MARK1);          }        } -      else if (en->type == ENCLOSURE_IF_ELSE) { +      else if (en->type == BAG_IF_ELSE) {          if (IS_NOT_NULL(en->te.Then)) {            setup_called_state_call(en->te.Then, state);          } @@ -4177,22 +4255,22 @@ setup_called_state(Node* node, int state)      break;  #endif -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        switch (en->type) { -      case ENCLOSURE_MEMORY: +      case BAG_MEMORY:          if 
(en->m.entry_count > 1)            state |= IN_MULTI_ENTRY;          en->m.called_state |= state;          /* fall */ -      case ENCLOSURE_OPTION: -      case ENCLOSURE_STOP_BACKTRACK: +      case BAG_OPTION: +      case BAG_STOP_BACKTRACK:          setup_called_state(NODE_BODY(node), state);          break; -      case ENCLOSURE_IF_ELSE: +      case BAG_IF_ELSE:          setup_called_state(NODE_BODY(node), state);          if (IS_NOT_NULL(en->te.Then))            setup_called_state(en->te.Then, state); @@ -4221,12 +4299,12 @@ setup_called_state(Node* node, int state)        AnchorNode* an = ANCHOR_(node);        switch (an->type) { -      case ANCHOR_PREC_READ_NOT: -      case ANCHOR_LOOK_BEHIND_NOT: +      case ANCR_PREC_READ_NOT: +      case ANCR_LOOK_BEHIND_NOT:          state |= IN_NOT;          /* fall */ -      case ANCHOR_PREC_READ: -      case ANCHOR_LOOK_BEHIND: +      case ANCR_PREC_READ: +      case ANCR_LOOK_BEHIND:          setup_called_state(NODE_ANCHOR_BODY(an), state);          break;        default: @@ -4259,56 +4337,57 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)  /* allowed node types in look-behind */  #define ALLOWED_TYPE_IN_LB \    ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \ -  | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \ +  | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \    | NODE_BIT_CALL | NODE_BIT_GIMMICK) -#define ALLOWED_ENCLOSURE_IN_LB       ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION ) -#define ALLOWED_ENCLOSURE_IN_LB_NOT   (1<<ENCLOSURE_OPTION) +#define ALLOWED_BAG_IN_LB       ( 1<<BAG_MEMORY | 1<<BAG_OPTION ) +#define ALLOWED_BAG_IN_LB_NOT   (1<<BAG_OPTION)  #define ALLOWED_ANCHOR_IN_LB \ -  ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \ -  | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \ -  | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \ -  | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY 
\ -  | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY ) +  ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \ +  | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \ +  | ANCR_WORD_BEGIN | ANCR_WORD_END \ +  | ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ +  | ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )  #define ALLOWED_ANCHOR_IN_LB_NOT \ -  ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \ -  | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \ -  | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \ -  | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ -  | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY ) +  ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \ +  | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \ +  | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \ +  | ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \ +  | ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )    int r;    AnchorNode* an = ANCHOR_(node);    switch (an->type) { -  case ANCHOR_PREC_READ: +  case ANCR_PREC_READ:      r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);      break; -  case ANCHOR_PREC_READ_NOT: +  case ANCR_PREC_READ_NOT:      r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);      break; -  case ANCHOR_LOOK_BEHIND: +  case ANCR_LOOK_BEHIND:      {        r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB, -                          ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB); +                          ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);        if (r < 0) return r;        if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; -      r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env); +      r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);        if (r != 0) return r;        r = setup_look_behind(node, reg, env);      }      break; -  case ANCHOR_LOOK_BEHIND_NOT: +  case 
ANCR_LOOK_BEHIND_NOT:      {        r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB, -                          ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); +                          ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);        if (r < 0) return r;        if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; -      r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env); +      r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND), +                     env);        if (r != 0) return r;        r = setup_look_behind(node, reg, env);      } @@ -4346,9 +4425,9 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)  #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT        qn->body_empty_info = quantifiers_memory_node_info(body);        if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) { -        if (NODE_TYPE(body) == NODE_ENCLOSURE && -            ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) { -          MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum); +        if (NODE_TYPE(body) == NODE_BAG && +            BAG_(body)->type == BAG_MEMORY) { +          MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);          }        }  #else @@ -4439,7 +4518,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)    case NODE_STRING:      if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) { -      r = expand_case_fold_string(node, reg); +      r = expand_case_fold_string(node, reg, state);      }      break; @@ -4462,21 +4541,21 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        switch (en->type) { -      case ENCLOSURE_OPTION: +      case BAG_OPTION:          {            OnigOptionType options = reg->options; -          reg->options = ENCLOSURE_(node)->o.options; +          reg->options = 
BAG_(node)->o.options;            r = setup_tree(NODE_BODY(node), reg, state, env);            reg->options = options;          }          break; -      case ENCLOSURE_MEMORY: +      case BAG_MEMORY:  #ifdef USE_CALL          state |= en->m.called_state;  #endif @@ -4488,7 +4567,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)          r = setup_tree(NODE_BODY(node), reg, state, env);          break; -      case ENCLOSURE_STOP_BACKTRACK: +      case BAG_STOP_BACKTRACK:          {            Node* target = NODE_BODY(node);            r = setup_tree(target, reg, state, env); @@ -4503,7 +4582,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)          }          break; -      case ENCLOSURE_IF_ELSE: +      case BAG_IF_ELSE:          r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);          if (r != 0) return r;          if (IS_NOT_NULL(en->te.Then)) { @@ -4538,35 +4617,83 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)    return r;  } -/* set skip map for Boyer-Moore search */  static int -set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, -            UChar skip[], int** int_skip) +set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand, +                                          UChar* s, UChar* end, +                                          UChar skip[], int* roffset)  { -  int i, len; +  int i, j, k, len, offset; +  int n, clen; +  UChar* p; +  OnigEncoding enc; +  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; +  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + +  enc = reg->enc; +  offset = ENC_GET_SKIP_OFFSET(enc); +  if (offset == ENC_SKIP_OFFSET_1_OR_0) { +    UChar* p = s; +    while (1) { +      len = enclen(enc, p); +      if (p + len >= end) { +        if (len == 1) offset = 1; +        else          offset = 0; +        break; +      } +      p += len; +    } +  }    len = (int )(end - s); -  if (len < ONIG_CHAR_TABLE_SIZE) { -    for (i = 0; i < 
ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; +  if (len + offset >= UCHAR_MAX) +    return ONIGERR_PARSER_BUG; -    for (i = 0; i < len - 1; i++) -      skip[s[i]] = len - 1 - i; +  *roffset = offset; + +  for (i = 0; i < CHAR_MAP_SIZE; i++) { +    skip[i] = (UChar )(len + offset);    } -  else { -    if (IS_NULL(*int_skip)) { -      *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); -      if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; + +  for (p = s; p < end; ) { +    int z; + +    clen = enclen(enc, p); +    if (p + clen > end) clen = (int )(end - p); + +    len = (int )(end - p); +    for (j = 0; j < clen; j++) { +      z = len - j + (offset - 1); +      if (z <= 0) break; +      skip[p[j]] = z; +    } + +    if (case_expand != 0) { +      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, +                                             p, end, items); +      for (k = 0; k < n; k++) { +        ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); +        for (j = 0; j < clen; j++) { +          z = len - j + (offset - 1); +          if (z <= 0) break; +          if (skip[buf[j]] > z) +            skip[buf[j]] = z; +        } +      }      } -    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; -    for (i = 0; i < len - 1; i++) -      (*int_skip)[s[i]] = len - 1 - i; +    p += clen;    } +    return 0;  } +  #define OPT_EXACT_MAXLEN   24 +#if OPT_EXACT_MAXLEN >= UCHAR_MAX +#error Too big OPT_EXACT_MAXLEN +#endif +  typedef struct {    OnigLen min;  /* min byte length */    OnigLen max;  /* max byte length */ @@ -4589,26 +4716,27 @@ typedef struct {    MinMax     mmd;   /* position */    OptAnc     anc;    int        reach_end; -  int        ignore_case; +  int        case_fold; +  int        good_case_fold;    int        len;    UChar      s[OPT_EXACT_MAXLEN]; -} OptExact; +} OptStr;  typedef struct {    MinMax    mmd;    /* position */    OptAnc    anc;    int       value;  /* weighted value */ -  UChar     
map[ONIG_CHAR_TABLE_SIZE]; +  UChar     map[CHAR_MAP_SIZE];  } OptMap;  typedef struct { -  MinMax    len; -  OptAnc    anc; -  OptExact  exb;     /* boundary */ -  OptExact  exm;     /* middle */ -  OptExact  expr;    /* prec read (?=...) */ -  OptMap    map;     /* boundary */ -} NodeOpt; +  MinMax  len; +  OptAnc  anc; +  OptStr  sb;     /* boundary */ +  OptStr  sm;     /* middle */ +  OptStr  spr;    /* prec read (?=...) */ +  OptMap  map;    /* boundary */ +} OptNode;  static int @@ -4640,15 +4768,15 @@ distance_value(MinMax* mm)  {    /* 1000 / (min-max-dist + 1) */    static const short int dist_vals[] = { -    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,  -      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,  -      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,  -      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,  -      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,  -      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,  -      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,  -      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,  -      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,  +    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100, +      91,   83,   77,   71,   67,   63,   59,   56,   53,   50, +      48,   45,   43,   42,   40,   38,   37,   36,   34,   33, +      32,   31,   30,   29,   29,   28,   27,   26,   26,   25, +      24,   24,   23,   23,   22,   22,   21,   21,   20,   20, +      20,   19,   19,   19,   18,   18,   18,   17,   17,   17, +      16,   16,   16,   16,   15,   15,   15,   15,   14,   14, +      14,   14,   14,   14,   13,   13,   13,   13,   13,   13, +      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,        11,   11,   11,   11,   11,   10,   10,   10,   10,   10    }; @@ -4684,7 +4812,7 @@ comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)  static int  is_equal_mml(MinMax* a, MinMax* 
b)  { -  return (a->min == b->min && a->max == b->max) ? 1 : 0; +  return a->min == b->min && a->max == b->max;  }  static void @@ -4756,15 +4884,15 @@ concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,      to->right |= left->right;    }    else { -    to->right |= (left->right & ANCHOR_PREC_READ_NOT); +    to->right |= (left->right & ANCR_PREC_READ_NOT);    }  }  static int  is_left(int a)  { -  if (a == ANCHOR_END_BUF  || a == ANCHOR_SEMI_END_BUF || -      a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT) +  if (a == ANCR_END_BUF  || a == ANCR_SEMI_END_BUF || +      a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)      return 0;    return 1; @@ -4804,39 +4932,47 @@ alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)  }  static int -is_full_opt_exact(OptExact* e) +is_full_opt_exact(OptStr* e)  { -  return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0); +  return e->len >= OPT_EXACT_MAXLEN;  }  static void -clear_opt_exact(OptExact* e) +clear_opt_exact(OptStr* e)  {    clear_mml(&e->mmd);    clear_opt_anc_info(&e->anc); -  e->reach_end   = 0; -  e->ignore_case = 0; -  e->len         = 0; -  e->s[0]        = '\0'; +  e->reach_end      = 0; +  e->case_fold      = 0; +  e->good_case_fold = 0; +  e->len            = 0; +  e->s[0]           = '\0';  }  static void -copy_opt_exact(OptExact* to, OptExact* from) +copy_opt_exact(OptStr* to, OptStr* from)  {    *to = *from;  }  static int -concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) +concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)  {    int i, j, len, r;    UChar *p, *end;    OptAnc tanc; -  if (! to->ignore_case && add->ignore_case) { -    if (to->len >= add->len) return 0;  /* avoid */ +  if (add->case_fold != 0) { +    if (! 
to->case_fold) { +      if (to->len > 1 || to->len >= add->len) return 0;  /* avoid */ -    to->ignore_case = 1; +      to->case_fold = 1; +    } +    else { +      if (to->good_case_fold != 0) { +        if (add->good_case_fold == 0) return 0; +      } +    }    }    r = 0; @@ -4863,7 +4999,7 @@ concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)  }  static void -concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc) +concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)  {    int i, j, len;    UChar *p; @@ -4876,10 +5012,13 @@ concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)    }    to->len = i; + +  if (p >= end && to->len == (int )(end - s)) +    to->reach_end = 1;  }  static void -alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env) +alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)  {    int i, j, len; @@ -4908,14 +5047,17 @@ alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)      to->reach_end = 0;    }    to->len = i; -  to->ignore_case |= add->ignore_case; +  if (add->case_fold != 0) +    to->case_fold = 1; +  if (add->good_case_fold == 0) +    to->good_case_fold = 0;    alt_merge_opt_anc_info(&to->anc, &add->anc);    if (! 
to->reach_end) to->anc.right = 0;  }  static void -select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt) +select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)  {    int vn, va; @@ -4938,8 +5080,11 @@ select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)      if (alt->len > 1) va += 5;    } -  if (now->ignore_case == 0) vn *= 2; -  if (alt->ignore_case == 0) va *= 2; +  if (now->case_fold == 0) vn *= 2; +  if (alt->case_fold == 0) va *= 2; + +  if (now->good_case_fold != 0) vn *= 4; +  if (alt->good_case_fold != 0) va *= 4;    if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)      copy_opt_exact(now, alt); @@ -5030,14 +5175,24 @@ select_opt_map(OptMap* now, OptMap* alt)  }  static int -comp_opt_exact_or_map(OptExact* e, OptMap* m) +comp_opt_exact_or_map(OptStr* e, OptMap* m)  {  #define COMP_EM_BASE  20    int ae, am; +  int case_value;    if (m->value <= 0) return -1; -  ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); +  if (e->case_fold != 0) { +    if (e->good_case_fold != 0) +      case_value = 2; +    else +      case_value = 1; +  } +  else +    case_value = 3; + +  ae = COMP_EM_BASE * e->len * case_value;    am = COMP_EM_BASE * 5 * 2 / m->value;    return comp_distance_value(&e->mmd, &m->mmd, ae, am);  } @@ -5057,7 +5212,7 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)    alt_merge_mml(&to->mmd, &add->mmd);    val = 0; -  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { +  for (i = 0; i < CHAR_MAP_SIZE; i++) {      if (add->map[i])        to->map[i] = 1; @@ -5070,42 +5225,42 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)  }  static void -set_bound_node_opt_info(NodeOpt* opt, MinMax* plen) +set_bound_node_opt_info(OptNode* opt, MinMax* plen)  { -  copy_mml(&(opt->exb.mmd),  plen); -  copy_mml(&(opt->expr.mmd), plen); -  copy_mml(&(opt->map.mmd),  plen); +  copy_mml(&(opt->sb.mmd),  plen); +  copy_mml(&(opt->spr.mmd), plen); +  copy_mml(&(opt->map.mmd), plen);  }  static void 
-clear_node_opt_info(NodeOpt* opt) +clear_node_opt_info(OptNode* opt)  {    clear_mml(&opt->len);    clear_opt_anc_info(&opt->anc); -  clear_opt_exact(&opt->exb); -  clear_opt_exact(&opt->exm); -  clear_opt_exact(&opt->expr); +  clear_opt_exact(&opt->sb); +  clear_opt_exact(&opt->sm); +  clear_opt_exact(&opt->spr);    clear_opt_map(&opt->map);  }  static void -copy_node_opt_info(NodeOpt* to, NodeOpt* from) +copy_node_opt_info(OptNode* to, OptNode* from)  {    *to = *from;  }  static void -concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add) +concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)  { -  int exb_reach, exm_reach; +  int sb_reach, sm_reach;    OptAnc tanc;    concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);    copy_opt_anc_info(&to->anc, &tanc); -  if (add->exb.len > 0 && to->len.max == 0) { -    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max); -    copy_opt_anc_info(&add->exb.anc, &tanc); +  if (add->sb.len > 0 && to->len.max == 0) { +    concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max); +    copy_opt_anc_info(&add->sb.anc, &tanc);    }    if (add->map.value > 0 && to->len.max == 0) { @@ -5113,38 +5268,38 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)        add->map.anc.left |= to->anc.left;    } -  exb_reach = to->exb.reach_end; -  exm_reach = to->exm.reach_end; +  sb_reach = to->sb.reach_end; +  sm_reach = to->sm.reach_end;    if (add->len.max != 0) -    to->exb.reach_end = to->exm.reach_end = 0; +    to->sb.reach_end = to->sm.reach_end = 0; -  if (add->exb.len > 0) { -    if (exb_reach) { -      concat_opt_exact(&to->exb, &add->exb, enc); -      clear_opt_exact(&add->exb); +  if (add->sb.len > 0) { +    if (sb_reach) { +      concat_opt_exact(&to->sb, &add->sb, enc); +      clear_opt_exact(&add->sb);      } -    else if (exm_reach) { -      concat_opt_exact(&to->exm, &add->exb, enc); -      
clear_opt_exact(&add->exb); +    else if (sm_reach) { +      concat_opt_exact(&to->sm, &add->sb, enc); +      clear_opt_exact(&add->sb);      }    } -  select_opt_exact(enc, &to->exm, &add->exb); -  select_opt_exact(enc, &to->exm, &add->exm); +  select_opt_exact(enc, &to->sm, &add->sb); +  select_opt_exact(enc, &to->sm, &add->sm); -  if (to->expr.len > 0) { +  if (to->spr.len > 0) {      if (add->len.max > 0) { -      if (to->expr.len > (int )add->len.max) -        to->expr.len = add->len.max; +      if (to->spr.len > (int )add->len.max) +        to->spr.len = add->len.max; -      if (to->expr.mmd.max == 0) -        select_opt_exact(enc, &to->exb, &to->expr); +      if (to->spr.mmd.max == 0) +        select_opt_exact(enc, &to->sb, &to->spr);        else -        select_opt_exact(enc, &to->exm, &to->expr); +        select_opt_exact(enc, &to->sm, &to->spr);      }    } -  else if (add->expr.len > 0) { -    copy_opt_exact(&to->expr, &add->expr); +  else if (add->spr.len > 0) { +    copy_opt_exact(&to->spr, &add->spr);    }    select_opt_map(&to->map, &add->map); @@ -5152,12 +5307,12 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)  }  static void -alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env) +alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)  {    alt_merge_opt_anc_info(&to->anc, &add->anc); -  alt_merge_opt_exact(&to->exb,  &add->exb, env); -  alt_merge_opt_exact(&to->exm,  &add->exm, env); -  alt_merge_opt_exact(&to->expr, &add->expr, env); +  alt_merge_opt_exact(&to->sb,  &add->sb, env); +  alt_merge_opt_exact(&to->sm,  &add->sm, env); +  alt_merge_opt_exact(&to->spr, &add->spr, env);    alt_merge_opt_map(env->enc, &to->map, &add->map);    alt_merge_mml(&to->len, &add->len); @@ -5167,11 +5322,11 @@ alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)  #define MAX_NODE_OPT_INFO_REF_COUNT    5  static int -optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) +optimize_nodes(Node* node, OptNode* 
opt, OptEnv* env)  {    int i;    int r; -  NodeOpt xo; +  OptNode xo;    OnigEncoding enc;    r = 0; @@ -5217,7 +5372,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)        /* int is_raw = NODE_STRING_IS_RAW(node); */        if (! NODE_STRING_IS_AMBIG(node)) { -        concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc); +        concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);          if (slen > 0) {            add_char_opt_map(&opt->map, *(sn->s), enc);          } @@ -5231,8 +5386,10 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)            max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;          }          else { -          concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc); -          opt->exb.ignore_case = 1; +          concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc); +          opt->sb.case_fold = 1; +          if (NODE_STRING_IS_GOOD_AMBIG(node)) +            opt->sb.good_case_fold = 1;            if (slen > 0) {              r = add_char_amb_opt_map(&opt->map, sn->s, sn->end, @@ -5245,9 +5402,6 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)          set_mml(&opt->len, slen, max);        } - -      if (opt->exb.len == slen) -        opt->exb.reach_end = 1;      }      break; @@ -5321,27 +5475,27 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)    case NODE_ANCHOR:      switch (ANCHOR_(node)->type) { -    case ANCHOR_BEGIN_BUF: -    case ANCHOR_BEGIN_POSITION: -    case ANCHOR_BEGIN_LINE: -    case ANCHOR_END_BUF: -    case ANCHOR_SEMI_END_BUF: -    case ANCHOR_END_LINE: -    case ANCHOR_PREC_READ_NOT: -    case ANCHOR_LOOK_BEHIND: +    case ANCR_BEGIN_BUF: +    case ANCR_BEGIN_POSITION: +    case ANCR_BEGIN_LINE: +    case ANCR_END_BUF: +    case ANCR_SEMI_END_BUF: +    case ANCR_END_LINE: +    case ANCR_PREC_READ_NOT: +    case ANCR_LOOK_BEHIND:        add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);        break; -    case ANCHOR_PREC_READ: +    case ANCR_PREC_READ:        {          r = 
optimize_nodes(NODE_BODY(node), &xo, env);          if (r == 0) { -          if (xo.exb.len > 0) -            copy_opt_exact(&opt->expr, &xo.exb); -          else if (xo.exm.len > 0) -            copy_opt_exact(&opt->expr, &xo.exm); +          if (xo.sb.len > 0) +            copy_opt_exact(&opt->spr, &xo.sb); +          else if (xo.sm.len > 0) +            copy_opt_exact(&opt->spr, &xo.sm); -          opt->expr.reach_end = 0; +          opt->spr.reach_end = 0;            if (xo.map.value > 0)              copy_opt_map(&opt->map, &xo.map); @@ -5349,7 +5503,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)        }        break; -    case ANCHOR_LOOK_BEHIND_NOT: +    case ANCR_LOOK_BEHIND_NOT:        break;      }      break; @@ -5384,7 +5538,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)        set_mml(&opt->len, 0, INFINITE_LEN);      else {        OnigOptionType save = env->options; -      env->options = ENCLOSURE_(NODE_BODY(node))->o.options; +      env->options = BAG_(NODE_BODY(node))->o.options;        r = optimize_nodes(NODE_BODY(node), opt, env);        env->options = save;      } @@ -5401,31 +5555,31 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)        if (qn->lower > 0) {          copy_node_opt_info(opt, &xo); -        if (xo.exb.len > 0) { -          if (xo.exb.reach_end) { -            for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) { -              int rc = concat_opt_exact(&opt->exb, &xo.exb, enc); +        if (xo.sb.len > 0) { +          if (xo.sb.reach_end) { +            for (i = 2; i <= qn->lower && ! 
is_full_opt_exact(&opt->sb); i++) { +              int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);                if (rc > 0) break;              } -            if (i < qn->lower) opt->exb.reach_end = 0; +            if (i < qn->lower) opt->sb.reach_end = 0;            }          }          if (qn->lower != qn->upper) { -          opt->exb.reach_end = 0; -          opt->exm.reach_end = 0; +          opt->sb.reach_end = 0; +          opt->sm.reach_end = 0;          }          if (qn->lower > 1) -          opt->exm.reach_end = 0; +          opt->sm.reach_end = 0;        }        if (IS_REPEAT_INFINITE(qn->upper)) {          if (env->mmd.max == 0 &&              NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {            if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) -            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML); +            add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);            else -            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF); +            add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);          }          max = (xo.len.max > 0 ? 
INFINITE_LEN : 0); @@ -5439,12 +5593,12 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)      }      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      { -      EnclosureNode* en = ENCLOSURE_(node); +      BagNode* en = BAG_(node);        switch (en->type) { -      case ENCLOSURE_OPTION: +      case BAG_OPTION:          {            OnigOptionType save = env->options; @@ -5454,7 +5608,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)          }          break; -      case ENCLOSURE_MEMORY: +      case BAG_MEMORY:  #ifdef USE_CALL          en->opt_count++;          if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { @@ -5470,23 +5624,23 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)  #endif            {              r = optimize_nodes(NODE_BODY(node), opt, env); -            if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) { +            if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {                if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum)) -                remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK); +                remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);              }            }          break; -      case ENCLOSURE_STOP_BACKTRACK: +      case BAG_STOP_BACKTRACK:          r = optimize_nodes(NODE_BODY(node), opt, env);          break; -      case ENCLOSURE_IF_ELSE: +      case BAG_IF_ELSE:          {            OptEnv nenv;            copy_opt_env(&nenv, env); -          r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv); +          r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);            if (r == 0) {              add_mml(&nenv.mmd, &xo.len);              concat_left_node_opt_info(enc, opt, &xo); @@ -5524,39 +5678,47 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)  }  static int -set_optimize_exact(regex_t* reg, OptExact* e) +set_optimize_exact(regex_t* reg, OptStr* e)  {    int r;    if (e->len == 0) return 0; -  if (e->ignore_case) { -    
reg->exact = (UChar* )xmalloc(e->len); -    CHECK_NULL_RETURN_MEMERR(reg->exact); -    xmemcpy(reg->exact, e->s, e->len); -    reg->exact_end = reg->exact + e->len; -    reg->optimize = OPTIMIZE_EXACT_IC; +  reg->exact = (UChar* )xmalloc(e->len); +  CHECK_NULL_RETURN_MEMERR(reg->exact); +  xmemcpy(reg->exact, e->s, e->len); +  reg->exact_end = reg->exact + e->len; + +  if (e->case_fold) { +    reg->optimize = OPTIMIZE_STR_CASE_FOLD; +    if (e->good_case_fold != 0) { +      if (e->len >= 2) { +        r = set_sunday_quick_search_or_bmh_skip_table(reg, 1, +                             reg->exact, reg->exact_end, +                             reg->map, &(reg->map_offset)); +        if (r != 0) return r; +        reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST; +      } +    }    }    else {      int allow_reverse; -    reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len); -    CHECK_NULL_RETURN_MEMERR(reg->exact); -    reg->exact_end = reg->exact + e->len; -       allow_reverse =        ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); -    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { -      r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, -                      reg->map, &(reg->int_map)); +    if (e->len >= 2 || (e->len >= 1 && allow_reverse)) { +      r = set_sunday_quick_search_or_bmh_skip_table(reg, 0, +                                         reg->exact, reg->exact_end, +                                         reg->map, &(reg->map_offset));        if (r != 0) return r;        reg->optimize = (allow_reverse != 0 -                       ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV); +                       ? 
OPTIMIZE_STR_FAST +                       : OPTIMIZE_STR_FAST_STEP_FORWARD);      }      else { -      reg->optimize = OPTIMIZE_EXACT; +      reg->optimize = OPTIMIZE_STR;      }    } @@ -5575,7 +5737,7 @@ set_optimize_map(regex_t* reg, OptMap* m)  {    int i; -  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) +  for (i = 0; i < CHAR_MAP_SIZE; i++)      reg->map[i] = m->map[i];    reg->optimize   = OPTIMIZE_MAP; @@ -5590,8 +5752,8 @@ set_optimize_map(regex_t* reg, OptMap* m)  static void  set_sub_anchor(regex_t* reg, OptAnc* anc)  { -  reg->sub_anchor |= anc->left  & ANCHOR_BEGIN_LINE; -  reg->sub_anchor |= anc->right & ANCHOR_END_LINE; +  reg->sub_anchor |= anc->left  & ANCR_BEGIN_LINE; +  reg->sub_anchor |= anc->right & ANCR_END_LINE;  }  #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) @@ -5602,7 +5764,7 @@ static int  set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)  {    int r; -  NodeOpt opt; +  OptNode opt;    OptEnv env;    env.enc            = reg->enc; @@ -5614,29 +5776,29 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)    r = optimize_nodes(node, &opt, &env);    if (r != 0) return r; -  reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF | -        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML | -        ANCHOR_LOOK_BEHIND); +  reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF | +        ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML | +        ANCR_LOOK_BEHIND); -  if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) -    reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML; +  if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0) +    reg->anchor &= ~ANCR_ANYCHAR_INF_ML; -  reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | -       ANCHOR_PREC_READ_NOT); +  reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF | +                                  ANCR_PREC_READ_NOT); -  if (reg->anchor & (ANCHOR_END_BUF | 
ANCHOR_SEMI_END_BUF)) { +  if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {      reg->anchor_dmin = opt.len.min;      reg->anchor_dmax = opt.len.max;    } -  if (opt.exb.len > 0 || opt.exm.len > 0) { -    select_opt_exact(reg->enc, &opt.exb, &opt.exm); -    if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) { +  if (opt.sb.len > 0 || opt.sm.len > 0) { +    select_opt_exact(reg->enc, &opt.sb, &opt.sm); +    if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {        goto set_map;      }      else { -      r = set_optimize_exact(reg, &opt.exb); -      set_sub_anchor(reg, &opt.exb.anc); +      r = set_optimize_exact(reg, &opt.sb); +      set_sub_anchor(reg, &opt.sb.anc);      }    }    else if (opt.map.value > 0) { @@ -5645,9 +5807,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)      set_sub_anchor(reg, &opt.map.anc);    }    else { -    reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE; +    reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;      if (opt.len.max == 0) -      reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE; +      reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;    }  #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) @@ -5665,6 +5827,7 @@ clear_optimize_info(regex_t* reg)    reg->anchor_dmax   = 0;    reg->sub_anchor    = 0;    reg->exact_end     = (UChar* )NULL; +  reg->map_offset    = 0;    reg->threshold_len = 0;    if (IS_NOT_NULL(reg->exact)) {      xfree(reg->exact); @@ -5733,41 +5896,41 @@ print_anchor(FILE* f, int anchor)    fprintf(f, "["); -  if (anchor & ANCHOR_BEGIN_BUF) { +  if (anchor & ANCR_BEGIN_BUF) {      fprintf(f, "begin-buf");      q = 1;    } -  if (anchor & ANCHOR_BEGIN_LINE) { +  if (anchor & ANCR_BEGIN_LINE) {      if (q) fprintf(f, ", ");      q = 1;      fprintf(f, "begin-line");    } -  if (anchor & ANCHOR_BEGIN_POSITION) { +  if (anchor & ANCR_BEGIN_POSITION) {      if (q) fprintf(f, ", ");      q = 1;      fprintf(f, 
"begin-pos");    } -  if (anchor & ANCHOR_END_BUF) { +  if (anchor & ANCR_END_BUF) {      if (q) fprintf(f, ", ");      q = 1;      fprintf(f, "end-buf");    } -  if (anchor & ANCHOR_SEMI_END_BUF) { +  if (anchor & ANCR_SEMI_END_BUF) {      if (q) fprintf(f, ", ");      q = 1;      fprintf(f, "semi-end-buf");    } -  if (anchor & ANCHOR_END_LINE) { +  if (anchor & ANCR_END_LINE) {      if (q) fprintf(f, ", ");      q = 1;      fprintf(f, "end-line");    } -  if (anchor & ANCHOR_ANYCHAR_INF) { +  if (anchor & ANCR_ANYCHAR_INF) {      if (q) fprintf(f, ", ");      q = 1;      fprintf(f, "anychar-inf");    } -  if (anchor & ANCHOR_ANYCHAR_INF_ML) { +  if (anchor & ANCR_ANYCHAR_INF_ML) {      if (q) fprintf(f, ", ");      fprintf(f, "anychar-inf-ml");    } @@ -5778,12 +5941,13 @@ print_anchor(FILE* f, int anchor)  static void  print_optimize_info(FILE* f, regex_t* reg)  { -  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", -                              "EXACT_IC", "MAP" }; +  static const char* on[] = { "NONE", "STR", +                              "STR_FAST", "STR_FAST_STEP_FORWARD", +                              "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };    fprintf(f, "optimize: %s\n", on[reg->optimize]);    fprintf(f, "  anchor: "); print_anchor(f, reg->anchor); -  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) +  if ((reg->anchor & ANCR_END_BUF_MASK) != 0)      print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);    fprintf(f, "\n"); @@ -5804,14 +5968,14 @@ print_optimize_info(FILE* f, regex_t* reg)    else if (reg->optimize & OPTIMIZE_MAP) {      int c, i, n = 0; -    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) +    for (i = 0; i < CHAR_MAP_SIZE; i++)        if (reg->map[i]) n++;      fprintf(f, "map: n=%d\n", n);      if (n > 0) {        c = 0;        fputc('[', f); -      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { +      for (i = 0; i < CHAR_MAP_SIZE; i++) {          if (reg->map[i] != 0) {            if (c > 0)  
fputs(", ", f);            c++; @@ -5832,7 +5996,7 @@ print_optimize_info(FILE* f, regex_t* reg)  extern RegexExt*  onig_get_regex_ext(regex_t* reg)  { -  if (IS_NULL(REG_EXTP(reg))) { +  if (IS_NULL(reg->extp)) {      RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));      if (IS_NULL(ext)) return 0; @@ -5845,10 +6009,10 @@ onig_get_regex_ext(regex_t* reg)      ext->callout_list = 0;  #endif -    REG_EXTPL(reg) = (void* )ext; +    reg->extp = ext;    } -  return REG_EXTP(reg); +  return reg->extp;  }  static void @@ -5895,12 +6059,10 @@ onig_free_body(regex_t* reg)    if (IS_NOT_NULL(reg)) {      if (IS_NOT_NULL(reg->p))                xfree(reg->p);      if (IS_NOT_NULL(reg->exact))            xfree(reg->exact); -    if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map); -    if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);      if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range); -    if (IS_NOT_NULL(REG_EXTP(reg))) { -      free_regex_ext(REG_EXTP(reg)); -      REG_EXTPL(reg) = 0; +    if (IS_NOT_NULL(reg->extp)) { +      free_regex_ext(reg->extp); +      reg->extp = 0;      }      onig_names_free(reg); @@ -6060,7 +6222,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,      if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)  #ifdef USE_CALLOUT -        || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0) +        || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)  #endif          )        reg->stack_pop_level = STACK_POP_LEVEL_ALL; @@ -6152,9 +6314,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl    (reg)->syntax           = syntax;    (reg)->optimize         = 0;    (reg)->exact            = (UChar* )NULL; -  (reg)->int_map          = (int* )NULL; -  (reg)->int_map_backward = (int* )NULL; -  REG_EXTPL(reg) = NULL; +  (reg)->extp             = (RegexExt* )NULL;    (reg)->p                = (UChar* )NULL;    (reg)->alloc            = 
0; @@ -6309,11 +6469,11 @@ onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_        found = 0;      }      else { -      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); +      found = onig_is_in_code_range(cc->mbuf->p, code) != 0;      }    }    else { -    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); +    found = BITSET_AT(cc->bs, code) != 0;    }    if (IS_NCCLASS_NOT(cc)) @@ -6387,12 +6547,35 @@ print_indent_tree(FILE* f, Node* node, int indent)      break;    case NODE_STRING: -    fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node); -    for (p = STR_(node)->s; p < STR_(node)->end; p++) { -      if (*p >= 0x20 && *p < 0x7f) -        fputc(*p, f); -      else { -        fprintf(f, " 0x%02x", *p); +    { +      char* mode; +      char* dont; +      char* good; + +      if (NODE_STRING_IS_RAW(node)) +        mode = "-raw"; +      else if (NODE_STRING_IS_AMBIG(node)) +        mode = "-ambig"; +      else +        mode = ""; + +      if (NODE_STRING_IS_GOOD_AMBIG(node)) +        good = "-good"; +      else +        good = ""; + +      if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) +        dont = " (dont-opt)"; +      else +        dont = ""; + +      fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node); +      for (p = STR_(node)->s; p < STR_(node)->end; p++) { +        if (*p >= 0x20 && *p < 0x7f) +          fputc(*p, f); +        else { +          fprintf(f, " 0x%02x", *p); +        }        }      }      break; @@ -6436,36 +6619,36 @@ print_indent_tree(FILE* f, Node* node, int indent)    case NODE_ANCHOR:      fprintf(f, "<anchor:%p> ", node);      switch (ANCHOR_(node)->type) { -    case ANCHOR_BEGIN_BUF:        fputs("begin buf",      f); break; -    case ANCHOR_END_BUF:          fputs("end buf",        f); break; -    case ANCHOR_BEGIN_LINE:       fputs("begin line",     f); break; -    case ANCHOR_END_LINE:         fputs("end line",       f); break; -    case ANCHOR_SEMI_END_BUF:     
fputs("semi end buf",   f); break; -    case ANCHOR_BEGIN_POSITION:   fputs("begin position", f); break; - -    case ANCHOR_WORD_BOUNDARY:    fputs("word boundary",     f); break; -    case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break; +    case ANCR_BEGIN_BUF:        fputs("begin buf",      f); break; +    case ANCR_END_BUF:          fputs("end buf",        f); break; +    case ANCR_BEGIN_LINE:       fputs("begin line",     f); break; +    case ANCR_END_LINE:         fputs("end line",       f); break; +    case ANCR_SEMI_END_BUF:     fputs("semi end buf",   f); break; +    case ANCR_BEGIN_POSITION:   fputs("begin position", f); break; + +    case ANCR_WORD_BOUNDARY:    fputs("word boundary",     f); break; +    case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;  #ifdef USE_WORD_BEGIN_END -    case ANCHOR_WORD_BEGIN:       fputs("word begin", f);     break; -    case ANCHOR_WORD_END:         fputs("word end", f);       break; +    case ANCR_WORD_BEGIN:       fputs("word begin", f);     break; +    case ANCR_WORD_END:         fputs("word end", f);       break;  #endif -    case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +    case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:        fputs("extended-grapheme-cluster boundary", f); break; -    case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: +    case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:        fputs("no-extended-grapheme-cluster boundary", f); break; -    case ANCHOR_PREC_READ: +    case ANCR_PREC_READ:        fprintf(f, "prec read\n");        print_indent_tree(f, NODE_BODY(node), indent + add);        break; -    case ANCHOR_PREC_READ_NOT: +    case ANCR_PREC_READ_NOT:        fprintf(f, "prec read not\n");        print_indent_tree(f, NODE_BODY(node), indent + add);        break; -    case ANCHOR_LOOK_BEHIND: +    case ANCR_LOOK_BEHIND:        fprintf(f, "look behind\n");        print_indent_tree(f, NODE_BODY(node), indent + add);        break; -    case ANCHOR_LOOK_BEHIND_NOT: +    
case ANCR_LOOK_BEHIND_NOT:        fprintf(f, "look behind not\n");        print_indent_tree(f, NODE_BODY(node), indent + add);        break; @@ -6506,20 +6689,20 @@ print_indent_tree(FILE* f, Node* node, int indent)      print_indent_tree(f, NODE_BODY(node), indent + add);      break; -  case NODE_ENCLOSURE: -    fprintf(f, "<enclosure:%p> ", node); -    switch (ENCLOSURE_(node)->type) { -    case ENCLOSURE_OPTION: -      fprintf(f, "option:%d", ENCLOSURE_(node)->o.options); +  case NODE_BAG: +    fprintf(f, "<bag:%p> ", node); +    switch (BAG_(node)->type) { +    case BAG_OPTION: +      fprintf(f, "option:%d", BAG_(node)->o.options);        break; -    case ENCLOSURE_MEMORY: -      fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum); +    case BAG_MEMORY: +      fprintf(f, "memory:%d", BAG_(node)->m.regnum);        break; -    case ENCLOSURE_STOP_BACKTRACK: +    case BAG_STOP_BACKTRACK:        fprintf(f, "stop-bt");        break; - -    default: +    case BAG_IF_ELSE: +      fprintf(f, "if-else");        break;      }      fprintf(f, "\n"); @@ -6561,7 +6744,7 @@ print_indent_tree(FILE* f, Node* node, int indent)    }    if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT && -      type != NODE_ENCLOSURE) +      type != NODE_BAG)      fprintf(f, "\n");    fflush(f);  } diff --git a/src/regenc.c b/src/regenc.c index 21f3536..d8f5274 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -231,7 +231,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)  {    int n = 0;    UChar* q = (UChar* )p; -   +    while (q < end) {      q += ONIGENC_MBC_ENC_LEN(enc, q);      n++; @@ -244,7 +244,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)  {    int n = 0;    UChar* p = (UChar* )s; -   +    while (1) {      if (*p == '\0') {        UChar* q; diff --git a/src/regenc.h b/src/regenc.h index ae8d65e..8a3397d 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -121,8 +121,20 @@ struct PropertyNameCtype {  #define ONIG_ENCODING_INIT_DEFAULT           
ONIG_ENCODING_ASCII +#define ENC_SKIP_OFFSET_1_OR_0             7 +  #define ENC_FLAG_ASCII_COMPATIBLE      (1<<0)  #define ENC_FLAG_UNICODE               (1<<1) +#define ENC_FLAG_SKIP_OFFSET_MASK      (7<<2) +#define ENC_FLAG_SKIP_OFFSET_0             0 +#define ENC_FLAG_SKIP_OFFSET_1         (1<<2) +#define ENC_FLAG_SKIP_OFFSET_2         (2<<2) +#define ENC_FLAG_SKIP_OFFSET_3         (3<<2) +#define ENC_FLAG_SKIP_OFFSET_4         (4<<2) +#define ENC_FLAG_SKIP_OFFSET_1_OR_0    (ENC_SKIP_OFFSET_1_OR_0<<2) + +#define ENC_GET_SKIP_OFFSET(enc) \ +  (((enc)->flag & ENC_FLAG_SKIP_OFFSET_MASK)>>2)  /* for encoding system implementation (internal) */ @@ -197,7 +209,7 @@ extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar*    else if ((buk)->fold_len == 3)\      addr = OnigUnicodeFolds3 + (buk)->index;\    else\ -    addr = 0;\ +    return ONIGERR_INVALID_CODE_POINT_VALUE;\  } while (0)  extern OnigCodePoint OnigUnicodeFolds1[]; @@ -252,7 +264,7 @@ extern const unsigned short OnigEncAsciiCtypeTable[];  #define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \   (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\    ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) -    +  #define ONIGENC_IS_UNICODE_ENCODING(enc) \    (((enc)->flag & ENC_FLAG_UNICODE) != 0) diff --git a/src/regerror.c b/src/regerror.c index 70efe9a..3fbcdfe 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -30,13 +30,7 @@  #include "regint.h"  #include <stdio.h> /* for vsnprintf() */ -#ifdef HAVE_STDARG_PROTOTYPES  #include <stdarg.h> -#define va_init_list(a,b) va_start(a,b) -#else -#include <varargs.h> -#define va_init_list(a,b) va_start(a) -#endif  extern UChar*  onig_error_code_to_format(int code) @@ -247,7 +241,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,        if (len >= buf_size) break;      } -    *is_over = ((p < end) ? 
1 : 0); +    *is_over = p < end;    }    else {      len = MIN((int )(end - s), buf_size); @@ -262,15 +256,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,  /* for ONIG_MAX_ERROR_MESSAGE_LEN */  #define MAX_ERROR_PAR_LEN   30 -extern int -#ifdef HAVE_STDARG_PROTOTYPES -onig_error_code_to_str(UChar* s, int code, ...) -#else -onig_error_code_to_str(s, code, va_alist) -  UChar* s; -  int code; -  va_dcl  -#endif +extern int onig_error_code_to_str(UChar* s, int code, ...)  {    UChar *p, *q;    OnigErrorInfo* einfo; @@ -278,7 +264,7 @@ onig_error_code_to_str(s, code, va_alist)    UChar parbuf[MAX_ERROR_PAR_LEN];    va_list vargs; -  va_init_list(vargs, code); +  va_start(vargs, code);    switch (code) {    case ONIGERR_UNDEFINED_NAME_REFERENCE: @@ -330,27 +316,15 @@ onig_error_code_to_str(s, code, va_alist)  } -void -#ifdef HAVE_STDARG_PROTOTYPES -onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, -                           UChar* pat, UChar* pat_end, const UChar *fmt, ...) -#else -onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) -    UChar buf[]; -    int bufsize; -    OnigEncoding enc; -    UChar* pat; -    UChar* pat_end; -    const UChar *fmt; -    va_dcl -#endif +void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, +                                UChar* pat, UChar* pat_end, const UChar *fmt, ...)  
{    int n, need, len;    UChar *p, *s, *bp;    UChar bs[6];    va_list args; -  va_init_list(args, fmt); +  va_start(args, fmt);    n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);    va_end(args); diff --git a/src/regexec.c b/src/regexec.c index 6c76d85..fa61839 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -782,13 +782,13 @@ static int  onig_region_resize_clear(OnigRegion* region, int n)  {    int r; -   +    r = onig_region_resize(region, n);    if (r != 0) return r;    onig_region_clear(region);    return 0;  } -     +  extern int  onig_region_set(OnigRegion* region, int at, int beg, int end)  { @@ -798,7 +798,7 @@ onig_region_set(OnigRegion* region, int at, int beg, int end)      int r = onig_region_resize(region, at + 1);      if (r < 0) return r;    } -   +    region->beg[at] = beg;    region->end[at] = end;    return 0; @@ -1225,7 +1225,7 @@ onig_initialize_match_param(OnigMatchParam* mp)  static int  adjust_match_param(regex_t* reg, OnigMatchParam* mp)  { -  RegexExt* ext = REG_EXTP(reg); +  RegexExt* ext = reg->extp;    mp->match_at_call_counter = 0; @@ -2337,6 +2337,79 @@ typedef struct {    regoff_t  rm_eo;  } posix_regmatch_t; + +#ifdef __GNUC__ +#define USE_THREADED_CODE +#endif + +#ifdef USE_THREADED_CODE + +#define BYTECODE_INTERPRETER_START      JUMP_OP; +#define BYTECODE_INTERPRETER_END +#define CASE_OP(x)   L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(1) +#define DEFAULT_OP   /* L_DEFAULT: */ +#define NEXT_OP      sprev = sbegin; JUMP_OP +#define JUMP_OP      goto *opcode_to_label[*p++] +#define BREAK_OP     /* Nothing */ + +#else + +#define BYTECODE_INTERPRETER_START \ +  while (1) {\ +  MATCH_DEBUG_OUT(0)\ +  sbegin = s;\ +  switch (*p++) { +#define BYTECODE_INTERPRETER_END  } sprev = sbegin; } +#define CASE_OP(x)   case OP_##x: SOP_IN(OP_##x); +#define DEFAULT_OP   default: +#define NEXT_OP      break +#define JUMP_OP      continue; break +#define BREAK_OP     break + +#endif /* USE_THREADED_CODE */ + +#define NEXT_OUT 
    SOP_OUT; NEXT_OP +#define JUMP_OUT     SOP_OUT; JUMP_OP +#define BREAK_OUT    SOP_OUT; BREAK_OP +#define CHECK_INTERRUPT_JUMP_OUT  SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP + + +#ifdef ONIG_DEBUG_MATCH +#define MATCH_DEBUG_OUT(offset) do {\ +      UChar *xp, *q, *bp, buf[50];\ +      int len, spos;\ +      spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\ +      xp = p - (offset);\ +      fprintf(stderr, "%7u: %7ld: %4d> \"",\ +              counter, GET_STACK_INDEX(stk), spos);\ +      counter++;\ +      bp = buf;\ +      if (IS_NOT_NULL(s)) {\ +        for (i = 0, q = s; i < 7 && q < end; i++) {\ +          len = enclen(encode, q);\ +          while (len-- > 0) *bp++ = *q++;\ +        }\ +        if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\ +        else         { xmemcpy(bp, "\"",    1); bp += 1; }\ +      }\ +      else {\ +        xmemcpy(bp, "\"", 1); bp += 1;\ +      }\ +      *bp = 0;\ +      fputs((char* )buf, stderr);\ +      for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\ +      if (xp == FinishCode)\ +        fprintf(stderr, "----: ");\ +      else\ +        fprintf(stderr, "%4d: ", (int )(xp - reg->p));\ +      onig_print_compiled_byte_code(stderr, xp, NULL, reg->p, encode);\ +      fprintf(stderr, "\n");\ +  } while(0); +#else +#define MATCH_DEBUG_OUT(offset) +#endif + +  /* match data(str - end) from position (sstart). */  /* if sstart == str then set sprev to NULL. 
*/  static int @@ -2346,6 +2419,107 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,  {    static UChar FinishCode[] = { OP_FINISH }; +#ifdef USE_THREADED_CODE +  static const void *opcode_to_label[] = { +  &&L_FINISH, +  &&L_END, +  &&L_EXACT1, +  &&L_EXACT2, +  &&L_EXACT3, +  &&L_EXACT4, +  &&L_EXACT5, +  &&L_EXACTN, +  &&L_EXACTMB2N1, +  &&L_EXACTMB2N2, +  &&L_EXACTMB2N3, +  &&L_EXACTMB2N, +  &&L_EXACTMB3N, +  &&L_EXACTMBN, +  &&L_EXACT1_IC, +  &&L_EXACTN_IC, +  &&L_CCLASS, +  &&L_CCLASS_MB, +  &&L_CCLASS_MIX, +  &&L_CCLASS_NOT, +  &&L_CCLASS_MB_NOT, +  &&L_CCLASS_MIX_NOT, +#ifdef USE_OP_CCLASS_NODE +  &&L_CCLASS_NODE, +#endif +  &&L_ANYCHAR, +  &&L_ANYCHAR_ML, +  &&L_ANYCHAR_STAR, +  &&L_ANYCHAR_ML_STAR, +  &&L_ANYCHAR_STAR_PEEK_NEXT, +  &&L_ANYCHAR_ML_STAR_PEEK_NEXT, +  &&L_WORD, +  &&L_WORD_ASCII, +  &&L_NO_WORD, +  &&L_NO_WORD_ASCII, +  &&L_WORD_BOUNDARY, +  &&L_NO_WORD_BOUNDARY, +  &&L_WORD_BEGIN, +  &&L_WORD_END, +  &&L_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, +  &&L_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, +  &&L_BEGIN_BUF, +  &&L_END_BUF, +  &&L_BEGIN_LINE, +  &&L_END_LINE, +  &&L_SEMI_END_BUF, +  &&L_BEGIN_POSITION, +  &&L_BACKREF1, +  &&L_BACKREF2, +  &&L_BACKREF_N, +  &&L_BACKREF_N_IC, +  &&L_BACKREF_MULTI, +  &&L_BACKREF_MULTI_IC, +  &&L_BACKREF_WITH_LEVEL, +  &&L_BACKREF_CHECK, +  &&L_BACKREF_CHECK_WITH_LEVEL, +  &&L_MEMORY_START, +  &&L_MEMORY_START_PUSH, +  &&L_MEMORY_END_PUSH, +  &&L_MEMORY_END_PUSH_REC, +  &&L_MEMORY_END, +  &&L_MEMORY_END_REC, +  &&L_FAIL, +  &&L_JUMP, +  &&L_PUSH, +  &&L_PUSH_SUPER, +  &&L_POP_OUT, +  &&L_PUSH_OR_JUMP_EXACT1, +  &&L_PUSH_IF_PEEK_NEXT, +  &&L_REPEAT, +  &&L_REPEAT_NG, +  &&L_REPEAT_INC, +  &&L_REPEAT_INC_NG, +  &&L_REPEAT_INC_SG, +  &&L_REPEAT_INC_NG_SG, +  &&L_EMPTY_CHECK_START, +  &&L_EMPTY_CHECK_END, +  &&L_EMPTY_CHECK_END_MEMST, +  &&L_EMPTY_CHECK_END_MEMST_PUSH, +  &&L_PREC_READ_START, +  &&L_PREC_READ_END, +  &&L_PREC_READ_NOT_START, +  &&L_PREC_READ_NOT_END, +  &&L_ATOMIC_START, +  &&L_ATOMIC_END, 
+  &&L_LOOK_BEHIND, +  &&L_LOOK_BEHIND_NOT_START, +  &&L_LOOK_BEHIND_NOT_END, +  &&L_CALL, +  &&L_RETURN, +  &&L_PUSH_SAVE_VAL, +  &&L_UPDATE_VAR, +#ifdef USE_CALLOUT +  &&L_CALLOUT_CONTENTS, +  &&L_CALLOUT_NAME, +#endif +  }; +#endif +    int i, n, num_mem, best_len, pop_level;    LengthType tlen, tlen2;    MemNumType mem; @@ -2374,6 +2548,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    OnigEncoding encode = reg->enc;    OnigCaseFoldType case_fold_flag = reg->case_fold_flag; +#ifdef ONIG_DEBUG_MATCH +  static unsigned int counter = 1; +#endif +  #ifdef USE_CALLOUT    msa->mp->match_at_call_counter++;  #endif @@ -2406,40 +2584,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,    retry_in_match_counter = 0;  #endif -  while (1) { -#ifdef ONIG_DEBUG_MATCH -    { -      static unsigned int counter = 1; - -      UChar *q, *bp, buf[50]; -      int len; -      fprintf(stderr, "%7u: %7ld: %4d> \"", -              counter, GET_STACK_INDEX(stk), (int )(s - str)); -      counter++; - -      bp = buf; -      for (i = 0, q = s; i < 7 && q < end; i++) { -        len = enclen(encode, q); -        while (len-- > 0) *bp++ = *q++; -      } -      if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } -      else         { xmemcpy(bp, "\"",    1); bp += 1; } -      *bp = 0; -      fputs((char* )buf, stderr); - -      for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); -      if (p == FinishCode) -        fprintf(stderr, "----: "); -      else -        fprintf(stderr, "%4d: ", (int )(p - reg->p)); -      onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode); -      fprintf(stderr, "\n"); -    } -#endif - -    sbegin = s; -    switch (*p++) { -    case OP_END:  SOP_IN(OP_END); +  BYTECODE_INTERPRETER_START { +    CASE_OP(END)        n = (int )(s - sstart);        if (n > best_len) {          OnigRegion* region; @@ -2551,16 +2697,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        /* default behavior: return 
first-matching result. */        goto finish; -      break; -    case OP_EXACT1:  SOP_IN(OP_EXACT1); +    CASE_OP(EXACT1)        DATA_ENSURE(1);        if (*p != *s) goto fail;        p++; s++; -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_EXACT1_IC:  SOP_IN(OP_EXACT1_IC); +    CASE_OP(EXACT1_IC)        {          int len;          UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2579,21 +2723,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            p++; q++;          }        } -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_EXACT2:  SOP_IN(OP_EXACT2); +    CASE_OP(EXACT2)        DATA_ENSURE(2);        if (*p != *s) goto fail;        p++; s++;        if (*p != *s) goto fail;        sprev = s;        p++; s++; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACT3:  SOP_IN(OP_EXACT3); +    CASE_OP(EXACT3)        DATA_ENSURE(3);        if (*p != *s) goto fail;        p++; s++; @@ -2602,11 +2743,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*p != *s) goto fail;        sprev = s;        p++; s++; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACT4:  SOP_IN(OP_EXACT4); +    CASE_OP(EXACT4)        DATA_ENSURE(4);        if (*p != *s) goto fail;        p++; s++; @@ -2617,11 +2756,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*p != *s) goto fail;        sprev = s;        p++; s++; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACT5:  SOP_IN(OP_EXACT5); +    CASE_OP(EXACT5)        DATA_ENSURE(5);        if (*p != *s) goto fail;        p++; s++; @@ -2634,22 +2771,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        if (*p != *s) goto fail;        sprev = s;        p++; s++; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACTN:  SOP_IN(OP_EXACTN); +    CASE_OP(EXACTN)        GET_LENGTH_INC(tlen, p);        DATA_ENSURE(tlen);       
 while (tlen-- > 0) {          if (*p++ != *s++) goto fail;        }        sprev = s - 1; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACTN_IC:  SOP_IN(OP_EXACTN_IC); +    CASE_OP(EXACTN_IC)        {          int len;          UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2673,20 +2806,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACTMB2N1:  SOP_IN(OP_EXACTMB2N1); +    CASE_OP(EXACTMB2N1)        DATA_ENSURE(2);        if (*p != *s) goto fail;        p++; s++;        if (*p != *s) goto fail;        p++; s++; -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_EXACTMB2N2:  SOP_IN(OP_EXACTMB2N2); +    CASE_OP(EXACTMB2N2)        DATA_ENSURE(4);        if (*p != *s) goto fail;        p++; s++; @@ -2697,11 +2827,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        p++; s++;        if (*p != *s) goto fail;        p++; s++; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACTMB2N3:  SOP_IN(OP_EXACTMB2N3); +    CASE_OP(EXACTMB2N3)        DATA_ENSURE(6);        if (*p != *s) goto fail;        p++; s++; @@ -2716,11 +2844,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        p++; s++;        if (*p != *s) goto fail;        p++; s++; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACTMB2N:  SOP_IN(OP_EXACTMB2N); +    CASE_OP(EXACTMB2N)        GET_LENGTH_INC(tlen, p);        DATA_ENSURE(tlen * 2);        while (tlen-- > 0) { @@ -2730,11 +2856,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p++; s++;        }        sprev = s - 2; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACTMB3N:  SOP_IN(OP_EXACTMB3N); +    CASE_OP(EXACTMB3N)        GET_LENGTH_INC(tlen, p);        DATA_ENSURE(tlen * 3);        while (tlen-- > 0) { @@ -2746,11 +2870,9 @@ 
match_at(regex_t* reg, const UChar* str, const UChar* end,          p++; s++;        }        sprev = s - 3; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EXACTMBN:  SOP_IN(OP_EXACTMBN); +    CASE_OP(EXACTMBN)        GET_LENGTH_INC(tlen,  p);  /* mb-len */        GET_LENGTH_INC(tlen2, p);  /* string len */        tlen2 *= tlen; @@ -2760,19 +2882,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p++; s++;        }        sprev = s - tlen; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_CCLASS:  SOP_IN(OP_CCLASS); +    CASE_OP(CCLASS)        DATA_ENSURE(1);        if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;        p += SIZE_BITSET;        s += enclen(encode, s);   /* OP_CCLASS can match mb-code. \D, \S */ -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_CCLASS_MB:  SOP_IN(OP_CCLASS_MB); +    CASE_OP(CCLASS_MB)        if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;      cclass_mb: @@ -2798,10 +2917,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,  #endif        }        p += tlen; -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_CCLASS_MIX:  SOP_IN(OP_CCLASS_MIX); +    CASE_OP(CCLASS_MIX)        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_HEAD(encode, s)) {          p += SIZE_BITSET; @@ -2816,18 +2934,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p += tlen;          s++;        } -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_CCLASS_NOT:  SOP_IN(OP_CCLASS_NOT); +    CASE_OP(CCLASS_NOT)        DATA_ENSURE(1);        if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;        p += SIZE_BITSET;        s += enclen(encode, s); -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_CCLASS_MB_NOT:  SOP_IN(OP_CCLASS_MB_NOT); +    CASE_OP(CCLASS_MB_NOT)        DATA_ENSURE(1);        if (! 
ONIGENC_IS_MBC_HEAD(encode, s)) {          s++; @@ -2865,10 +2981,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        p += tlen;      cc_mb_not_success: -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_CCLASS_MIX_NOT:  SOP_IN(OP_CCLASS_MIX_NOT); +    CASE_OP(CCLASS_MIX_NOT)        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_HEAD(encode, s)) {          p += SIZE_BITSET; @@ -2883,11 +2998,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p += tlen;          s++;        } -      SOP_OUT; -      break; +      NEXT_OUT;  #ifdef USE_OP_CCLASS_NODE -    case OP_CCLASS_NODE:  SOP_IN(OP_CCLASS_NODE); +    CASE_OP(CCLASS_NODE)        {          OnigCodePoint code;          void *node; @@ -2903,28 +3017,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          code = ONIGENC_MBC_TO_CODE(encode, ss, s);          if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;        } -      SOP_OUT; -      break; +      NEXT_OUT;  #endif -    case OP_ANYCHAR:  SOP_IN(OP_ANYCHAR); +    CASE_OP(ANYCHAR)        DATA_ENSURE(1);        n = enclen(encode, s);        DATA_ENSURE(n);        if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;        s += n; -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_ANYCHAR_ML:  SOP_IN(OP_ANYCHAR_ML); +    CASE_OP(ANYCHAR_ML)        DATA_ENSURE(1);        n = enclen(encode, s);        DATA_ENSURE(n);        s += n; -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_ANYCHAR_STAR:  SOP_IN(OP_ANYCHAR_STAR); +    CASE_OP(ANYCHAR_STAR)        while (DATA_ENSURE_CHECK1) {          STACK_PUSH_ALT(p, s, sprev);          n = enclen(encode, s); @@ -2933,11 +3044,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          sprev = s;          s += n;        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_ANYCHAR_ML_STAR:  SOP_IN(OP_ANYCHAR_ML_STAR); +    CASE_OP(ANYCHAR_ML_STAR)        while (DATA_ENSURE_CHECK1) {          
STACK_PUSH_ALT(p, s, sprev);          n = enclen(encode, s); @@ -2951,11 +3060,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            s++;          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_ANYCHAR_STAR_PEEK_NEXT:  SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); +    CASE_OP(ANYCHAR_STAR_PEEK_NEXT)        while (DATA_ENSURE_CHECK1) {          if (*p == *s) {            STACK_PUSH_ALT(p + 1, s, sprev); @@ -2967,10 +3074,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          s += n;        }        p++; -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); +    CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)        while (DATA_ENSURE_CHECK1) {          if (*p == *s) {            STACK_PUSH_ALT(p + 1, s, sprev); @@ -2987,46 +3093,41 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          }        }        p++; -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_WORD:  SOP_IN(OP_WORD); +    CASE_OP(WORD)        DATA_ENSURE(1);        if (! ONIGENC_IS_MBC_WORD(encode, s, end))          goto fail;        s += enclen(encode, s); -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_WORD_ASCII:  SOP_IN(OP_WORD_ASCII); +    CASE_OP(WORD_ASCII)        DATA_ENSURE(1);        if (! 
ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))          goto fail;        s += enclen(encode, s); -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_NO_WORD:  SOP_IN(OP_NO_WORD); +    CASE_OP(NO_WORD)        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_WORD(encode, s, end))          goto fail;        s += enclen(encode, s); -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_NO_WORD_ASCII:  SOP_IN(OP_NO_WORD_ASCII); +    CASE_OP(NO_WORD_ASCII)        DATA_ENSURE(1);        if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))          goto fail;        s += enclen(encode, s); -      SOP_OUT; -      break; +      NEXT_OUT; -    case OP_WORD_BOUNDARY:  SOP_IN(OP_WORD_BOUNDARY); +    CASE_OP(WORD_BOUNDARY)        {          ModeType mode;          GET_MODE_INC(mode, p); /* ascii_mode */ @@ -3046,11 +3147,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,              goto fail;          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_NO_WORD_BOUNDARY:  SOP_IN(OP_NO_WORD_BOUNDARY); +    CASE_OP(NO_WORD_BOUNDARY)        {          ModeType mode;          GET_MODE_INC(mode, p); /* ascii_mode */ @@ -3069,189 +3168,150 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,              goto fail;          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #ifdef USE_WORD_BEGIN_END -    case OP_WORD_BEGIN:  SOP_IN(OP_WORD_BEGIN); +    CASE_OP(WORD_BEGIN)        {          ModeType mode;          GET_MODE_INC(mode, p); /* ascii_mode */          if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {            if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { -            SOP_OUT; -            continue; +            JUMP_OUT;            }          }        }        goto fail; -      break; -    case OP_WORD_END:  SOP_IN(OP_WORD_END); +    CASE_OP(WORD_END)        {          ModeType mode;          GET_MODE_INC(mode, p); /* ascii_mode */          
if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {            if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { -            SOP_OUT; -            continue; +            JUMP_OUT;            }          }        }        goto fail; -      break;  #endif -    case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: -      SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); +    CASE_OP(EXTENDED_GRAPHEME_CLUSTER_BOUNDARY)        if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { -        SOP_OUT; -        continue; +        JUMP_OUT;        }        goto fail; -      break; -    case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: -      SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); +    CASE_OP(NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY)        if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))          goto fail; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_BEGIN_BUF:  SOP_IN(OP_BEGIN_BUF); +    CASE_OP(BEGIN_BUF)        if (! ON_STR_BEGIN(s)) goto fail; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_END_BUF:  SOP_IN(OP_END_BUF); +    CASE_OP(END_BUF)        if (! 
ON_STR_END(s)) goto fail; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_BEGIN_LINE:  SOP_IN(OP_BEGIN_LINE); +    CASE_OP(BEGIN_LINE)        if (ON_STR_BEGIN(s)) {          if (IS_NOTBOL(msa->options)) goto fail; -        SOP_OUT; -        continue; +        JUMP_OUT;        }        else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { -        SOP_OUT; -        continue; +        JUMP_OUT;        }        goto fail; -      break; -    case OP_END_LINE:  SOP_IN(OP_END_LINE); +    CASE_OP(END_LINE)        if (ON_STR_END(s)) {  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE          if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {  #endif            if (IS_NOTEOL(msa->options)) goto fail; -          SOP_OUT; -          continue; +          JUMP_OUT;  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE          }  #endif        }        else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { -        SOP_OUT; -        continue; +        JUMP_OUT;        }  #ifdef USE_CRNL_AS_LINE_TERMINATOR        else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { -        SOP_OUT; -        continue; +        JUMP_OUT;        }  #endif        goto fail; -      break; -    case OP_SEMI_END_BUF:  SOP_IN(OP_SEMI_END_BUF); +    CASE_OP(SEMI_END_BUF)        if (ON_STR_END(s)) {  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE          if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {  #endif            if (IS_NOTEOL(msa->options)) goto fail; -          SOP_OUT; -          continue; +          JUMP_OUT;  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE          }  #endif        }        else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&                 ON_STR_END(s + enclen(encode, s))) { -        SOP_OUT; -        continue; +        JUMP_OUT;        }  #ifdef USE_CRNL_AS_LINE_TERMINATOR        else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {          UChar* ss = s + enclen(encode, s);          ss += 
enclen(encode, ss);          if (ON_STR_END(ss)) { -          SOP_OUT; -          continue; +          JUMP_OUT;          }        }  #endif        goto fail; -      break; -    case OP_BEGIN_POSITION:  SOP_IN(OP_BEGIN_POSITION); +    CASE_OP(BEGIN_POSITION)        if (s != msa->start)          goto fail; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_MEMORY_START_PUSH:  SOP_IN(OP_MEMORY_START_PUSH); +    CASE_OP(MEMORY_START_PUSH)        GET_MEMNUM_INC(mem, p);        STACK_PUSH_MEM_START(mem, s); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_MEMORY_START:  SOP_IN(OP_MEMORY_START); +    CASE_OP(MEMORY_START)        GET_MEMNUM_INC(mem, p);        mem_start_stk[mem] = (StackIndex )((void* )s); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_MEMORY_END_PUSH:  SOP_IN(OP_MEMORY_END_PUSH); +    CASE_OP(MEMORY_END_PUSH)        GET_MEMNUM_INC(mem, p);        STACK_PUSH_MEM_END(mem, s); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_MEMORY_END:  SOP_IN(OP_MEMORY_END); +    CASE_OP(MEMORY_END)        GET_MEMNUM_INC(mem, p);        mem_end_stk[mem] = (StackIndex )((void* )s); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #ifdef USE_CALL -    case OP_MEMORY_END_PUSH_REC:  SOP_IN(OP_MEMORY_END_PUSH_REC); +    CASE_OP(MEMORY_END_PUSH_REC)        GET_MEMNUM_INC(mem, p);        STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. 
*/        STACK_PUSH_MEM_END(mem, s);        mem_start_stk[mem] = GET_STACK_INDEX(stkp); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_MEMORY_END_REC:  SOP_IN(OP_MEMORY_END_REC); +    CASE_OP(MEMORY_END_REC)        GET_MEMNUM_INC(mem, p);        mem_end_stk[mem] = (StackIndex )((void* )s);        STACK_GET_MEM_START(mem, stkp); @@ -3262,22 +3322,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);        STACK_PUSH_MEM_END_MARK(mem); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #endif -    case OP_BACKREF1:  SOP_IN(OP_BACKREF1); +    CASE_OP(BACKREF1)        mem = 1;        goto backref; -      break; -    case OP_BACKREF2:  SOP_IN(OP_BACKREF2); +    CASE_OP(BACKREF2)        mem = 2;        goto backref; -      break; -    case OP_BACKREF_N:  SOP_IN(OP_BACKREF_N); +    CASE_OP(BACKREF_N)        GET_MEMNUM_INC(mem, p);      backref:        { @@ -3301,13 +3357,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          STRING_CMP(pstart, s, n);          while (sprev + (len = enclen(encode, sprev)) < s)            sprev += len; - -        SOP_OUT; -        continue;        } -      break; +      JUMP_OUT; -    case OP_BACKREF_N_IC:  SOP_IN(OP_BACKREF_N_IC); +    CASE_OP(BACKREF_N_IC)        GET_MEMNUM_INC(mem, p);        {          int len; @@ -3330,13 +3383,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          STRING_CMP_IC(case_fold_flag, pstart, &s, n);          while (sprev + (len = enclen(encode, sprev)) < s)            sprev += len; - -        SOP_OUT; -        continue;        } -      break; +      JUMP_OUT; -    case OP_BACKREF_MULTI:  SOP_IN(OP_BACKREF_MULTI); +    CASE_OP(BACKREF_MULTI)        {          int len, is_fail;          UChar *pstart, *pend, *swork; @@ -3370,12 +3420,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break; /* success */          }          
if (i == tlen) goto fail; -        SOP_OUT; -        continue;        } -      break; +      JUMP_OUT; -    case OP_BACKREF_MULTI_IC:  SOP_IN(OP_BACKREF_MULTI_IC); +    CASE_OP(BACKREF_MULTI_IC)        {          int len, is_fail;          UChar *pstart, *pend, *swork; @@ -3409,13 +3457,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break; /* success */          }          if (i == tlen) goto fail; -        SOP_OUT; -        continue;        } -      break; +      JUMP_OUT;  #ifdef USE_BACKREF_WITH_LEVEL -    case OP_BACKREF_WITH_LEVEL: +    CASE_OP(BACKREF_WITH_LEVEL)        {          int len;          OnigOptionType ic; @@ -3436,14 +3482,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          }          else            goto fail; - -        SOP_OUT; -        continue;        } -      break; +      JUMP_OUT;  #endif -    case OP_BACKREF_CHECK:  SOP_IN(OP_BACKREF_CHECK); +    CASE_OP(BACKREF_CHECK)        {          GET_LENGTH_INC(tlen, p);          for (i = 0; i < tlen; i++) { @@ -3456,13 +3499,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break; /* success */          }          if (i == tlen) goto fail; -        SOP_OUT; -        continue;        } -      break; +      JUMP_OUT;  #ifdef USE_BACKREF_WITH_LEVEL -    case OP_BACKREF_CHECK_WITH_LEVEL: +    CASE_OP(BACKREF_CHECK_WITH_LEVEL)        {          LengthType level; @@ -3475,21 +3516,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          }          else            goto fail; - -        SOP_OUT; -        continue;        } -      break; +      JUMP_OUT;  #endif -    case OP_EMPTY_CHECK_START:  SOP_IN(OP_EMPTY_CHECK_START); +    CASE_OP(EMPTY_CHECK_START)        GET_MEMNUM_INC(mem, p);    /* mem: null check id */        STACK_PUSH_EMPTY_CHECK_START(mem, s); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_EMPTY_CHECK_END:  SOP_IN(OP_EMPTY_CHECK_END); +    CASE_OP(EMPTY_CHECK_END)        
{          int is_empty; @@ -3518,12 +3554,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            }          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT -    case OP_EMPTY_CHECK_END_MEMST:  SOP_IN(OP_EMPTY_CHECK_END_MEMST); +    CASE_OP(EMPTY_CHECK_END_MEMST)        {          int is_empty; @@ -3537,14 +3571,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            goto empty_check_found;          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #endif  #ifdef USE_CALL -    case OP_EMPTY_CHECK_END_MEMST_PUSH: -      SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); +    CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)        {          int is_empty; @@ -3566,68 +3597,51 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            STACK_PUSH_EMPTY_CHECK_END(mem);          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #endif -    case OP_JUMP:  SOP_IN(OP_JUMP); +    CASE_OP(JUMP)        GET_RELADDR_INC(addr, p);        p += addr; -      SOP_OUT; -      CHECK_INTERRUPT_IN_MATCH; -      continue; -      break; +      CHECK_INTERRUPT_JUMP_OUT; -    case OP_PUSH:  SOP_IN(OP_PUSH); +    CASE_OP(PUSH)        GET_RELADDR_INC(addr, p);        STACK_PUSH_ALT(p + addr, s, sprev); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_PUSH_SUPER:  SOP_IN(OP_PUSH_SUPER); +    CASE_OP(PUSH_SUPER)        GET_RELADDR_INC(addr, p);        STACK_PUSH_SUPER_ALT(p + addr, s, sprev); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_POP_OUT:  SOP_IN(OP_POP_OUT); +    CASE_OP(POP_OUT)        STACK_POP_ONE;        /* for stop backtrack */        /* CHECK_RETRY_LIMIT_IN_MATCH; */ -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_PUSH_OR_JUMP_EXACT1:  SOP_IN(OP_PUSH_OR_JUMP_EXACT1); +    CASE_OP(PUSH_OR_JUMP_EXACT1)        GET_RELADDR_INC(addr, p);    
    if (*p == *s && DATA_ENSURE_CHECK1) {          p++;          STACK_PUSH_ALT(p + addr, s, sprev); -        SOP_OUT; -        continue; +        JUMP_OUT;        }        p += (addr + 1); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_PUSH_IF_PEEK_NEXT:  SOP_IN(OP_PUSH_IF_PEEK_NEXT); +    CASE_OP(PUSH_IF_PEEK_NEXT)        GET_RELADDR_INC(addr, p);        if (*p == *s) {          p++;          STACK_PUSH_ALT(p + addr, s, sprev); -        SOP_OUT; -        continue; +        JUMP_OUT;        }        p++; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_REPEAT:  SOP_IN(OP_REPEAT); +    CASE_OP(REPEAT)        {          GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */          GET_RELADDR_INC(addr, p); @@ -3640,11 +3654,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            STACK_PUSH_ALT(p + addr, s, sprev);          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_REPEAT_NG:  SOP_IN(OP_REPEAT_NG); +    CASE_OP(REPEAT_NG)        {          GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */          GET_RELADDR_INC(addr, p); @@ -3658,11 +3670,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            p += addr;          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_REPEAT_INC:  SOP_IN(OP_REPEAT_INC); +    CASE_OP(REPEAT_INC)        GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */        si = repeat_stk[mem];        stkp = STACK_AT(si); @@ -3680,19 +3690,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          p = stkp->u.repeat.pcode;        }        STACK_PUSH_REPEAT_INC(si); -      SOP_OUT; -      CHECK_INTERRUPT_IN_MATCH; -      continue; -      break; +      CHECK_INTERRUPT_JUMP_OUT; -    case OP_REPEAT_INC_SG:  SOP_IN(OP_REPEAT_INC_SG); +    CASE_OP(REPEAT_INC_SG)        GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */        STACK_GET_REPEAT(mem, stkp);        si = 
GET_STACK_INDEX(stkp);        goto repeat_inc; -      break; -    case OP_REPEAT_INC_NG:  SOP_IN(OP_REPEAT_INC_NG); +    CASE_OP(REPEAT_INC_NG)        GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */        si = repeat_stk[mem];        stkp = STACK_AT(si); @@ -3714,68 +3720,51 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,        else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {          STACK_PUSH_REPEAT_INC(si);        } -      SOP_OUT; -      CHECK_INTERRUPT_IN_MATCH; -      continue; -      break; +      CHECK_INTERRUPT_JUMP_OUT; -    case OP_REPEAT_INC_NG_SG:  SOP_IN(OP_REPEAT_INC_NG_SG); +    CASE_OP(REPEAT_INC_NG_SG)        GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */        STACK_GET_REPEAT(mem, stkp);        si = GET_STACK_INDEX(stkp);        goto repeat_inc_ng; -      break; -    case OP_PREC_READ_START:  SOP_IN(OP_PREC_READ_START); +    CASE_OP(PREC_READ_START)        STACK_PUSH_POS(s, sprev); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_PREC_READ_END:  SOP_IN(OP_PREC_READ_END); +    CASE_OP(PREC_READ_END)        {          STACK_EXEC_TO_VOID(stkp);          s     = stkp->u.state.pstr;          sprev = stkp->u.state.pstr_prev;        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_PREC_READ_NOT_START:  SOP_IN(OP_PREC_READ_NOT_START); +    CASE_OP(PREC_READ_NOT_START)        GET_RELADDR_INC(addr, p);        STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_PREC_READ_NOT_END:  SOP_IN(OP_PREC_READ_NOT_END); +    CASE_OP(PREC_READ_NOT_END)        STACK_POP_TIL_ALT_PREC_READ_NOT;        goto fail; -      break; -    case OP_ATOMIC_START:  SOP_IN(OP_ATOMIC_START); +    CASE_OP(ATOMIC_START)        STACK_PUSH_TO_VOID_START; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_ATOMIC_END:  SOP_IN(OP_ATOMIC_END); +    CASE_OP(ATOMIC_END)        STACK_EXEC_TO_VOID(stkp); 
-      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_LOOK_BEHIND:  SOP_IN(OP_LOOK_BEHIND); +    CASE_OP(LOOK_BEHIND)        GET_LENGTH_INC(tlen, p);        s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);        if (IS_NULL(s)) goto fail;        sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_LOOK_BEHIND_NOT_START:  SOP_IN(OP_LOOK_BEHIND_NOT_START); +    CASE_OP(LOOK_BEHIND_NOT_START)        GET_RELADDR_INC(addr, p);        GET_LENGTH_INC(tlen, p);        q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -3790,33 +3779,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,          s = q;          sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_LOOK_BEHIND_NOT_END:  SOP_IN(OP_LOOK_BEHIND_NOT_END); +    CASE_OP(LOOK_BEHIND_NOT_END)        STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;        goto fail; -      break;  #ifdef USE_CALL -    case OP_CALL:  SOP_IN(OP_CALL); +    CASE_OP(CALL)        GET_ABSADDR_INC(addr, p);        STACK_PUSH_CALL_FRAME(p);        p = reg->p + addr; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_RETURN:  SOP_IN(OP_RETURN); +    CASE_OP(RETURN)        STACK_RETURN(p);        STACK_PUSH_RETURN; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #endif -    case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL); +    CASE_OP(PUSH_SAVE_VAL)        {          SaveType type;          GET_SAVE_TYPE_INC(type, p); @@ -3835,11 +3817,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            break;          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR); +    CASE_OP(UPDATE_VAR)        {          UpdateVarType type;          enum SaveType save_type; @@ -3867,20 +3847,15 @@ match_at(regex_t* 
reg, const UChar* str, const UChar* end,            break;          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #ifdef USE_CALLOUT -    case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS); +    CASE_OP(CALLOUT_CONTENTS)        of = ONIG_CALLOUT_OF_CONTENTS;        goto callout_common_entry; +      BREAK_OUT; -      SOP_OUT; -      continue; -      break; - -    case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME); +    CASE_OP(CALLOUT_NAME)        {          int call_result;          int name_id; @@ -3941,34 +3916,34 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,            }          }        } -      SOP_OUT; -      continue; -      break; +      JUMP_OUT;  #endif -    case OP_FINISH: +    CASE_OP(FINISH)        goto finish; -      break; +#ifdef ONIG_DEBUG_STATISTICS      fail:        SOP_OUT; -      /* fall */ -    case OP_FAIL:  SOP_IN(OP_FAIL); +      goto fail2; +#endif +    CASE_OP(FAIL) +#ifdef ONIG_DEBUG_STATISTICS +    fail2: +#else +    fail: +#endif        STACK_POP;        p     = stk->u.state.pcode;        s     = stk->u.state.pstr;        sprev = stk->u.state.pstr_prev;        CHECK_RETRY_LIMIT_IN_MATCH; -      SOP_OUT; -      continue; -      break; +      JUMP_OUT; -    default: +    DEFAULT_OP        goto bytecode_error; -    } /* end of switch */ -    sprev = sbegin; -  } /* end of while(1) */ +  } BYTECODE_INTERPRETER_END;   finish:    STACK_SAVE; @@ -4130,150 +4105,143 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,    return (UChar* )NULL;  } +  static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, -                 const UChar* text, const UChar* text_end, -                 const UChar* text_range) +sunday_quick_search_step_forward(regex_t* reg, +                                 const UChar* target, const UChar* target_end, +                                 const UChar* text, const UChar* text_end, +                                 const UChar* 
text_range)  {    const UChar *s, *se, *t, *p, *end;    const UChar *tail;    int skip, tlen1; +  int map_offset; +  OnigEncoding enc;  #ifdef ONIG_DEBUG_SEARCH -  fprintf(stderr, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n", -          text, text_end, text_range); +  fprintf(stderr, +          "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);  #endif +  enc = reg->enc; +    tail = target_end - 1;    tlen1 = (int )(tail - target);    end = text_range;    if (end + tlen1 > text_end)      end = text_end - tlen1; +  map_offset = reg->map_offset;    s = text; -  if (IS_NULL(reg->int_map)) { -    while (s < end) { -      p = se = s + tlen1; -      t = tail; -      while (*p == *t) { -        if (t == target) return (UChar* )s; -        p--; t--; -      } -      skip = reg->map[*se]; -      t = s; -      do { -        s += enclen(reg->enc, s); -      } while ((s - t) < skip && s < end); -    } -  } -  else { -    while (s < end) { -      p = se = s + tlen1; -      t = tail; -      while (*p == *t) { -        if (t == target) return (UChar* )s; -        p--; t--; -      } -      skip = reg->int_map[*se]; -      t = s; -      do { -        s += enclen(reg->enc, s); -      } while ((s - t) < skip && s < end); +  while (s < end) { +    p = se = s + tlen1; +    t = tail; +    while (*p == *t) { +      if (t == target) return (UChar* )s; +      p--; t--;      } +    if (se + map_offset >= text_end) break; +    skip = reg->map[*(se + map_offset)]; +#if 0 +    t = s; +    do { +      s += enclen(enc, s); +    } while ((s - t) < skip && s < end); +#else +    s += skip; +    if (s < end) +      s = onigenc_get_right_adjust_char_head(enc, text, s); +#endif    }    return (UChar* )NULL;  }  static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, -          const UChar* text, const UChar* text_end, const UChar* text_range) +sunday_quick_search(regex_t* reg, const UChar* target, const 
UChar* target_end, +                    const UChar* text, const UChar* text_end, +                    const UChar* text_range)  {    const UChar *s, *t, *p, *end;    const UChar *tail; +  int map_offset; -  end = text_range + (target_end - target) - 1; +  end = text_range + (target_end - target);    if (end > text_end)      end = text_end; +  map_offset = reg->map_offset;    tail = target_end - 1; -  s = text + (target_end - target) - 1; -  if (IS_NULL(reg->int_map)) { -    while (s < end) { -      p = s; -      t = tail; -      while (*p == *t) { -        if (t == target) return (UChar* )p; -        p--; t--; -      } -      s += reg->map[*s]; -    } -  } -  else { /* see int_map[] */ -    while (s < end) { -      p = s; -      t = tail; -      while (*p == *t) { -        if (t == target) return (UChar* )p; -        p--; t--; -      } -      s += reg->int_map[*s]; +  s = text + (tail - target); + +  while (s < end) { +    p = s; +    t = tail; +    while (*p == *t) { +      if (t == target) return (UChar* )p; +      p--; t--;      } +    if (s + map_offset >= text_end) break; +    s += reg->map[*(s + map_offset)];    } +    return (UChar* )NULL;  } -#ifdef USE_INT_MAP_BACKWARD -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip) +static UChar* +sunday_quick_search_case_fold(regex_t* reg, +                              const UChar* target, const UChar* target_end, +                              const UChar* text,   const UChar* text_end, +                              const UChar* text_range)  { -  int i, len; - -  if (IS_NULL(*skip)) { -    *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); -    if (IS_NULL(*skip)) return ONIGERR_MEMORY; -  } - -  len = end - s; -  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) -    (*skip)[i] = len; +  const UChar *s, *se, *end; +  const UChar *tail; +  int skip, tlen1; +  int map_offset; +  int case_fold_flag; +  OnigEncoding enc; -  for (i = len - 1; i > 0; i--) -    
(*skip)[s[i]] = i; +#ifdef ONIG_DEBUG_SEARCH +  fprintf(stderr, +          "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range); +#endif -  return 0; -} +  enc = reg->enc; +  case_fold_flag = reg->case_fold_flag; -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, -                   const UChar* text, const UChar* adjust_text, -                   const UChar* text_end, const UChar* text_start) -{ -  const UChar *s, *t, *p; +  tail = target_end - 1; +  tlen1 = (int )(tail - target); +  end = text_range; +  if (end + tlen1 > text_end) +    end = text_end - tlen1; -  s = text_end - (target_end - target); -  if (text_start < s) -    s = text_start; -  else -    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); +  map_offset = reg->map_offset; +  s = text; -  while (s >= text) { -    p = s; -    t = target; -    while (t < target_end && *p == *t) { -      p++; t++; -    } -    if (t == target_end) +  while (s < end) { +    if (str_lower_case_match(enc, case_fold_flag, target, target_end, +                             s, text_end))        return (UChar* )s; -    s -= reg->int_map_backward[*s]; -    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); +    se = s + tlen1; +    if (se + map_offset >= text_end) break; +    skip = reg->map[*(se + map_offset)]; +#if 0 +    p = s; +    do { +      s += enclen(enc, s); +    } while ((s - p) < skip && s < end); +#else +    /* This is faster than prev code for long text.  
ex: /(?i)Twain/  */ +    s += skip; +    if (s < end) +      s = onigenc_get_right_adjust_char_head(enc, text, s); +#endif    }    return (UChar* )NULL;  } -#endif  static UChar*  map_search(OnigEncoding enc, UChar map[], @@ -4380,20 +4348,26 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,   retry:    switch (reg->optimize) { -  case OPTIMIZE_EXACT: +  case OPTIMIZE_STR:      p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);      break; -  case OPTIMIZE_EXACT_IC: +  case OPTIMIZE_STR_CASE_FOLD:      p = slow_search_ic(reg->enc, reg->case_fold_flag,                         reg->exact, reg->exact_end, p, end, range);      break; -  case OPTIMIZE_EXACT_BM: -    p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); +  case OPTIMIZE_STR_CASE_FOLD_FAST: +    p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end, +                                      range); +    break; + +  case OPTIMIZE_STR_FAST: +    p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);      break; -  case OPTIMIZE_EXACT_BM_NO_REV: -    p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); +  case OPTIMIZE_STR_FAST_STEP_FORWARD: +    p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end, +                                         p, end, range);      break;    case OPTIMIZE_MAP: @@ -4413,7 +4387,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,        UChar* prev;        switch (reg->sub_anchor) { -      case ANCHOR_BEGIN_LINE: +      case ANCR_BEGIN_LINE:          if (!ON_STR_BEGIN(p)) {            prev = onigenc_get_prev_char_head(reg->enc,                                              (pprev ? 
pprev : str), p); @@ -4422,7 +4396,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,          }          break; -      case ANCHOR_END_LINE: +      case ANCR_END_LINE:          if (ON_STR_END(p)) {  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE            prev = (UChar* )onigenc_get_prev_char_head(reg->enc, @@ -4490,8 +4464,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,  } -#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD   100 -  static int  backward_search_range(regex_t* reg, const UChar* str, const UChar* end,                        UChar* s, const UChar* range, UChar* adjrange, @@ -4499,41 +4471,29 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,  {    UChar *p; +  if (range == 0) goto fail; +    range += reg->dmin;    p = s;   retry:    switch (reg->optimize) { -  case OPTIMIZE_EXACT: +  case OPTIMIZE_STR:    exact_method:      p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,                               range, adjrange, end, p);      break; -  case OPTIMIZE_EXACT_IC: +  case OPTIMIZE_STR_CASE_FOLD: +  case OPTIMIZE_STR_CASE_FOLD_FAST:      p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,                                  reg->exact, reg->exact_end,                                  range, adjrange, end, p);      break; -  case OPTIMIZE_EXACT_BM: -  case OPTIMIZE_EXACT_BM_NO_REV: -#ifdef USE_INT_MAP_BACKWARD -    if (IS_NULL(reg->int_map_backward)) { -      int r; - -      if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) -        goto exact_method; - -      r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, -                               &(reg->int_map_backward)); -      if (r != 0) return r; -    } -    p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, -                           end, p); -#else +  case OPTIMIZE_STR_FAST: +  case OPTIMIZE_STR_FAST_STEP_FORWARD:      goto exact_method; -#endif      
break;    case OPTIMIZE_MAP: @@ -4546,17 +4506,17 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,        UChar* prev;        switch (reg->sub_anchor) { -      case ANCHOR_BEGIN_LINE: +      case ANCR_BEGIN_LINE:          if (!ON_STR_BEGIN(p)) {            prev = onigenc_get_prev_char_head(reg->enc, str, p); -          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { +          if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {              p = prev;              goto retry;            }          }          break; -      case ANCHOR_END_LINE: +      case ANCR_END_LINE:          if (ON_STR_END(p)) {  #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE            prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); @@ -4682,7 +4642,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,    if (reg->anchor != 0 && str < end) {      UChar *min_semi_end, *max_semi_end; -    if (reg->anchor & ANCHOR_BEGIN_POSITION) { +    if (reg->anchor & ANCR_BEGIN_POSITION) {        /* search start-position only */      begin_position:        if (range > start) @@ -4690,7 +4650,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,        else          range = start;      } -    else if (reg->anchor & ANCHOR_BEGIN_BUF) { +    else if (reg->anchor & ANCR_BEGIN_BUF) {        /* search str-position only */        if (range > start) {          if (start != str) goto mismatch_no_msa; @@ -4705,7 +4665,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,            goto mismatch_no_msa;        }      } -    else if (reg->anchor & ANCHOR_END_BUF) { +    else if (reg->anchor & ANCR_END_BUF) {        min_semi_end = max_semi_end = (UChar* )end;      end_buf: @@ -4737,7 +4697,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,          if (range > start) goto mismatch_no_msa;        }      } -    else if (reg->anchor & ANCHOR_SEMI_END_BUF) { +    
else if (reg->anchor & ANCR_SEMI_END_BUF) {        UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);        max_semi_end = (UChar* )end; @@ -4760,7 +4720,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,          goto end_buf;        }      } -    else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) { +    else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) {        goto begin_position;      }    } @@ -4833,13 +4793,13 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,          if (! forward_search_range(reg, str, end, s, sch_range,                                     &low, &high, (UChar** )NULL)) goto mismatch; -        if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) { +        if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {            do {              MATCH_AND_RETURN_CHECK(orig_range);              prev = s;              s += enclen(reg->enc, s); -            if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { +            if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {                while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {                  prev = s;                  s += enclen(reg->enc, s); @@ -4862,6 +4822,8 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,      }    }    else {  /* backward search */ +    if (range < str) goto mismatch; +      if (orig_start < end)        orig_start += enclen(reg->enc, orig_start); /* is upper range */ diff --git a/src/regint.h b/src/regint.h index c3d1ee1..d6aec9d 100644 --- a/src/regint.h +++ b/src/regint.h @@ -62,7 +62,6 @@  #define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT    /* /(?:()|())*\2/ */  #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE     /* /\n$/ =~ "\n" */  #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR -  #define USE_RETRY_LIMIT_IN_MATCH  /* internal config */ @@ -70,27 +69,13 @@  #define USE_QUANT_PEEK_NEXT  #define USE_ST_LIBRARY -#include "regenc.h" - 
-#ifdef __cplusplus -# ifndef  HAVE_STDARG_PROTOTYPES -#  define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ -#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 -# ifndef  HAVE_STDARG_PROTOTYPES -#  define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifdef HAVE_STDARG_H -# ifndef  HAVE_STDARG_PROTOTYPES -#  define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif +#define USE_WORD_BEGIN_END        /* "\<", "\>" */ +#define USE_CAPTURE_HISTORY +#define USE_VARIABLE_META_CHARS +#define USE_POSIX_API_REGION_OPTION +#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#include "regenc.h"  #define INIT_MATCH_STACK_SIZE                     160  #define DEFAULT_MATCH_STACK_LIMIT_SIZE              0 /* unlimited */ @@ -103,12 +88,6 @@  #undef ONIG_ESCAPE_UCHAR_COLLISION  #endif -#define USE_WORD_BEGIN_END        /* "\<", "\>" */ -#define USE_CAPTURE_HISTORY -#define USE_VARIABLE_META_CHARS -#define USE_POSIX_API_REGION_OPTION -#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -  #define xmalloc     malloc  #define xrealloc    realloc  #define xcalloc     calloc @@ -152,14 +131,8 @@  #include <stddef.h> - -#ifdef HAVE_LIMITS_H  #include <limits.h> -#endif - -#ifdef HAVE_STDLIB_H  #include <stdlib.h> -#endif  #ifdef HAVE_STDINT_H  #include <stdint.h> @@ -169,11 +142,7 @@  #include <alloca.h>  #endif -#ifdef HAVE_STRING_H -# include <string.h> -#else -# include <strings.h> -#endif +#include <string.h>  #include <ctype.h>  #ifdef HAVE_SYS_TYPES_H @@ -217,6 +186,7 @@ typedef unsigned int  uintptr_t;  #define CHECK_NULL_RETURN_MEMERR(p)   if (IS_NULL(p)) return ONIGERR_MEMORY  #define NULL_UCHARP                   ((UChar* )0) +#define CHAR_MAP_SIZE       256  #define INFINITE_LEN        ONIG_INFINITE_DISTANCE  #ifdef PLATFORM_UNALIGNED_WORD_ACCESS @@ -292,9 +262,6 @@ typedef struct {  #endif  } RegexExt; -#define REG_EXTP(reg)      ((RegexExt* )((reg)->chain)) -#define REG_EXTPL(reg)     ((reg)->chain) -  struct re_pattern_buffer {    
/* common members of BBuf(bytes-buffer) */    unsigned char* p;         /* compiled pattern */ @@ -304,7 +271,6 @@ struct re_pattern_buffer {    int num_mem;                   /* used memory(...) num counted from 1 */    int num_repeat;                /* OP_REPEAT/OP_REPEAT_NG id-counter */    int num_null_check;            /* OP_EMPTY_CHECK_START/END id counter */ -  int num_comb_exp_check;        /* no longer used (combination explosion check) */    int num_call;                  /* number of subexp call */    unsigned int capture_history;  /* (?@...) flag (1-31) */    unsigned int bt_mem_start;     /* need backtrack flag */ @@ -323,19 +289,16 @@ struct re_pattern_buffer {    int            optimize;          /* optimize flag */    int            threshold_len;     /* search str-length for apply optimize */    int            anchor;            /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ -  OnigLen   anchor_dmin;       /* (SEMI_)END_BUF anchor distance */ -  OnigLen   anchor_dmax;       /* (SEMI_)END_BUF anchor distance */ +  OnigLen        anchor_dmin;       /* (SEMI_)END_BUF anchor distance */ +  OnigLen        anchor_dmax;       /* (SEMI_)END_BUF anchor distance */    int            sub_anchor;        /* start-anchor for exact or map */    unsigned char *exact;    unsigned char *exact_end; -  unsigned char  map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ -  int           *int_map;                   /* BM skip for exact_len > 255 */ -  int           *int_map_backward;          /* BM skip for backward search */ -  OnigLen   dmin;                      /* min-distance of exact or map */ -  OnigLen   dmax;                      /* max-distance of exact or map */ - -  /* regex_t link chain */ -  struct re_pattern_buffer* chain;  /* escape compile-conflict */ +  unsigned char  map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */ +  int            map_offset; +  OnigLen        dmin;                      /* min-distance of exact or map */ +  OnigLen        
dmax;                      /* max-distance of exact or map */ +  RegexExt*      extp;  }; @@ -348,12 +311,13 @@ enum StackPopLevel {  /* optimize flags */  enum OptimizeType { -  OPTIMIZE_NONE            = 0, -  OPTIMIZE_EXACT           = 1,  /* Slow Search */ -  OPTIMIZE_EXACT_BM        = 2,  /* Boyer Moore Search */ -  OPTIMIZE_EXACT_BM_NO_REV = 3,  /* BM   (but not simple match) */ -  OPTIMIZE_EXACT_IC        = 4,  /* Slow Search (ignore case) */ -  OPTIMIZE_MAP             = 5   /* char map */ +  OPTIMIZE_NONE = 0, +  OPTIMIZE_STR,                   /* Slow Search */ +  OPTIMIZE_STR_FAST,              /* Sunday quick search / BMH */ +  OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */ +  OPTIMIZE_STR_CASE_FOLD_FAST,    /* Sunday quick search / BMH (ignore case) */ +  OPTIMIZE_STR_CASE_FOLD,         /* Slow Search (ignore case) */ +  OPTIMIZE_MAP                    /* char map */  };  /* bit status */ @@ -541,32 +505,32 @@ typedef struct _BBuf {  /* has body */ -#define ANCHOR_PREC_READ        (1<<0) -#define ANCHOR_PREC_READ_NOT    (1<<1) -#define ANCHOR_LOOK_BEHIND      (1<<2) -#define ANCHOR_LOOK_BEHIND_NOT  (1<<3) +#define ANCR_PREC_READ        (1<<0) +#define ANCR_PREC_READ_NOT    (1<<1) +#define ANCR_LOOK_BEHIND      (1<<2) +#define ANCR_LOOK_BEHIND_NOT  (1<<3)  /* no body */ -#define ANCHOR_BEGIN_BUF        (1<<4) -#define ANCHOR_BEGIN_LINE       (1<<5) -#define ANCHOR_BEGIN_POSITION   (1<<6) -#define ANCHOR_END_BUF          (1<<7) -#define ANCHOR_SEMI_END_BUF     (1<<8) -#define ANCHOR_END_LINE         (1<<9) -#define ANCHOR_WORD_BOUNDARY    (1<<10) -#define ANCHOR_NO_WORD_BOUNDARY (1<<11) -#define ANCHOR_WORD_BEGIN       (1<<12) -#define ANCHOR_WORD_END         (1<<13) -#define ANCHOR_ANYCHAR_INF      (1<<14) -#define ANCHOR_ANYCHAR_INF_ML   (1<<15) -#define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY    (1<<16) -#define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17) - - -#define ANCHOR_HAS_BODY(a)      ((a)->type < ANCHOR_BEGIN_BUF) 
+#define ANCR_BEGIN_BUF        (1<<4) +#define ANCR_BEGIN_LINE       (1<<5) +#define ANCR_BEGIN_POSITION   (1<<6) +#define ANCR_END_BUF          (1<<7) +#define ANCR_SEMI_END_BUF     (1<<8) +#define ANCR_END_LINE         (1<<9) +#define ANCR_WORD_BOUNDARY    (1<<10) +#define ANCR_NO_WORD_BOUNDARY (1<<11) +#define ANCR_WORD_BEGIN       (1<<12) +#define ANCR_WORD_END         (1<<13) +#define ANCR_ANYCHAR_INF      (1<<14) +#define ANCR_ANYCHAR_INF_ML   (1<<15) +#define ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY    (1<<16) +#define ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17) + + +#define ANCHOR_HAS_BODY(a)      ((a)->type < ANCR_BEGIN_BUF)  #define IS_WORD_ANCHOR_TYPE(type) \ -  ((type) == ANCHOR_WORD_BOUNDARY || (type) == ANCHOR_NO_WORD_BOUNDARY || \ -   (type) == ANCHOR_WORD_BEGIN || (type) == ANCHOR_WORD_END) +  ((type) == ANCR_WORD_BOUNDARY || (type) == ANCR_NO_WORD_BOUNDARY || \ +   (type) == ANCR_WORD_BEGIN || (type) == ANCR_WORD_END)  /* operation code */  enum OpCode { @@ -851,6 +815,7 @@ extern void   onig_transfer P_((regex_t* to, regex_t* from));  extern int    onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));  extern RegexExt* onig_get_regex_ext(regex_t* reg);  extern int    onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); +extern int    onig_positive_int_multiply(int x, int y);  #ifdef USE_CALLOUT diff --git a/src/regparse.c b/src/regparse.c index fcc05cf..9e42e71 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -71,7 +71,7 @@ OnigSyntaxType OnigSyntaxOniguruma = {        ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |        ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |        ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 ) -  , ( SYN_GNU_REGEX_BV |  +  , ( SYN_GNU_REGEX_BV |        ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |        ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |        ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | @@ -113,7 +113,7 @@ 
OnigSyntaxType OnigSyntaxRuby = {        ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |        ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |        ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 ) -  , ( SYN_GNU_REGEX_BV |  +  , ( SYN_GNU_REGEX_BV |        ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |        ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |        ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | @@ -198,17 +198,6 @@ onig_set_parse_depth_limit(unsigned int depth)    return 0;  } -static int -positive_int_multiply(int x, int y) -{ -  if (x == 0 || y == 0) return 0; - -  if (x < INT_MAX / y) -    return x * y; -  else -    return -1; -} -  static void  bbuf_free(BBuf* bbuf)  { @@ -966,6 +955,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)  #ifdef USE_ST_LIBRARY      if (IS_NULL(t)) {        t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM); +      CHECK_NULL_RETURN_MEMERR(t);        reg->name_table = (void* )t;      }      e = (NameEntry* )xmalloc(sizeof(NameEntry)); @@ -1372,6 +1362,7 @@ callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,  #ifdef USE_ST_LIBRARY      if (IS_NULL(t)) {        t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM); +      CHECK_NULL_RETURN_MEMERR(t);        GlobalCalloutNameTable = t;      }      e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry)); @@ -1571,6 +1562,7 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,    }    for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {      if (fe->arg_types[i] == ONIG_TYPE_STRING) { +      if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;        OnigValue* val = opt_defaults + j;        UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);        CHECK_NULL_RETURN_MEMERR(ds); @@ -1616,6 +1608,7 @@ onig_get_callout_start_func(regex_t* reg, int callout_num)    CalloutListEntry* e;    e = onig_reg_callout_list_at(reg, callout_num); +  
CHECK_NULL_RETURN(e);    return e->start_func;  } @@ -1623,6 +1616,7 @@ extern const UChar*  onig_get_callout_tag_start(regex_t* reg, int callout_num)  {    CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); +  CHECK_NULL_RETURN(e);    return e->tag_start;  } @@ -1630,6 +1624,7 @@ extern const UChar*  onig_get_callout_tag_end(regex_t* reg, int callout_num)  {    CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); +  CHECK_NULL_RETURN(e);    return e->tag_end;  } @@ -1736,7 +1731,7 @@ setup_ext_callout_list_values(regex_t* reg)    int i, j;    RegexExt* ext; -  ext = REG_EXTP(reg); +  ext = reg->extp;    if (IS_NOT_NULL(ext->tag_table)) {      onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,                      (st_data_t )ext); @@ -1766,13 +1761,13 @@ setup_ext_callout_list_values(regex_t* reg)  extern int  onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)  { -  RegexExt* ext = REG_EXTP(reg); +  RegexExt* ext = reg->extp;    if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;    if (callout_num > ext->callout_num) return 0;    return (ext->callout_list[callout_num].flag & -          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0  ? 
1 : 0; +          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;  }  static int @@ -1814,7 +1809,7 @@ onig_get_callout_num_by_tag(regex_t* reg,    RegexExt* ext;    CalloutTagVal e; -  ext = REG_EXTP(reg); +  ext = reg->extp;    if (IS_NULL(ext) || IS_NULL(ext->tag_table))      return ONIGERR_INVALID_CALLOUT_TAG_NAME; @@ -1901,9 +1896,11 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,    if (r != ONIG_NORMAL) return r;    ext = onig_get_regex_ext(reg); +  CHECK_NULL_RETURN_MEMERR(ext);    r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);    e = onig_reg_callout_list_at(reg, (int )entry_val); +  CHECK_NULL_RETURN_MEMERR(e);    e->tag_start = name;    e->tag_end   = name_end; @@ -2008,7 +2005,7 @@ onig_node_free(Node* node)    switch (NODE_TYPE(node)) {    case NODE_STRING: -    if (STR_(node)->capa != 0 && +    if (STR_(node)->capacity != 0 &&          IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {        xfree(STR_(node)->s);      } @@ -2040,13 +2037,13 @@ onig_node_free(Node* node)        xfree(BACKREF_(node)->back_dynamic);      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      if (NODE_BODY(node))        onig_node_free(NODE_BODY(node));      { -      EnclosureNode* en = ENCLOSURE_(node); -      if (en->type == ENCLOSURE_IF_ELSE) { +      BagNode* en = BAG_(node); +      if (en->type == BAG_IF_ELSE) {          onig_node_free(en->te.Then);          onig_node_free(en->te.Else);        } @@ -2082,6 +2079,7 @@ node_new(void)    Node* node;    node = (Node* )xmalloc(sizeof(Node)); +  CHECK_NULL_RETURN(node);    xmemset(node, 0, sizeof(*node));  #ifdef DEBUG_NODE_FREE @@ -2138,6 +2136,8 @@ node_new_anychar_with_fixed_option(OnigOptionType option)    Node* node;    node = node_new_anychar(); +  CHECK_NULL_RETURN(node); +    ct = CTYPE_(node);    ct->options = option;    NODE_STATUS_ADD(node, FIXED_OPTION); @@ -2381,62 +2381,62 @@ node_new_quantifier(int lower, int upper, int by_number)  }  static Node* 
-node_new_enclosure(enum EnclosureType type) +node_new_bag(enum BagType type)  {    Node* node = node_new();    CHECK_NULL_RETURN(node); -  NODE_SET_TYPE(node, NODE_ENCLOSURE); -  ENCLOSURE_(node)->type = type; +  NODE_SET_TYPE(node, NODE_BAG); +  BAG_(node)->type = type;    switch (type) { -  case ENCLOSURE_MEMORY: -    ENCLOSURE_(node)->m.regnum       =  0; -    ENCLOSURE_(node)->m.called_addr  = -1; -    ENCLOSURE_(node)->m.entry_count  =  1; -    ENCLOSURE_(node)->m.called_state =  0; +  case BAG_MEMORY: +    BAG_(node)->m.regnum       =  0; +    BAG_(node)->m.called_addr  = -1; +    BAG_(node)->m.entry_count  =  1; +    BAG_(node)->m.called_state =  0;      break; -  case ENCLOSURE_OPTION: -    ENCLOSURE_(node)->o.options =  0; +  case BAG_OPTION: +    BAG_(node)->o.options =  0;      break; -  case ENCLOSURE_STOP_BACKTRACK: +  case BAG_STOP_BACKTRACK:      break; -  case ENCLOSURE_IF_ELSE: -    ENCLOSURE_(node)->te.Then = 0; -    ENCLOSURE_(node)->te.Else = 0; +  case BAG_IF_ELSE: +    BAG_(node)->te.Then = 0; +    BAG_(node)->te.Else = 0;      break;    } -  ENCLOSURE_(node)->opt_count = 0; +  BAG_(node)->opt_count = 0;    return node;  }  extern Node* -onig_node_new_enclosure(int type) +onig_node_new_bag(enum BagType type)  { -  return node_new_enclosure(type); +  return node_new_bag(type);  }  static Node* -node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else) +node_new_bag_if_else(Node* cond, Node* Then, Node* Else)  {    Node* n; -  n = node_new_enclosure(ENCLOSURE_IF_ELSE); +  n = node_new_bag(BAG_IF_ELSE);    CHECK_NULL_RETURN(n);    NODE_BODY(n) = cond; -  ENCLOSURE_(n)->te.Then = Then; -  ENCLOSURE_(n)->te.Else = Else; +  BAG_(n)->te.Then = Then; +  BAG_(n)->te.Else = Else;    return n;  }  static Node*  node_new_memory(int is_named)  { -  Node* node = node_new_enclosure(ENCLOSURE_MEMORY); +  Node* node = node_new_bag(BAG_MEMORY);    CHECK_NULL_RETURN(node);    if (is_named != 0)      NODE_STATUS_ADD(node, NAMED_GROUP); @@ -2447,12 +2447,37 
@@ node_new_memory(int is_named)  static Node*  node_new_option(OnigOptionType option)  { -  Node* node = node_new_enclosure(ENCLOSURE_OPTION); +  Node* node = node_new_bag(BAG_OPTION); +  CHECK_NULL_RETURN(node); +  BAG_(node)->o.options = option; +  return node; +} + +static Node* +node_new_group(Node* content) +{ +  Node* node; + +  node = node_new();    CHECK_NULL_RETURN(node); -  ENCLOSURE_(node)->o.options = option; +  NODE_SET_TYPE(node, NODE_LIST); +  NODE_CAR(node) = content; +  NODE_CDR(node) = NULL_NODE; +    return node;  } +static Node* +node_drop_group(Node* group) +{ +  Node* content; + +  content = NODE_CAR(group); +  NODE_CAR(group) = NULL_NODE; +  onig_node_free(group); +  return content; +} +  static int  node_new_fail(Node** node, ScanEnv* env)  { @@ -2543,7 +2568,7 @@ onig_free_reg_callout_list(int n, CalloutListEntry* list)  extern CalloutListEntry*  onig_reg_callout_list_at(regex_t* reg, int num)  { -  RegexExt* ext = REG_EXTP(reg); +  RegexExt* ext = reg->extp;    CHECK_NULL_RETURN(ext);    if (num <= 0 || num > ext->callout_num) @@ -2634,7 +2659,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env)    ns[1] = NULL_NODE;    r = ONIGERR_MEMORY; -  ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0); +  ns[0] = onig_node_new_anchor(ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);    if (IS_NULL(ns[0])) goto err;    r = node_new_true_anychar(&ns[1], env); @@ -2661,7 +2686,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env)    ns[0] = x;    ns[1] = NULL_NODE; -  x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); +  x = node_new_bag(BAG_STOP_BACKTRACK);    if (IS_NULL(x)) goto err;    NODE_BODY(x) = ns[0]; @@ -2721,7 +2746,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,    ns[0] = x;    if (possessive != 0) { -    x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); +    x = node_new_bag(BAG_STOP_BACKTRACK);      if (IS_NULL(x)) goto err0;      NODE_BODY(x) = ns[0]; @@ -2873,11 
+2898,11 @@ is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,      quant = node;    }    else { -    if (NODE_TYPE(node) == NODE_ENCLOSURE) { -      EnclosureNode* en = ENCLOSURE_(node); -      if (en->type == ENCLOSURE_STOP_BACKTRACK) { +    if (NODE_TYPE(node) == NODE_BAG) { +      BagNode* en = BAG_(node); +      if (en->type == BAG_STOP_BACKTRACK) {          *is_possessive = 1; -        quant = NODE_ENCLOSURE_BODY(en); +        quant = NODE_BAG_BODY(en);          if (NODE_TYPE(quant) != NODE_QUANT)            return 0;        } @@ -3054,7 +3079,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,    else {      r = make_absent_tail(&ns[5], &ns[6], id1, env);      if (r != 0) goto err; -   +      x = make_list(7, ns);      if (IS_NULL(x)) goto err0;    } @@ -3066,7 +3091,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,    r = ONIGERR_MEMORY;   err:    for (i = 0; i < 7; i++) onig_node_free(ns[i]); -  return r;   +  return r;  }  extern int @@ -3077,11 +3102,11 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)    if (addlen > 0) {      int len  = (int )(STR_(node)->end - STR_(node)->s); -    if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) { +    if (STR_(node)->capacity > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {        UChar* p;        int capa = len + addlen + NODE_STRING_MARGIN; -      if (capa <= STR_(node)->capa) { +      if (capa <= STR_(node)->capacity) {          onig_strcpy(STR_(node)->s + len, s, end);        }        else { @@ -3092,8 +3117,8 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)            p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);          CHECK_NULL_RETURN_MEMERR(p); -        STR_(node)->s    = p; -        STR_(node)->capa = capa; +        STR_(node)->s        = p; +        STR_(node)->capacity = capa;        }      }      else { @@ -3125,24 +3150,24 @@ extern void  
onig_node_conv_to_str_node(Node* node, int flag)  {    NODE_SET_TYPE(node, NODE_STRING); -  STR_(node)->flag = flag; -  STR_(node)->capa = 0; -  STR_(node)->s    = STR_(node)->buf; -  STR_(node)->end  = STR_(node)->buf; +  STR_(node)->flag     = flag; +  STR_(node)->capacity = 0; +  STR_(node)->s        = STR_(node)->buf; +  STR_(node)->end      = STR_(node)->buf;  }  extern void  onig_node_str_clear(Node* node)  { -  if (STR_(node)->capa != 0 && +  if (STR_(node)->capacity != 0 &&        IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {      xfree(STR_(node)->s);    } -  STR_(node)->capa = 0; -  STR_(node)->flag = 0; -  STR_(node)->s    = STR_(node)->buf; -  STR_(node)->end  = STR_(node)->buf; +  STR_(node)->capacity = 0; +  STR_(node)->flag     = 0; +  STR_(node)->s        = STR_(node)->buf; +  STR_(node)->end      = STR_(node)->buf;  }  static Node* @@ -3152,10 +3177,10 @@ node_new_str(const UChar* s, const UChar* end)    CHECK_NULL_RETURN(node);    NODE_SET_TYPE(node, NODE_STRING); -  STR_(node)->capa = 0; -  STR_(node)->flag = 0; -  STR_(node)->s    = STR_(node)->buf; -  STR_(node)->end  = STR_(node)->buf; +  STR_(node)->capacity = 0; +  STR_(node)->flag     = 0; +  STR_(node)->s        = STR_(node)->buf; +  STR_(node)->end      = STR_(node)->buf;    if (onig_node_str_cat(node, s, end)) {      onig_node_free(node);      return NULL; @@ -3173,6 +3198,7 @@ static Node*  node_new_str_raw(UChar* s, UChar* end)  {    Node* node = node_new_str(s, end); +  CHECK_NULL_RETURN(node);    NODE_STRING_SET_RAW(node);    return node;  } @@ -3205,6 +3231,7 @@ str_node_split_last_char(Node* node, OnigEncoding enc)      p = onigenc_get_prev_char_head(enc, sn->s, sn->end);      if (p && p > sn->s) { /* can be split. 
*/        rn = node_new_str(p, sn->end); +      CHECK_NULL_RETURN(rn);        if (NODE_STRING_IS_RAW(node))          NODE_STRING_SET_RAW(rn); @@ -3795,7 +3822,7 @@ is_invalid_quantifier_target(Node* node)      return 1;      break; -  case NODE_ENCLOSURE: +  case NODE_BAG:      /* allow enclosed elements */      /* return is_invalid_quantifier_target(NODE_BODY(node)); */      break; @@ -3877,7 +3904,7 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)    if (pnum < 0 || cnum < 0) {      if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {        if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) { -        int n = positive_int_multiply(p->lower, c->lower); +        int n = onig_positive_int_multiply(p->lower, c->lower);          if (n >= 0) {            p->lower = p->upper = n;            NODE_BODY(pnode) = NODE_BODY(cnode); @@ -3972,7 +3999,7 @@ node_new_general_newline(Node** node, ScanEnv* env)      if (r != 0) goto err1;    } -  x = node_new_enclosure_if_else(crnl, 0, ncc); +  x = node_new_bag_if_else(crnl, 0, ncc);    if (IS_NULL(x)) goto err1;    *node = x; @@ -4552,7 +4579,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,    OnigCodePoint x;    UChar *q;    UChar *p = from; -   +    while (p < to) {      x = ONIGENC_MBC_TO_CODE(enc, p, to);      q = p + enclen(enc, p); @@ -4701,12 +4728,12 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)            IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {          PINC;          tok->type = TK_CHAR_PROPERTY; -        tok->u.prop.not = (c == 'P' ? 1 : 0); +        tok->u.prop.not = c == 'P';          if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {            PFETCH(c2);            if (c2 == '^') { -            tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); +            tok->u.prop.not = tok->u.prop.not == 0;            }            else              PUNFETCH; @@ -4986,38 +5013,38 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)      case 'b':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;        tok->type = TK_ANCHOR; -      tok->u.anchor = ANCHOR_WORD_BOUNDARY; +      tok->u.anchor = ANCR_WORD_BOUNDARY;        break;      case 'B':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;        tok->type = TK_ANCHOR; -      tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY; +      tok->u.anchor = ANCR_NO_WORD_BOUNDARY;        break;      case 'y':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;        tok->type = TK_ANCHOR; -      tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; +      tok->u.anchor = ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;        break;      case 'Y':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;        tok->type = TK_ANCHOR; -      tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; +      tok->u.anchor = ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;        break;  #ifdef USE_WORD_BEGIN_END      case '<':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;        tok->type = TK_ANCHOR; -      tok->u.anchor = ANCHOR_WORD_BEGIN; +      tok->u.anchor = ANCR_WORD_BEGIN;        break;      case '>':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;        tok->type = TK_ANCHOR; -      tok->u.anchor = ANCHOR_WORD_END; +      tok->u.anchor = ANCR_WORD_END;        break;  #endif @@ -5092,26 +5119,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;      begin_buf:        tok->type = TK_ANCHOR; -      tok->u.subtype = ANCHOR_BEGIN_BUF; +      tok->u.subtype = ANCR_BEGIN_BUF;        break;      case 'Z':        if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;        tok->type = TK_ANCHOR; -      tok->u.subtype = ANCHOR_SEMI_END_BUF; +      tok->u.subtype = ANCR_SEMI_END_BUF;        break;      case 'z':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;      end_buf:        tok->type = TK_ANCHOR; -      tok->u.subtype = ANCHOR_END_BUF; +      tok->u.subtype = ANCR_END_BUF;        break;      case 'G':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;        tok->type = TK_ANCHOR; -      tok->u.subtype = ANCHOR_BEGIN_POSITION; +      tok->u.subtype = ANCR_BEGIN_POSITION;        break;      case '`': @@ -5214,7 +5241,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)          goto skip_backref;        } -      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&  +      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&            (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */          if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {            if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node)) @@ -5382,13 +5409,13 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)            IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {          PINC;          tok->type = TK_CHAR_PROPERTY; -        tok->u.prop.not = (c == 'P' ? 1 : 0); +        tok->u.prop.not = c == 'P';          if (!PEND &&              IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {            PFETCH(c);            if (c == '^') { -            tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); +            tok->u.prop.not = tok->u.prop.not == 0;            }            else              PUNFETCH; @@ -5606,14 +5633,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)        if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;        tok->type = TK_ANCHOR;        tok->u.subtype = (IS_SINGLELINE(env->options) -                        ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); +                        ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);        break;      case '$':        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;        tok->type = TK_ANCHOR;        tok->u.subtype = (IS_SINGLELINE(env->options) -                        ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); +                        ? ANCR_SEMI_END_BUF : ANCR_END_LINE);        break;      case '[': @@ -6509,7 +6536,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en  }  static int parse_subexp(Node** top, OnigToken* tok, int term, -                        UChar** src, UChar* end, ScanEnv* env); +                        UChar** src, UChar* end, ScanEnv* env, int group_head);  #ifdef USE_CALLOUT @@ -6605,6 +6632,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv    if (r != 0) return r;    ext = onig_get_regex_ext(env->reg); +  CHECK_NULL_RETURN_MEMERR(ext);    if (IS_NULL(ext->pattern)) {      r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);      if (r != ONIG_NORMAL) return r; @@ -6625,6 +6653,11 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv    }    e = onig_reg_callout_list_at(env->reg, num); +  if (IS_NULL(e)) { +    xfree(contents); +    return ONIGERR_MEMORY; +  } +    e->of      = ONIG_CALLOUT_OF_CONTENTS;    e->in      = in;    e->name_id = ONIG_NON_NAME_ID; @@ -6920,6 +6953,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en    if (r != 0) return r;    ext = onig_get_regex_ext(env->reg); +  CHECK_NULL_RETURN_MEMERR(ext);    if (IS_NULL(ext->pattern)) {      r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);      if (r != ONIG_NORMAL) return r; @@ -6934,6 +6968,8 @@ parse_callout_of_name(Node** 
np, int cterm, UChar** src, UChar* end, ScanEnv* en    if (r != ONIG_NORMAL) return r;    e = onig_reg_callout_list_at(env->reg, num); +  CHECK_NULL_RETURN_MEMERR(e); +    e->of         = ONIG_CALLOUT_OF_NAME;    e->in         = in;    e->name_id    = name_id; @@ -6957,8 +6993,8 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en  #endif  static int -parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, -                ScanEnv* env) +parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, +          ScanEnv* env)  {    int r, num;    Node *target; @@ -6985,20 +7021,20 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,      group:        r = fetch_token(tok, &p, end, env);        if (r < 0) return r; -      r = parse_subexp(np, tok, term, &p, end, env); +      r = parse_subexp(np, tok, term, &p, end, env, 0);        if (r < 0) return r;        *src = p;        return 1; /* group */        break;      case '=': -      *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0); +      *np = onig_node_new_anchor(ANCR_PREC_READ, 0);        break;      case '!':  /*         preceding read */ -      *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0); +      *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0);        break;      case '>':            /* (?>...) 
stop backtrack */ -      *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); +      *np = node_new_bag(BAG_STOP_BACKTRACK);        break;      case '\'': @@ -7013,9 +7049,9 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,        if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;        PFETCH(c);        if (c == '=') -        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0); +        *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0);        else if (c == '!') -        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0); +        *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0);        else {          if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {            UChar *name; @@ -7043,7 +7079,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,            if (r != 0) return r;            *np = node_new_memory(1);            CHECK_NULL_RETURN_MEMERR(*np); -          ENCLOSURE_(*np)->m.regnum = num; +          BAG_(*np)->m.regnum = num;            if (list_capture != 0)              MEM_STATUS_ON_SIMPLE(env->capture_history, num);            env->num_named++; @@ -7080,7 +7116,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,          r = fetch_token(tok, &p, end, env);          if (r < 0) return r; -        r = parse_subexp(&absent, tok, term, &p, end, env); +        r = parse_subexp(&absent, tok, term, &p, end, env, 1);          if (r < 0) {            onig_node_free(absent);            return r; @@ -7258,7 +7294,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,            condition_is_checker = 0;            r = fetch_token(tok, &p, end, env);            if (r < 0) return r; -          r = parse_subexp(&condition, tok, term, &p, end, env); +          r = parse_subexp(&condition, tok, term, &p, end, env, 0);            if (r < 0) {              onig_node_free(condition);              return r; @@ -7299,7 
+7335,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,              onig_node_free(condition);              return r;            } -          r = parse_subexp(&target, tok, term, &p, end, env); +          r = parse_subexp(&target, tok, term, &p, end, env, 1);            if (r < 0) {              onig_node_free(condition);              onig_node_free(target); @@ -7327,7 +7363,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,              }            } -          *np = node_new_enclosure_if_else(condition, Then, Else); +          *np = node_new_bag_if_else(condition, Then, Else);            if (IS_NULL(*np)) {              onig_node_free(condition);              onig_node_free(Then); @@ -7362,7 +7398,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,          else if (num >= (int )MEM_STATUS_BITS_NUM) {            return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;          } -        ENCLOSURE_(*np)->m.regnum = num; +        BAG_(*np)->m.regnum = num;          MEM_STATUS_ON_SIMPLE(env->capture_history, num);        }        else { @@ -7431,7 +7467,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,              env->options = option;              r = fetch_token(tok, &p, end, env);              if (r < 0) return r; -            r = parse_subexp(&target, tok, term, &p, end, env); +            r = parse_subexp(&target, tok, term, &p, end, env, 0);              env->options = prev;              if (r < 0) {                onig_node_free(target); @@ -7472,13 +7508,13 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,      CHECK_NULL_RETURN_MEMERR(*np);      num = scan_env_add_mem_entry(env);      if (num < 0) return num; -    ENCLOSURE_(*np)->m.regnum = num; +    BAG_(*np)->m.regnum = num;    }    CHECK_NULL_RETURN_MEMERR(*np);    r = fetch_token(tok, &p, end, env);    if (r < 0) return r; -  r = 
parse_subexp(&target, tok, term, &p, end, env); +  r = parse_subexp(&target, tok, term, &p, end, env, 0);    if (r < 0) {      onig_node_free(target);      return r; @@ -7486,10 +7522,10 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,    NODE_BODY(*np) = target; -  if (NODE_TYPE(*np) == NODE_ENCLOSURE) { -    if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) { +  if (NODE_TYPE(*np) == NODE_BAG) { +    if (BAG_(*np)->type == BAG_MEMORY) {        /* Don't move this to previous of parse_subexp() */ -      r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np); +      r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);        if (r != 0) return r;      }    } @@ -7518,7 +7554,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)    switch (NODE_TYPE(target)) {    case NODE_STRING: -    if (! group) { +    if (group == 0) {        if (str_node_can_be_split(target, env->enc)) {          Node* n = str_node_split_last_char(target, env->enc);          if (IS_NOT_NULL(n)) { @@ -7710,7 +7746,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)  static int  parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, -          ScanEnv* env) +          ScanEnv* env, int group_head)  {    int r, len, group = 0;    Node* qn; @@ -7724,22 +7760,35 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,    case TK_ALT:    case TK_EOT:    end_of_token: -  *np = node_new_empty(); -  return tok->type; +    *np = node_new_empty(); +    CHECK_NULL_RETURN_MEMERR(*np); +    return tok->type;    break;    case TK_SUBEXP_OPEN: -    r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env); +    r = parse_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);      if (r < 0) return r; -    if (r == 1) group = 1; +    if (r == 1) { /* group */ +      if (group_head == 0) +        group = 1; +      else { +        Node* target = *np; +        *np = node_new_group(target); +    
    if (IS_NULL(*np)) { +          onig_node_free(target); +          return ONIGERR_MEMORY; +        } +        group = 2; +      } +    }      else if (r == 2) { /* option only */        Node* target;        OnigOptionType prev = env->options; -      env->options = ENCLOSURE_(*np)->o.options; +      env->options = BAG_(*np)->o.options;        r = fetch_token(tok, src, end, env);        if (r < 0) return r; -      r = parse_subexp(&target, tok, term, src, end, env); +      r = parse_subexp(&target, tok, term, src, end, env, 0);        env->options = prev;        if (r < 0) {          onig_node_free(target); @@ -7968,6 +8017,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,        int ascii_mode =          IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;        *np = onig_node_new_anchor(tok->u.anchor, ascii_mode); +      CHECK_NULL_RETURN_MEMERR(*np);      }      break; @@ -7976,8 +8026,10 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))          return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; -      else +      else {          *np = node_new_empty(); +        CHECK_NULL_RETURN_MEMERR(*np); +      }      }      else {        goto tk_byte; @@ -8023,14 +8075,23 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,    repeat:      if (r == TK_OP_REPEAT || r == TK_INTERVAL) { +      Node* target; +        if (is_invalid_quantifier_target(*targetp))          return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;        qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, -                               (r == TK_INTERVAL ? 
1 : 0)); +                               r == TK_INTERVAL);        CHECK_NULL_RETURN_MEMERR(qn);        QUANT_(qn)->greedy = tok->u.repeat.greedy; -      r = set_quantifier(qn, *targetp, group, env); +      if (group == 2) { +        target = node_drop_group(*np); +        *np = NULL_NODE; +      } +      else { +        target = *targetp; +      } +      r = set_quantifier(qn, target, group, env);        if (r < 0) {          onig_node_free(qn);          return r; @@ -8038,7 +8099,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,        if (tok->u.repeat.possessive != 0) {          Node* en; -        en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); +        en = node_new_bag(BAG_STOP_BACKTRACK);          if (IS_NULL(en)) {            onig_node_free(qn);            return ONIGERR_MEMORY; @@ -8077,13 +8138,13 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,  static int  parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, -             ScanEnv* env) +             ScanEnv* env, int group_head)  {    int r;    Node *node, **headp;    *top = NULL; -  r = parse_exp(&node, tok, term, src, end, env); +  r = parse_exp(&node, tok, term, src, end, env, group_head);    if (r < 0) {      onig_node_free(node);      return r; @@ -8094,9 +8155,14 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,    }    else {      *top  = node_new_list(node, NULL); +    if (IS_NULL(*top)) { +      onig_node_free(node); +      return ONIGERR_MEMORY; +    } +      headp = &(NODE_CDR(*top));      while (r != TK_EOT && r != term && r != TK_ALT) { -      r = parse_exp(&node, tok, term, src, end, env); +      r = parse_exp(&node, tok, term, src, end, env, 0);        if (r < 0) {          onig_node_free(node);          return r; @@ -8120,7 +8186,7 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,  /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */  static int  
parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, -             ScanEnv* env) +             ScanEnv* env, int group_head)  {    int r;    Node *node, **headp; @@ -8129,7 +8195,8 @@ parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,    env->parse_depth++;    if (env->parse_depth > ParseDepthLimit)      return ONIGERR_PARSE_DEPTH_LIMIT_OVER; -  r = parse_branch(&node, tok, term, src, end, env); + +  r = parse_branch(&node, tok, term, src, end, env, group_head);    if (r < 0) {      onig_node_free(node);      return r; @@ -8140,16 +8207,27 @@ parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,    }    else if (r == TK_ALT) {      *top  = onig_node_new_alt(node, NULL); +    if (IS_NULL(*top)) { +      onig_node_free(node); +      return ONIGERR_MEMORY; +    } +      headp = &(NODE_CDR(*top));      while (r == TK_ALT) {        r = fetch_token(tok, src, end, env);        if (r < 0) return r; -      r = parse_branch(&node, tok, term, src, end, env); +      r = parse_branch(&node, tok, term, src, end, env, 0);        if (r < 0) {          onig_node_free(node);          return r;        }        *headp = onig_node_new_alt(node, NULL); +      if (IS_NULL(*headp)) { +        onig_node_free(node); +        onig_node_free(*top); +        return ONIGERR_MEMORY; +      } +        headp = &(NODE_CDR(*headp));      } @@ -8177,7 +8255,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)    r = fetch_token(&tok, src, end, env);    if (r < 0) return r; -  r = parse_subexp(top, &tok, TK_EOT, src, end, env); +  r = parse_subexp(top, &tok, TK_EOT, src, end, env, 0);    if (r < 0) return r;    return 0; @@ -8193,7 +8271,7 @@ make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)    CHECK_NULL_RETURN_MEMERR(x);    NODE_BODY(x) = node; -  ENCLOSURE_(x)->m.regnum = 0; +  BAG_(x)->m.regnum = 0;    r = scan_env_set_mem_node(env, 0, x);    if (r != 0) {      onig_node_free(x); @@ -8249,7 +8327,7 @@ 
onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,    reg->num_mem = env->num_mem;  #ifdef USE_CALLOUT -  ext = REG_EXTP(reg); +  ext = reg->extp;    if (IS_NOT_NULL(ext) && ext->callout_num > 0) {      r = setup_ext_callout_list_values(reg);    } diff --git a/src/regparse.h b/src/regparse.h index ff24eeb..ede9bb8 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -31,6 +31,10 @@  #include "regint.h" +#define NODE_STRING_MARGIN         16 +#define NODE_STRING_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_BACKREFS_SIZE          6 +  /* node type */  typedef enum {    NODE_STRING    =  0, @@ -38,7 +42,7 @@ typedef enum {    NODE_CTYPE     =  2,    NODE_BACKREF   =  3,    NODE_QUANT     =  4, -  NODE_ENCLOSURE =  5, +  NODE_BAG       =  5,    NODE_ANCHOR    =  6,    NODE_LIST      =  7,    NODE_ALT       =  8, @@ -46,95 +50,23 @@ typedef enum {    NODE_GIMMICK   = 10  } NodeType; +enum BagType { +  BAG_MEMORY         = 0, +  BAG_OPTION         = 1, +  BAG_STOP_BACKTRACK = 2, +  BAG_IF_ELSE        = 3, +}; +  enum GimmickType { -  GIMMICK_FAIL = 0, -  GIMMICK_KEEP = 1, -  GIMMICK_SAVE = 2, +  GIMMICK_FAIL       = 0, +  GIMMICK_KEEP       = 1, +  GIMMICK_SAVE       = 2,    GIMMICK_UPDATE_VAR = 3,  #ifdef USE_CALLOUT -  GIMMICK_CALLOUT = 4, +  GIMMICK_CALLOUT    = 4,  #endif  }; - -/* node type bit */ -#define NODE_TYPE2BIT(type)      (1<<(type)) - -#define NODE_BIT_STRING     NODE_TYPE2BIT(NODE_STRING) -#define NODE_BIT_CCLASS     NODE_TYPE2BIT(NODE_CCLASS) -#define NODE_BIT_CTYPE      NODE_TYPE2BIT(NODE_CTYPE) -#define NODE_BIT_BACKREF    NODE_TYPE2BIT(NODE_BACKREF) -#define NODE_BIT_QUANT      NODE_TYPE2BIT(NODE_QUANT) -#define NODE_BIT_ENCLOSURE  NODE_TYPE2BIT(NODE_ENCLOSURE) -#define NODE_BIT_ANCHOR     NODE_TYPE2BIT(NODE_ANCHOR) -#define NODE_BIT_LIST       NODE_TYPE2BIT(NODE_LIST) -#define NODE_BIT_ALT        NODE_TYPE2BIT(NODE_ALT) -#define NODE_BIT_CALL       NODE_TYPE2BIT(NODE_CALL) -#define NODE_BIT_GIMMICK    
NODE_TYPE2BIT(NODE_GIMMICK) - -#define NODE_IS_SIMPLE_TYPE(node) \ -  ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ -    (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0) - -#define NODE_TYPE(node)             ((node)->u.base.node_type) -#define NODE_SET_TYPE(node, ntype)   (node)->u.base.node_type = (ntype) - -#define STR_(node)         (&((node)->u.str)) -#define CCLASS_(node)      (&((node)->u.cclass)) -#define CTYPE_(node)       (&((node)->u.ctype)) -#define BACKREF_(node)     (&((node)->u.backref)) -#define QUANT_(node)       (&((node)->u.quant)) -#define ENCLOSURE_(node)   (&((node)->u.enclosure)) -#define ANCHOR_(node)      (&((node)->u.anchor)) -#define CONS_(node)        (&((node)->u.cons)) -#define CALL_(node)        (&((node)->u.call)) -#define GIMMICK_(node)     (&((node)->u.gimmick)) - -#define NODE_CAR(node)         (CONS_(node)->car) -#define NODE_CDR(node)         (CONS_(node)->cdr) - -#define CTYPE_ANYCHAR      -1 -#define NODE_IS_ANYCHAR(node) \ -  (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) - -#define CTYPE_OPTION(node, reg) \ -  (NODE_IS_FIXED_OPTION(node) ? 
CTYPE_(node)->options : reg->options) - - -#define ANCHOR_ANYCHAR_INF_MASK  (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML) -#define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) - -enum EnclosureType { -  ENCLOSURE_MEMORY = 0, -  ENCLOSURE_OPTION = 1, -  ENCLOSURE_STOP_BACKTRACK = 2, -  ENCLOSURE_IF_ELSE = 3, -}; - -#define NODE_STRING_MARGIN         16 -#define NODE_STRING_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE       6 - -#define NODE_STRING_RAW                (1<<0) /* by backslashed number */ -#define NODE_STRING_AMBIG              (1<<1) -#define NODE_STRING_DONT_GET_OPT_INFO  (1<<2) - -#define NODE_STRING_LEN(node)            (int )((node)->u.str.end - (node)->u.str.s) -#define NODE_STRING_SET_RAW(node)        (node)->u.str.flag |= NODE_STRING_RAW -#define NODE_STRING_CLEAR_RAW(node)      (node)->u.str.flag &= ~NODE_STRING_RAW -#define NODE_STRING_SET_AMBIG(node)      (node)->u.str.flag |= NODE_STRING_AMBIG -#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \ -  (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO -#define NODE_STRING_IS_RAW(node) \ -  (((node)->u.str.flag & NODE_STRING_RAW) != 0) -#define NODE_STRING_IS_AMBIG(node) \ -  (((node)->u.str.flag & NODE_STRING_AMBIG) != 0) -#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \ -  (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0) - -#define BACKREFS_P(br) \ -  (IS_NOT_NULL((br)->back_dynamic) ? 
(br)->back_dynamic : (br)->back_static) -  enum QuantBodyEmpty {    QUANT_BODY_IS_NOT_EMPTY = 0,    QUANT_BODY_IS_EMPTY     = 1, @@ -142,65 +74,6 @@ enum QuantBodyEmpty {    QUANT_BODY_IS_EMPTY_REC = 3  }; -/* node status bits */ -#define NODE_ST_MIN_FIXED             (1<<0) -#define NODE_ST_MAX_FIXED             (1<<1) -#define NODE_ST_CLEN_FIXED            (1<<2) -#define NODE_ST_MARK1                 (1<<3) -#define NODE_ST_MARK2                 (1<<4) -#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5) -#define NODE_ST_RECURSION             (1<<6) -#define NODE_ST_CALLED                (1<<7) -#define NODE_ST_ADDR_FIXED            (1<<8) -#define NODE_ST_NAMED_GROUP           (1<<9) -#define NODE_ST_IN_REAL_REPEAT        (1<<10) /* STK_REPEAT is nested in stack. */ -#define NODE_ST_IN_ZERO_REPEAT        (1<<11) /* (....){0} */ -#define NODE_ST_IN_MULTI_ENTRY        (1<<12) -#define NODE_ST_NEST_LEVEL            (1<<13) -#define NODE_ST_BY_NUMBER             (1<<14) /* {n,m} */ -#define NODE_ST_BY_NAME               (1<<15) /* backref by name */ -#define NODE_ST_BACKREF               (1<<16) -#define NODE_ST_CHECKER               (1<<17) -#define NODE_ST_FIXED_OPTION          (1<<18) -#define NODE_ST_PROHIBIT_RECURSION    (1<<19) -#define NODE_ST_SUPER                 (1<<20) - - -#define NODE_STATUS(node)           (((Node* )node)->u.base.status) -#define NODE_STATUS_ADD(node,f)     (NODE_STATUS(node) |= (NODE_ST_ ## f)) -#define NODE_STATUS_REMOVE(node,f)  (NODE_STATUS(node) &= ~(NODE_ST_ ## f)) - -#define NODE_IS_BY_NUMBER(node)       ((NODE_STATUS(node) & NODE_ST_BY_NUMBER)      != 0) -#define NODE_IS_IN_REAL_REPEAT(node)  ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0) -#define NODE_IS_CALLED(node)          ((NODE_STATUS(node) & NODE_ST_CALLED)         != 0) -#define NODE_IS_IN_MULTI_ENTRY(node)  ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0) -#define NODE_IS_RECURSION(node)       ((NODE_STATUS(node) & NODE_ST_RECURSION)      != 0) -#define 
NODE_IS_IN_ZERO_REPEAT(node)  ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0) -#define NODE_IS_NAMED_GROUP(node)     ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP)  != 0) -#define NODE_IS_ADDR_FIXED(node)      ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED)   != 0) -#define NODE_IS_CLEN_FIXED(node)      ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED)   != 0) -#define NODE_IS_MIN_FIXED(node)       ((NODE_STATUS(node) & NODE_ST_MIN_FIXED)    != 0) -#define NODE_IS_MAX_FIXED(node)       ((NODE_STATUS(node) & NODE_ST_MAX_FIXED)    != 0) -#define NODE_IS_MARK1(node)           ((NODE_STATUS(node) & NODE_ST_MARK1)        != 0) -#define NODE_IS_MARK2(node)           ((NODE_STATUS(node) & NODE_ST_MARK2)        != 0) -#define NODE_IS_NEST_LEVEL(node)      ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL)   != 0) -#define NODE_IS_BY_NAME(node)         ((NODE_STATUS(node) & NODE_ST_BY_NAME)      != 0) -#define NODE_IS_BACKREF(node)         ((NODE_STATUS(node) & NODE_ST_BACKREF)      != 0) -#define NODE_IS_CHECKER(node)         ((NODE_STATUS(node) & NODE_ST_CHECKER)      != 0) -#define NODE_IS_FIXED_OPTION(node)    ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0) -#define NODE_IS_SUPER(node)           ((NODE_STATUS(node) & NODE_ST_SUPER)        != 0) -#define NODE_IS_PROHIBIT_RECURSION(node) \ -    ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0) -#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ -    ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0) - -#define NODE_BODY(node)           ((node)->u.base.body) -#define NODE_QUANT_BODY(node)      ((node)->body) -#define NODE_ENCLOSURE_BODY(node)   ((node)->body) -#define NODE_CALL_BODY(node)      ((node)->body) -#define NODE_ANCHOR_BODY(node)    ((node)->body) - -  typedef struct {    NodeType node_type;    int status; @@ -208,7 +81,7 @@ typedef struct {    UChar* s;    UChar* end;    unsigned int flag; -  int    capa;    /* (allocated size - 1) or 0: use buf[] */ +  int    capacity;    /* (allocated size - 1) or 0: use buf[] */    UChar 
 buf[NODE_STRING_BUF_SIZE];  } StrNode; @@ -240,7 +113,7 @@ typedef struct {    int status;    struct _Node* body; -  enum EnclosureType type; +  enum BagType type;    union {      struct {        int regnum; @@ -262,7 +135,7 @@ typedef struct {    OnigLen max_len;   /* max length (byte) */    int char_len;      /* character length  */    int opt_count;     /* referenced count in optimize_nodes() */ -} EnclosureNode; +} BagNode;  #ifdef USE_CALL @@ -280,7 +153,7 @@ typedef struct {  typedef struct {    NodeType node_type;    int status; -  struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */ +  struct _Node* body; /* to BagNode : BAG_MEMORY */    int     by_number;    int     group_num; @@ -350,7 +223,7 @@ typedef struct _Node {      StrNode       str;      CClassNode    cclass;      QuantNode     quant; -    EnclosureNode enclosure; +    BagNode       bag;      BackRefNode   backref;      AnchorNode    anchor;      ConsAltNode   cons; @@ -362,9 +235,138 @@ typedef struct _Node {    } u;  } Node; -  #define NULL_NODE  ((Node* )0) + +/* node type bit */ +#define NODE_TYPE2BIT(type)      (1<<(type)) + +#define NODE_BIT_STRING     NODE_TYPE2BIT(NODE_STRING) +#define NODE_BIT_CCLASS     NODE_TYPE2BIT(NODE_CCLASS) +#define NODE_BIT_CTYPE      NODE_TYPE2BIT(NODE_CTYPE) +#define NODE_BIT_BACKREF    NODE_TYPE2BIT(NODE_BACKREF) +#define NODE_BIT_QUANT      NODE_TYPE2BIT(NODE_QUANT) +#define NODE_BIT_BAG        NODE_TYPE2BIT(NODE_BAG) +#define NODE_BIT_ANCHOR     NODE_TYPE2BIT(NODE_ANCHOR) +#define NODE_BIT_LIST       NODE_TYPE2BIT(NODE_LIST) +#define NODE_BIT_ALT        NODE_TYPE2BIT(NODE_ALT) +#define NODE_BIT_CALL       NODE_TYPE2BIT(NODE_CALL) +#define NODE_BIT_GIMMICK    NODE_TYPE2BIT(NODE_GIMMICK) + +#define NODE_IS_SIMPLE_TYPE(node) \ +  ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ +    (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0) + +#define NODE_TYPE(node)             ((node)->u.base.node_type) +#define NODE_SET_TYPE(node, ntype)   
(node)->u.base.node_type = (ntype) + +#define STR_(node)         (&((node)->u.str)) +#define CCLASS_(node)      (&((node)->u.cclass)) +#define CTYPE_(node)       (&((node)->u.ctype)) +#define BACKREF_(node)     (&((node)->u.backref)) +#define QUANT_(node)       (&((node)->u.quant)) +#define BAG_(node)         (&((node)->u.bag)) +#define ANCHOR_(node)      (&((node)->u.anchor)) +#define CONS_(node)        (&((node)->u.cons)) +#define CALL_(node)        (&((node)->u.call)) +#define GIMMICK_(node)     (&((node)->u.gimmick)) + +#define NODE_CAR(node)         (CONS_(node)->car) +#define NODE_CDR(node)         (CONS_(node)->cdr) + +#define CTYPE_ANYCHAR      -1 +#define NODE_IS_ANYCHAR(node) \ +  (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) + +#define CTYPE_OPTION(node, reg) \ +  (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) + + +#define ANCR_ANYCHAR_INF_MASK  (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML) +#define ANCR_END_BUF_MASK      (ANCR_END_BUF | ANCR_SEMI_END_BUF) + +#define NODE_STRING_RAW                (1<<0) /* by backslashed number */ +#define NODE_STRING_AMBIG              (1<<1) +#define NODE_STRING_GOOD_AMBIG         (1<<2) +#define NODE_STRING_DONT_GET_OPT_INFO  (1<<3) + +#define NODE_STRING_LEN(node)            (int )((node)->u.str.end - (node)->u.str.s) +#define NODE_STRING_SET_RAW(node)        (node)->u.str.flag |= NODE_STRING_RAW +#define NODE_STRING_CLEAR_RAW(node)      (node)->u.str.flag &= ~NODE_STRING_RAW +#define NODE_STRING_SET_AMBIG(node)      (node)->u.str.flag |= NODE_STRING_AMBIG +#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG +#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \ +  (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO +#define NODE_STRING_IS_RAW(node) \ +  (((node)->u.str.flag & NODE_STRING_RAW) != 0) +#define NODE_STRING_IS_AMBIG(node) \ +  (((node)->u.str.flag & NODE_STRING_AMBIG) != 0) +#define NODE_STRING_IS_GOOD_AMBIG(node) \ +  
(((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0) +#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \ +  (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0) + +#define BACKREFS_P(br) \ +  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) + +/* node status bits */ +#define NODE_ST_MIN_FIXED             (1<<0) +#define NODE_ST_MAX_FIXED             (1<<1) +#define NODE_ST_CLEN_FIXED            (1<<2) +#define NODE_ST_MARK1                 (1<<3) +#define NODE_ST_MARK2                 (1<<4) +#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5) +#define NODE_ST_RECURSION             (1<<6) +#define NODE_ST_CALLED                (1<<7) +#define NODE_ST_ADDR_FIXED            (1<<8) +#define NODE_ST_NAMED_GROUP           (1<<9) +#define NODE_ST_IN_REAL_REPEAT        (1<<10) /* STK_REPEAT is nested in stack. */ +#define NODE_ST_IN_ZERO_REPEAT        (1<<11) /* (....){0} */ +#define NODE_ST_IN_MULTI_ENTRY        (1<<12) +#define NODE_ST_NEST_LEVEL            (1<<13) +#define NODE_ST_BY_NUMBER             (1<<14) /* {n,m} */ +#define NODE_ST_BY_NAME               (1<<15) /* backref by name */ +#define NODE_ST_BACKREF               (1<<16) +#define NODE_ST_CHECKER               (1<<17) +#define NODE_ST_FIXED_OPTION          (1<<18) +#define NODE_ST_PROHIBIT_RECURSION    (1<<19) +#define NODE_ST_SUPER                 (1<<20) + + +#define NODE_STATUS(node)           (((Node* )node)->u.base.status) +#define NODE_STATUS_ADD(node,f)     (NODE_STATUS(node) |= (NODE_ST_ ## f)) +#define NODE_STATUS_REMOVE(node,f)  (NODE_STATUS(node) &= ~(NODE_ST_ ## f)) + +#define NODE_IS_BY_NUMBER(node)       ((NODE_STATUS(node) & NODE_ST_BY_NUMBER)      != 0) +#define NODE_IS_IN_REAL_REPEAT(node)  ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0) +#define NODE_IS_CALLED(node)          ((NODE_STATUS(node) & NODE_ST_CALLED)         != 0) +#define NODE_IS_IN_MULTI_ENTRY(node)  ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0) +#define NODE_IS_RECURSION(node)       
((NODE_STATUS(node) & NODE_ST_RECURSION)      != 0) +#define NODE_IS_IN_ZERO_REPEAT(node)  ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0) +#define NODE_IS_NAMED_GROUP(node)     ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP)  != 0) +#define NODE_IS_ADDR_FIXED(node)      ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED)   != 0) +#define NODE_IS_CLEN_FIXED(node)      ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED)   != 0) +#define NODE_IS_MIN_FIXED(node)       ((NODE_STATUS(node) & NODE_ST_MIN_FIXED)    != 0) +#define NODE_IS_MAX_FIXED(node)       ((NODE_STATUS(node) & NODE_ST_MAX_FIXED)    != 0) +#define NODE_IS_MARK1(node)           ((NODE_STATUS(node) & NODE_ST_MARK1)        != 0) +#define NODE_IS_MARK2(node)           ((NODE_STATUS(node) & NODE_ST_MARK2)        != 0) +#define NODE_IS_NEST_LEVEL(node)      ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL)   != 0) +#define NODE_IS_BY_NAME(node)         ((NODE_STATUS(node) & NODE_ST_BY_NAME)      != 0) +#define NODE_IS_BACKREF(node)         ((NODE_STATUS(node) & NODE_ST_BACKREF)      != 0) +#define NODE_IS_CHECKER(node)         ((NODE_STATUS(node) & NODE_ST_CHECKER)      != 0) +#define NODE_IS_FIXED_OPTION(node)    ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0) +#define NODE_IS_SUPER(node)           ((NODE_STATUS(node) & NODE_ST_SUPER)        != 0) +#define NODE_IS_PROHIBIT_RECURSION(node) \ +    ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0) +#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ +    ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0) + +#define NODE_BODY(node)           ((node)->u.base.body) +#define NODE_QUANT_BODY(node)     ((node)->body) +#define NODE_BAG_BODY(node)       ((node)->body) +#define NODE_CALL_BODY(node)      ((node)->body) +#define NODE_ANCHOR_BODY(node)    ((node)->body) +  #define SCANENV_MEMENV_SIZE               8  #define SCANENV_MEMENV(senv) \   (IS_NOT_NULL((senv)->mem_env_dynamic) ? 
\ @@ -434,7 +436,7 @@ extern void   onig_node_conv_to_str_node P_((Node* node, int raw));  extern int    onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));  extern int    onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));  extern void   onig_node_free P_((Node* node)); -extern Node*  onig_node_new_enclosure P_((int type)); +extern Node*  onig_node_new_bag P_((enum BagType type));  extern Node*  onig_node_new_anchor P_((int type, int ascii_mode));  extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));  extern Node*  onig_node_new_list P_((Node* left, Node* right)); diff --git a/src/regposerr.c b/src/regposerr.c index 2e2a8e2..c640a81 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -37,11 +37,7 @@  #include "config.h"  #include "onigposix.h" -#ifdef HAVE_STRING_H -# include <string.h> -#else -# include <strings.h> -#endif +#include <string.h>  #if defined(__GNUC__)  #  define ARG_UNUSED  __attribute__ ((unused)) diff --git a/src/regsyntax.c b/src/regsyntax.c index aa95479..98b815c 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -67,8 +67,8 @@ OnigSyntaxType OnigSyntaxPosixExtended = {      ONIG_SYN_OP_BRACE_INTERVAL |      ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )    , 0 -  , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |  -      ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |  +  , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | +      ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |        ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |        ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )    , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) @@ -113,10 +113,7 @@ static int  code_to_mbclen(OnigCodePoint code)  {    if (code < 256) { -    if (EncLen_SJIS[(int )code] == 1) -      return 1; -    else -      return 0; +    return EncLen_SJIS[(int )code] == 1;    }    else if (code <= 0xffff) {      return 2; @@ -188,7 +185,7 @@ 
is_mbc_ambiguous(OnigCaseFoldType flag,  		 const UChar** pp, const UChar* end)  {    return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); -                                       +  }  #endif @@ -223,7 +220,7 @@ left_adjust_char_head(const UChar* start, const UChar* s)  	p++;  	break;        } -    }  +    }    }    len = enclen(ONIG_ENCODING_SJIS, p);    if (p + len > s) return (UChar* )p; @@ -338,6 +335,6 @@ OnigEncodingType OnigEncodingSJIS = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1_OR_0,    0, 0  }; diff --git a/src/unicode.c b/src/unicode.c index 63bc65c..04944b9 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -657,8 +657,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,  #ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER    if (! ONIGENC_IS_UNICODE_ENCODING(enc)) { -    if (from == 0x000d && to == 0x000a) return 0; -    else return 1; +    return from != 0x000d || to != 0x000a;    }    btype = unicode_egcb_is_break_2code(from, to); @@ -701,8 +700,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,    return 1;  #else -  if (from == 0x000d && to == 0x000a) return 0; -  else return 1; +  return from != 0x000d || to != 0x000a;  #endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */  } @@ -729,6 +727,7 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)    int len;    int c;    char* s; +  UChar* uname;    if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)      return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS; @@ -741,10 +740,11 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)    if (s == 0)      return ONIGERR_MEMORY; +  uname = (UChar* )name;    n = 0;    for (i = 0; i < len; i++) { -    c = name[i]; -    if (c <= 0 || c >= 0x80) { +    c = uname[i]; +    if (c < 0x20 || c >= 0x80) {        xfree(s);        return 
ONIGERR_INVALID_CHAR_PROPERTY_NAME;      } @@ -758,6 +758,10 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)    if (UserDefinedPropertyTable == 0) {      UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10); +    if (IS_NULL(UserDefinedPropertyTable)) { +      xfree(s); +      return ONIGERR_MEMORY; +    }    }    e = UserDefinedPropertyRanges + UserDefinedPropertyNum; diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c index 0f4712a..07916b4 100644 --- a/src/unicode_fold1_key.c +++ b/src/unicode_fold1_key.c @@ -2988,5 +2988,3 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[])      }    return -1;  } - - diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c index 1469a46..3bc4bd6 100644 --- a/src/unicode_fold2_key.c +++ b/src/unicode_fold2_key.c @@ -225,5 +225,3 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[])      }    return -1;  } - - diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c index 70e70c8..b935db6 100644 --- a/src/unicode_fold3_key.c +++ b/src/unicode_fold3_key.c @@ -135,5 +135,3 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[])      }    return -1;  } - - diff --git a/src/unicode_fold_data.c b/src/unicode_fold_data.c index 7f7e24e..0b2c519 100644 --- a/src/unicode_fold_data.c +++ b/src/unicode_fold_data.c @@ -1513,4 +1513,3 @@ OnigCodePoint OnigUnicodeFolds3[] = {   /* ----- LOCALE ----- */  #define FOLDS3_END_INDEX   72  }; - diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c index b4c0318..bf71df0 100644 --- a/src/unicode_unfold_key.c +++ b/src/unicode_unfold_key.c @@ -3283,5 +3283,3 @@ onigenc_unicode_unfold_key(OnigCodePoint code)      }    return 0;  } - - diff --git a/src/utf16_be.c b/src/utf16_be.c index 8f5b8bf..a812a32 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -278,6 +278,6 @@ OnigEncodingType OnigEncodingUTF16_BE = {    init,    0, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_UNICODE, +  
ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_2,    0, 0  }; diff --git a/src/utf16_le.c b/src/utf16_le.c index 92bf318..da9571f 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -286,6 +286,6 @@ OnigEncodingType OnigEncodingUTF16_LE = {    init,    0, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_UNICODE, +  ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; diff --git a/src/utf32_be.c b/src/utf32_be.c index 92476ec..9339b15 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -191,6 +191,6 @@ OnigEncodingType OnigEncodingUTF32_BE = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_UNICODE, +  ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_4,    0, 0  }; diff --git a/src/utf32_le.c b/src/utf32_le.c index dc3fd92..22e007c 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -191,6 +191,6 @@ OnigEncodingType OnigEncodingUTF32_LE = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_UNICODE, +  ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,    0, 0  }; @@ -57,7 +57,7 @@ static const int EncLen_UTF8[] = {    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -  4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 +  4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  };  static int @@ -280,7 +280,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,  OnigEncodingType OnigEncodingUTF8 = {    mbc_enc_len,    "UTF-8",     /* name */ -  6,           /* max enc length */ +  4,           /* max enc length */    1,           /* min enc length */    onigenc_is_mbc_newline_0x0a,    mbc_to_code, @@ -297,6 +297,6 @@ OnigEncodingType OnigEncodingUTF8 = {    NULL, /* init */    NULL, /* is_initialized */    is_valid_mbc_string, -  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE, +  ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1_OR_0,    0, 0  }; | 
